commonmarker 0.23.10 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +1221 -0
- data/Cargo.toml +7 -0
- data/README.md +233 -172
- data/ext/commonmarker/Cargo.toml +20 -0
- data/ext/commonmarker/extconf.rb +3 -6
- data/ext/commonmarker/src/lib.rs +103 -0
- data/ext/commonmarker/src/node.rs +1160 -0
- data/ext/commonmarker/src/options.rs +216 -0
- data/ext/commonmarker/src/plugins/syntax_highlighting.rs +166 -0
- data/ext/commonmarker/src/plugins.rs +6 -0
- data/ext/commonmarker/src/utils.rs +8 -0
- data/lib/commonmarker/config.rb +91 -40
- data/lib/commonmarker/constants.rb +7 -0
- data/lib/commonmarker/extension.rb +14 -0
- data/lib/commonmarker/node/ast.rb +8 -0
- data/lib/commonmarker/node/inspect.rb +14 -4
- data/lib/commonmarker/node.rb +29 -47
- data/lib/commonmarker/renderer.rb +1 -127
- data/lib/commonmarker/utils.rb +22 -0
- data/lib/commonmarker/version.rb +2 -2
- data/lib/commonmarker.rb +27 -25
- metadata +38 -186
- data/Rakefile +0 -109
- data/bin/commonmarker +0 -118
- data/commonmarker.gemspec +0 -38
- data/ext/commonmarker/arena.c +0 -104
- data/ext/commonmarker/autolink.c +0 -508
- data/ext/commonmarker/autolink.h +0 -8
- data/ext/commonmarker/blocks.c +0 -1622
- data/ext/commonmarker/buffer.c +0 -278
- data/ext/commonmarker/buffer.h +0 -116
- data/ext/commonmarker/case_fold_switch.inc +0 -4327
- data/ext/commonmarker/chunk.h +0 -135
- data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
- data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
- data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
- data/ext/commonmarker/cmark-gfm.h +0 -833
- data/ext/commonmarker/cmark-gfm_export.h +0 -42
- data/ext/commonmarker/cmark-gfm_version.h +0 -7
- data/ext/commonmarker/cmark.c +0 -55
- data/ext/commonmarker/cmark_ctype.c +0 -44
- data/ext/commonmarker/cmark_ctype.h +0 -33
- data/ext/commonmarker/commonmark.c +0 -514
- data/ext/commonmarker/commonmarker.c +0 -1308
- data/ext/commonmarker/commonmarker.h +0 -16
- data/ext/commonmarker/config.h +0 -76
- data/ext/commonmarker/core-extensions.c +0 -27
- data/ext/commonmarker/entities.inc +0 -2138
- data/ext/commonmarker/ext_scanners.c +0 -879
- data/ext/commonmarker/ext_scanners.h +0 -24
- data/ext/commonmarker/footnotes.c +0 -63
- data/ext/commonmarker/footnotes.h +0 -27
- data/ext/commonmarker/houdini.h +0 -57
- data/ext/commonmarker/houdini_href_e.c +0 -100
- data/ext/commonmarker/houdini_html_e.c +0 -66
- data/ext/commonmarker/houdini_html_u.c +0 -149
- data/ext/commonmarker/html.c +0 -502
- data/ext/commonmarker/html.h +0 -27
- data/ext/commonmarker/inlines.c +0 -1788
- data/ext/commonmarker/inlines.h +0 -29
- data/ext/commonmarker/iterator.c +0 -159
- data/ext/commonmarker/iterator.h +0 -26
- data/ext/commonmarker/latex.c +0 -468
- data/ext/commonmarker/linked_list.c +0 -37
- data/ext/commonmarker/man.c +0 -274
- data/ext/commonmarker/map.c +0 -129
- data/ext/commonmarker/map.h +0 -44
- data/ext/commonmarker/node.c +0 -1045
- data/ext/commonmarker/node.h +0 -167
- data/ext/commonmarker/parser.h +0 -59
- data/ext/commonmarker/plaintext.c +0 -218
- data/ext/commonmarker/plugin.c +0 -36
- data/ext/commonmarker/plugin.h +0 -34
- data/ext/commonmarker/references.c +0 -43
- data/ext/commonmarker/references.h +0 -26
- data/ext/commonmarker/registry.c +0 -63
- data/ext/commonmarker/registry.h +0 -24
- data/ext/commonmarker/render.c +0 -213
- data/ext/commonmarker/render.h +0 -62
- data/ext/commonmarker/scanners.c +0 -14056
- data/ext/commonmarker/scanners.h +0 -70
- data/ext/commonmarker/scanners.re +0 -341
- data/ext/commonmarker/strikethrough.c +0 -167
- data/ext/commonmarker/strikethrough.h +0 -9
- data/ext/commonmarker/syntax_extension.c +0 -149
- data/ext/commonmarker/syntax_extension.h +0 -34
- data/ext/commonmarker/table.c +0 -917
- data/ext/commonmarker/table.h +0 -12
- data/ext/commonmarker/tagfilter.c +0 -60
- data/ext/commonmarker/tagfilter.h +0 -8
- data/ext/commonmarker/tasklist.c +0 -156
- data/ext/commonmarker/tasklist.h +0 -8
- data/ext/commonmarker/utf8.c +0 -317
- data/ext/commonmarker/utf8.h +0 -35
- data/ext/commonmarker/xml.c +0 -182
- data/lib/commonmarker/renderer/html_renderer.rb +0 -256
data/ext/commonmarker/autolink.c
DELETED
@@ -1,508 +0,0 @@
|
|
1
|
-
#include "autolink.h"
|
2
|
-
#include <parser.h>
|
3
|
-
#include <string.h>
|
4
|
-
#include <utf8.h>
|
5
|
-
#include <stddef.h>
|
6
|
-
|
7
|
-
#if defined(_WIN32)
|
8
|
-
#define strncasecmp _strnicmp
|
9
|
-
#else
|
10
|
-
#include <strings.h>
|
11
|
-
#endif
|
12
|
-
|
13
|
-
static int is_valid_hostchar(const uint8_t *link, size_t link_len) {
|
14
|
-
int32_t ch;
|
15
|
-
int r = cmark_utf8proc_iterate(link, (bufsize_t)link_len, &ch);
|
16
|
-
if (r < 0)
|
17
|
-
return 0;
|
18
|
-
return !cmark_utf8proc_is_space(ch) && !cmark_utf8proc_is_punctuation(ch);
|
19
|
-
}
|
20
|
-
|
21
|
-
/* Check that `link` begins with one of the whitelisted URI schemes
 * (compared case-insensitively) and that the scheme is immediately followed
 * by a valid host character.
 *
 * Returns 1 when such a scheme is found, 0 otherwise. */
static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
  static const char *const schemes[] = {"http://", "https://", "ftp://"};
  const size_t scheme_count = sizeof(schemes) / sizeof(schemes[0]);
  size_t idx;

  for (idx = 0; idx < scheme_count; ++idx) {
    const char *scheme = schemes[idx];
    const size_t scheme_len = strlen(scheme);

    /* The link must be strictly longer than the scheme itself. */
    if (link_len <= scheme_len)
      continue;

    if (strncasecmp((const char *)link, scheme, scheme_len) != 0)
      continue;

    if (is_valid_hostchar(link + scheme_len, link_len - scheme_len))
      return 1;
  }

  return 0;
}
|
37
|
-
|
38
|
-
/* Trim a candidate autolink so that it does not swallow trailing
 * punctuation.
 *
 * `data` points at the first byte of the candidate and `link_end` is its
 * tentative length. The candidate is first cut at the first '<'; parentheses
 * before that point are counted. Then trailing delimiter characters are
 * removed one at a time.
 *
 * Returns the adjusted length, which may be 0. */
static size_t autolink_delim(uint8_t *data, size_t link_end) {
  size_t i;
  size_t closing = 0;
  size_t opening = 0;

  for (i = 0; i < link_end; ++i) {
    const uint8_t c = data[i];
    if (c == '<') {
      link_end = i;
      break;
    } else if (c == '(') {
      opening++;
    } else if (c == ')') {
      closing++;
    }
  }

  while (link_end > 0) {
    switch (data[link_end - 1]) {
    case ')':
      /* Allow any number of matching brackets (as counted in `opening` and
       * `closing`) at the end of the URL. If there is a greater number of
       * closing brackets than opening ones, we remove one character from the
       * end of the link.
       *
       * Examples (input text => output linked portion):
       *
       *        http://www.pokemon.com/Pikachu_(Electric)
       *     => http://www.pokemon.com/Pikachu_(Electric)
       *
       *        http://www.pokemon.com/Pikachu_((Electric)
       *     => http://www.pokemon.com/Pikachu_((Electric)
       *
       *        http://www.pokemon.com/Pikachu_(Electric))
       *     => http://www.pokemon.com/Pikachu_(Electric)
       *
       *        http://www.pokemon.com/Pikachu_((Electric))
       *     => http://www.pokemon.com/Pikachu_((Electric))
       */
      if (closing <= opening) {
        return link_end;
      }
      closing--;
      link_end--;
      break;
    case '?':
    case '!':
    case '.':
    case ',':
    case ':':
    case '*':
    case '_':
    case '~':
    case '\'':
    case '"':
      link_end--;
      break;
    case ';': {
      size_t new_end;

      /* With a single remaining byte there is no room for an entity
       * reference before the ';'. Computing `link_end - 2` here would wrap
       * around and index far outside the buffer, so just drop the ';'. */
      if (link_end < 2) {
        link_end--;
        break;
      }

      /* Walk back over the letters preceding the ';'. If they are introduced
       * by '&', the whole "&name;" sequence is cut from the link. */
      new_end = link_end - 2;

      while (new_end > 0 && cmark_isalpha(data[new_end]))
        new_end--;

      if (new_end < link_end - 2 && data[new_end] == '&')
        link_end = new_end;
      else
        link_end--;
      break;
    }

    default:
      return link_end;
    }
  }

  return link_end;
}
|
115
|
-
|
116
|
-
/* Scan the domain part of a candidate autolink.
 *
 * `data`/`size` describe the bytes available from the start of the domain.
 * Scanning begins at index 1 and stops before the final byte, or at the
 * first byte that is not '_', '.', '-' or a valid host character. A
 * backslash causes the byte after it to be examined in its place.
 *
 * Returns the index at which scanning stopped, or 0 when the domain is
 * rejected. With `allow_short` set, a domain without any dot is accepted;
 * otherwise at least one dot is required. */
static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
  size_t i, np = 0, uscore1 = 0, uscore2 = 0;

  /* An empty buffer cannot hold a domain. Bail out before evaluating
   * `size - 1` below, which would wrap around for size == 0. */
  if (size == 0)
    return 0;

  /* The purpose of this code is to reject urls that contain an underscore
   * in one of the last two segments. Examples:
   *
   *   www.xxx.yyy.zzz     autolinked
   *   www.xxx.yyy._zzz    not autolinked
   *   www.xxx._yyy.zzz    not autolinked
   *   www._xxx.yyy.zzz    autolinked
   *
   * The reason is that domain names are allowed to include underscores,
   * but host names are not. See: https://stackoverflow.com/a/2183140
   */
  for (i = 1; i < size - 1; i++) {
    if (data[i] == '\\' && i < size - 2)
      i++;
    if (data[i] == '_')
      uscore2++;
    else if (data[i] == '.') {
      /* Starting a new segment: remember the underscore count of the
       * previous one so that only the last two segments are tracked. */
      uscore1 = uscore2;
      uscore2 = 0;
      np++;
    } else if (!is_valid_hostchar(data + i, size - i) && data[i] != '-')
      break;
  }

  if (uscore1 > 0 || uscore2 > 0) {
    /* If the url is very long then accept it despite the underscores,
     * to avoid quadratic behavior causing a denial of service. See:
     * https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
     * Reasonable urls are unlikely to have more than 10 segments, so
     * this extra condition shouldn't have any impact on normal usage.
     */
    if (np <= 10) {
      return 0;
    }
  }

  if (allow_short) {
    /* We don't need a valid domain in the strict sense (with at
     * least one dot), so just make sure it's composed of valid
     * domain characters and return the length of the valid
     * sequence. */
    return i;
  } else {
    /* A valid domain needs to have at least one dot.
     * That's as far as we get. */
    return np ? i : 0;
  }
}
|
167
|
-
|
168
|
-
static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
|
169
|
-
cmark_inline_parser *inline_parser) {
|
170
|
-
cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
|
171
|
-
size_t max_rewind = cmark_inline_parser_get_offset(inline_parser);
|
172
|
-
uint8_t *data = chunk->data + max_rewind;
|
173
|
-
size_t size = chunk->len - max_rewind;
|
174
|
-
int start = cmark_inline_parser_get_column(inline_parser);
|
175
|
-
|
176
|
-
size_t link_end;
|
177
|
-
|
178
|
-
if (max_rewind > 0 && strchr("*_~(", data[-1]) == NULL &&
|
179
|
-
!cmark_isspace(data[-1]))
|
180
|
-
return 0;
|
181
|
-
|
182
|
-
if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
|
183
|
-
return 0;
|
184
|
-
|
185
|
-
link_end = check_domain(data, size, 0);
|
186
|
-
|
187
|
-
if (link_end == 0)
|
188
|
-
return NULL;
|
189
|
-
|
190
|
-
while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
|
191
|
-
link_end++;
|
192
|
-
|
193
|
-
link_end = autolink_delim(data, link_end);
|
194
|
-
|
195
|
-
if (link_end == 0)
|
196
|
-
return NULL;
|
197
|
-
|
198
|
-
cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
|
199
|
-
|
200
|
-
cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
|
201
|
-
|
202
|
-
cmark_strbuf buf;
|
203
|
-
cmark_strbuf_init(parser->mem, &buf, 10);
|
204
|
-
cmark_strbuf_puts(&buf, "http://");
|
205
|
-
cmark_strbuf_put(&buf, data, (bufsize_t)link_end);
|
206
|
-
node->as.link.url = cmark_chunk_buf_detach(&buf);
|
207
|
-
|
208
|
-
cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
209
|
-
text->as.literal =
|
210
|
-
cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end);
|
211
|
-
cmark_node_append_child(node, text);
|
212
|
-
|
213
|
-
node->start_line = text->start_line =
|
214
|
-
node->end_line = text->end_line =
|
215
|
-
cmark_inline_parser_get_line(inline_parser);
|
216
|
-
|
217
|
-
node->start_column = text->start_column = start - 1;
|
218
|
-
node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
|
219
|
-
|
220
|
-
return node;
|
221
|
-
}
|
222
|
-
|
223
|
-
static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
|
224
|
-
cmark_inline_parser *inline_parser) {
|
225
|
-
size_t link_end, domain_len;
|
226
|
-
int rewind = 0;
|
227
|
-
|
228
|
-
cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
|
229
|
-
int max_rewind = cmark_inline_parser_get_offset(inline_parser);
|
230
|
-
uint8_t *data = chunk->data + max_rewind;
|
231
|
-
size_t size = chunk->len - max_rewind;
|
232
|
-
|
233
|
-
if (size < 4 || data[1] != '/' || data[2] != '/')
|
234
|
-
return 0;
|
235
|
-
|
236
|
-
while (rewind < max_rewind && cmark_isalpha(data[-rewind - 1]))
|
237
|
-
rewind++;
|
238
|
-
|
239
|
-
if (!sd_autolink_issafe(data - rewind, size + rewind))
|
240
|
-
return 0;
|
241
|
-
|
242
|
-
link_end = strlen("://");
|
243
|
-
|
244
|
-
domain_len = check_domain(data + link_end, size - link_end, 1);
|
245
|
-
|
246
|
-
if (domain_len == 0)
|
247
|
-
return 0;
|
248
|
-
|
249
|
-
link_end += domain_len;
|
250
|
-
while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
|
251
|
-
link_end++;
|
252
|
-
|
253
|
-
link_end = autolink_delim(data, link_end);
|
254
|
-
|
255
|
-
if (link_end == 0)
|
256
|
-
return NULL;
|
257
|
-
|
258
|
-
cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
|
259
|
-
cmark_node_unput(parent, rewind);
|
260
|
-
|
261
|
-
cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
|
262
|
-
|
263
|
-
cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind,
|
264
|
-
(bufsize_t)(link_end + rewind));
|
265
|
-
node->as.link.url = url;
|
266
|
-
|
267
|
-
cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
268
|
-
text->as.literal = url;
|
269
|
-
cmark_node_append_child(node, text);
|
270
|
-
|
271
|
-
node->start_line = text->start_line = node->end_line = text->end_line = cmark_inline_parser_get_line(inline_parser);
|
272
|
-
|
273
|
-
node->start_column = text->start_column = max_rewind - rewind;
|
274
|
-
node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
|
275
|
-
|
276
|
-
return node;
|
277
|
-
}
|
278
|
-
|
279
|
-
static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
|
280
|
-
cmark_node *parent, unsigned char c,
|
281
|
-
cmark_inline_parser *inline_parser) {
|
282
|
-
if (cmark_inline_parser_in_bracket(inline_parser, false) ||
|
283
|
-
cmark_inline_parser_in_bracket(inline_parser, true))
|
284
|
-
return NULL;
|
285
|
-
|
286
|
-
if (c == ':')
|
287
|
-
return url_match(parser, parent, inline_parser);
|
288
|
-
|
289
|
-
if (c == 'w')
|
290
|
-
return www_match(parser, parent, inline_parser);
|
291
|
-
|
292
|
-
return NULL;
|
293
|
-
|
294
|
-
// note that we could end up re-consuming something already a
|
295
|
-
// part of an inline, because we don't track when the last
|
296
|
-
// inline was finished in inlines.c.
|
297
|
-
}
|
298
|
-
|
299
|
-
/* Check whether the `len = strlen(protocol)` bytes ending just before
 * `data - rewind` spell out `protocol`.
 *
 * `rewind` is how far back from `data` the caller has already scanned and
 * `max_rewind` is how many bytes are available before `data`.
 *
 * Returns false when there is not enough room for the protocol or the bytes
 * differ. Returns true when the protocol starts exactly at the rewind limit.
 * Otherwise the protocol is accepted only if the byte in front of it is not
 * alphanumeric. */
static bool validate_protocol(const char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) {
  size_t len = strlen(protocol);
  const uint8_t *protocol_start;

  if (len > (max_rewind - rewind)) {
    return false;
  }

  /* Step backwards with pointer arithmetic only. Building one combined
   * negative index from ptrdiff_t and size_t operands would be evaluated as
   * unsigned and wrap to a huge value. */
  protocol_start = data - rewind - len;

  // Check that the protocol matches
  if (memcmp(protocol_start, protocol, len) != 0) {
    return false;
  }

  if (len == (max_rewind - rewind)) {
    return true;
  }

  char prev_char = (char)protocol_start[-1];

  // Make sure the character before the protocol is non-alphanumeric
  return !cmark_isalnum(prev_char);
}
|
320
|
-
|
321
|
-
/* Split a TEXT node around every e-mail style autolink it contains.
 *
 * For each '@' the code rewinds over the local part (alphanumerics and
 * ".+-_", optionally preceded by a "mailto:" or "xmpp:" protocol) and then
 * scans forward over the domain. A match needs at least one '.' followed by
 * an alphanumeric in the domain, and must end in a letter or '.'. For each
 * match, `text` is shortened to the part before the link, and a LINK node
 * and a trailing TEXT node are inserted after it. Processing then continues
 * in the trailing node. "mailto:" is prepended to the URL unless the text
 * already carried a protocol. */
static void postprocess_text(cmark_parser *parser, cmark_node *text) {
  size_t start = 0;
  size_t offset = 0;
  // `text` is going to be split into a list of nodes containing shorter segments
  // of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
  // create references to it. Later, `cmark_chunk_to_cstr` is used to convert
  // the references into allocated buffers. The detached buffer is freed before we
  // return.
  cmark_chunk detached_chunk = text->as.literal;
  text->as.literal = cmark_chunk_dup(&detached_chunk, 0, detached_chunk.len);

  uint8_t *data = text->as.literal.data;
  size_t remaining = text->as.literal.len;

  while (true) {
    size_t link_end;
    uint8_t *at;
    bool auto_mailto;
    bool is_xmpp;
    size_t rewind;
    size_t max_rewind;
    size_t np;

    if (offset >= remaining)
      break;

    at = (uint8_t *)memchr(data + start + offset, '@', remaining - offset);
    if (!at)
      break;

    max_rewind = at - (data + start + offset);

found_at:
    // Every attempt starts from a clean slate. This label is also reached via
    // `goto` when a second '@' is found, and state left over from the
    // abandoned attempt (dots counted in what is now the local part, or a
    // protocol seen before the earlier '@') must not leak into this one.
    auto_mailto = true;
    is_xmpp = false;
    np = 0;

    for (rewind = 0; rewind < max_rewind; ++rewind) {
      uint8_t c = data[start + offset + max_rewind - rewind - 1];

      if (cmark_isalnum(c))
        continue;

      if (strchr(".+-_", c) != NULL)
        continue;

      if (strchr(":", c) != NULL) {
        if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) {
          auto_mailto = false;
          continue;
        }

        if (validate_protocol("xmpp:", data + start + offset + max_rewind, rewind, max_rewind)) {
          auto_mailto = false;
          is_xmpp = true;
          continue;
        }
      }

      break;
    }

    if (rewind == 0) {
      // Nothing usable in front of the '@': skip past it.
      offset += max_rewind + 1;
      continue;
    }

    assert(data[start + offset + max_rewind] == '@');
    for (link_end = 1; link_end < remaining - offset - max_rewind; ++link_end) {
      uint8_t c = data[start + offset + max_rewind + link_end];

      if (cmark_isalnum(c))
        continue;

      if (c == '@') {
        // Found another '@', so go back and try again with an updated offset and max_rewind.
        offset += max_rewind + 1;
        max_rewind = link_end - 1;
        goto found_at;
      } else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
                 cmark_isalnum(data[start + offset + max_rewind + link_end + 1]))
        np++;
      else if (c == '/' && is_xmpp)
        continue;
      else if (c != '-' && c != '_')
        break;
    }

    if (link_end < 2 || np == 0 ||
        (!cmark_isalpha(data[start + offset + max_rewind + link_end - 1]) &&
         data[start + offset + max_rewind + link_end - 1] != '.')) {
      offset += max_rewind + link_end;
      continue;
    }

    link_end = autolink_delim(data + start + offset + max_rewind, link_end);

    if (link_end == 0) {
      offset += max_rewind + 1;
      continue;
    }

    cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
    cmark_strbuf buf;
    cmark_strbuf_init(parser->mem, &buf, 10);
    if (auto_mailto)
      cmark_strbuf_puts(&buf, "mailto:");
    cmark_strbuf_put(&buf, data + start + offset + max_rewind - rewind, (bufsize_t)(link_end + rewind));
    link_node->as.link.url = cmark_chunk_buf_detach(&buf);

    cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
    cmark_chunk email = cmark_chunk_dup(
      &detached_chunk,
      (bufsize_t)(start + offset + max_rewind - rewind),
      (bufsize_t)(link_end + rewind));
    cmark_chunk_to_cstr(parser->mem, &email);
    link_text->as.literal = email;
    cmark_node_append_child(link_node, link_text);

    cmark_node_insert_after(text, link_node);

    cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
    post->as.literal = cmark_chunk_dup(&detached_chunk,
      (bufsize_t)(start + offset + max_rewind + link_end),
      (bufsize_t)(remaining - offset - max_rewind - link_end));

    cmark_node_insert_after(link_node, post);

    text->as.literal = cmark_chunk_dup(&detached_chunk, (bufsize_t)start, (bufsize_t)(offset + max_rewind - rewind));
    cmark_chunk_to_cstr(parser->mem, &text->as.literal);

    // Continue scanning in the text that follows the link.
    text = post;
    start += offset + max_rewind + link_end;
    remaining -= offset + max_rewind + link_end;
    offset = 0;
  }

  // Convert the reference to allocated memory.
  assert(!text->as.literal.alloc);
  cmark_chunk_to_cstr(parser->mem, &text->as.literal);

  // Free the detached buffer.
  cmark_chunk_free(parser->mem, &detached_chunk);
}
|
461
|
-
|
462
|
-
/* Postprocess callback: after consolidating adjacent text nodes, walk the
 * tree and run postprocess_text() on every TEXT node that is not inside a
 * LINK node. Returns `root`. `ext` is not used. */
static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
  cmark_iter *walker;
  cmark_event_type event;
  bool inside_link = false;

  cmark_consolidate_text_nodes(root);
  walker = cmark_iter_new(root);

  for (event = cmark_iter_next(walker); event != CMARK_EVENT_DONE;
       event = cmark_iter_next(walker)) {
    cmark_node *current = cmark_iter_get_node(walker);

    if (inside_link) {
      /* Ignore everything until the enclosing link is exited. */
      if (event == CMARK_EVENT_EXIT && current->type == CMARK_NODE_LINK)
        inside_link = false;
      continue;
    }

    if (event != CMARK_EVENT_ENTER)
      continue;

    if (current->type == CMARK_NODE_LINK)
      inside_link = true;
    else if (current->type == CMARK_NODE_TEXT)
      postprocess_text(parser, current);
  }

  cmark_iter_free(walker);

  return root;
}
|
494
|
-
|
495
|
-
cmark_syntax_extension *create_autolink_extension(void) {
|
496
|
-
cmark_syntax_extension *ext = cmark_syntax_extension_new("autolink");
|
497
|
-
cmark_llist *special_chars = NULL;
|
498
|
-
|
499
|
-
cmark_syntax_extension_set_match_inline_func(ext, match);
|
500
|
-
cmark_syntax_extension_set_postprocess_func(ext, postprocess);
|
501
|
-
|
502
|
-
cmark_mem *mem = cmark_get_default_mem_allocator();
|
503
|
-
special_chars = cmark_llist_append(mem, special_chars, (void *)':');
|
504
|
-
special_chars = cmark_llist_append(mem, special_chars, (void *)'w');
|
505
|
-
cmark_syntax_extension_set_special_inline_chars(ext, special_chars);
|
506
|
-
|
507
|
-
return ext;
|
508
|
-
}
|