qiita_marker 0.23.6.2 → 0.23.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/qiita_marker/arena.c +9 -8
- data/ext/qiita_marker/autolink.c +209 -159
- data/ext/qiita_marker/blocks.c +25 -1
- data/ext/qiita_marker/cmark-gfm-core-extensions.h +11 -11
- data/ext/qiita_marker/cmark-gfm-extension_api.h +1 -0
- data/ext/qiita_marker/cmark-gfm.h +18 -2
- data/ext/qiita_marker/cmark-gfm_version.h +2 -2
- data/ext/qiita_marker/cmark.c +3 -3
- data/ext/qiita_marker/commonmark.c +18 -33
- data/ext/qiita_marker/html.c +22 -6
- data/ext/qiita_marker/inlines.c +130 -58
- data/ext/qiita_marker/latex.c +6 -4
- data/ext/qiita_marker/man.c +7 -11
- data/ext/qiita_marker/map.c +11 -4
- data/ext/qiita_marker/map.h +5 -2
- data/ext/qiita_marker/node.c +75 -10
- data/ext/qiita_marker/node.h +42 -1
- data/ext/qiita_marker/parser.h +1 -0
- data/ext/qiita_marker/plaintext.c +12 -29
- data/ext/qiita_marker/qiita_marker.c +1 -0
- data/ext/qiita_marker/references.c +1 -0
- data/ext/qiita_marker/render.c +15 -7
- data/ext/qiita_marker/scanners.c +13917 -10369
- data/ext/qiita_marker/scanners.h +8 -0
- data/ext/qiita_marker/strikethrough.c +1 -1
- data/ext/qiita_marker/table.c +59 -35
- data/ext/qiita_marker/xml.c +2 -1
- data/lib/qiita_marker/config.rb +14 -12
- data/lib/qiita_marker/renderer/html_renderer.rb +15 -4
- data/lib/qiita_marker/renderer.rb +1 -1
- data/lib/qiita_marker/version.rb +1 -1
- data/lib/qiita_marker.rb +26 -24
- data/qiita_marker.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 32eb3d3a4da3ce6d72df6417f6b307e2117958939163446e38ded1a248c495fb
|
4
|
+
data.tar.gz: f3daa3a44fb40d856ce1994c94a1db9ea93491a2c02ba3d61bdca74694b87331
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8cd8a157a896d9fc67ed1810cd1964ac967b4d6034d0db3009772c2429ea5ee11929689bc5bbdddeff0c30a8f032bcbb71c4cb3b0d81bee8daf08acd0732e72d
|
7
|
+
data.tar.gz: 03010f6c8bfec94564d01d5cec3f301552b63366bd0b8748d10a5b4b1cb2921d457ce1ca9e60fbedc41375cc89fc1a81ffc64c88a8c1952c6547c95904c46a3c
|
data/ext/qiita_marker/arena.c
CHANGED
@@ -68,15 +68,16 @@ static void *arena_calloc(size_t nmem, size_t size) {
|
|
68
68
|
const size_t align = sizeof(size_t) - 1;
|
69
69
|
sz = (sz + align) & ~align;
|
70
70
|
|
71
|
+
struct arena_chunk *chunk;
|
71
72
|
if (sz > A->sz) {
|
72
|
-
A->prev = alloc_arena_chunk(sz, A->prev);
|
73
|
-
|
73
|
+
A->prev = chunk = alloc_arena_chunk(sz, A->prev);
|
74
|
+
} else if (sz > A->sz - A->used) {
|
75
|
+
A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
|
76
|
+
} else {
|
77
|
+
chunk = A;
|
74
78
|
}
|
75
|
-
|
76
|
-
|
77
|
-
}
|
78
|
-
void *ptr = (uint8_t *) A->ptr + A->used;
|
79
|
-
A->used += sz;
|
79
|
+
void *ptr = (uint8_t *) chunk->ptr + chunk->used;
|
80
|
+
chunk->used += sz;
|
80
81
|
*((size_t *) ptr) = sz - sizeof(size_t);
|
81
82
|
return (uint8_t *) ptr + sizeof(size_t);
|
82
83
|
}
|
@@ -98,6 +99,6 @@ static void arena_free(void *ptr) {
|
|
98
99
|
|
99
100
|
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
|
100
101
|
|
101
|
-
cmark_mem *cmark_get_arena_mem_allocator() {
|
102
|
+
cmark_mem *cmark_get_arena_mem_allocator(void) {
|
102
103
|
return &CMARK_ARENA_MEM_ALLOCATOR;
|
103
104
|
}
|
data/ext/qiita_marker/autolink.c
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
#include <render.h>
|
7
7
|
#include <string.h>
|
8
8
|
#include <utf8.h>
|
9
|
+
#include <stddef.h>
|
9
10
|
|
10
11
|
#if defined(_WIN32)
|
11
12
|
#define strncasecmp _strnicmp
|
@@ -44,44 +45,25 @@ static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
|
|
44
45
|
}
|
45
46
|
|
46
47
|
static size_t autolink_delim(uint8_t *data, size_t link_end) {
|
47
|
-
uint8_t cclose, copen;
|
48
48
|
size_t i;
|
49
|
+
size_t closing = 0;
|
50
|
+
size_t opening = 0;
|
49
51
|
|
50
|
-
for (i = 0; i < link_end; ++i)
|
51
|
-
|
52
|
+
for (i = 0; i < link_end; ++i) {
|
53
|
+
const uint8_t c = data[i];
|
54
|
+
if (c == '<') {
|
52
55
|
link_end = i;
|
53
56
|
break;
|
57
|
+
} else if (c == '(') {
|
58
|
+
opening++;
|
59
|
+
} else if (c == ')') {
|
60
|
+
closing++;
|
54
61
|
}
|
62
|
+
}
|
55
63
|
|
56
64
|
while (link_end > 0) {
|
57
|
-
|
58
|
-
|
59
|
-
switch (cclose) {
|
65
|
+
switch (data[link_end - 1]) {
|
60
66
|
case ')':
|
61
|
-
copen = '(';
|
62
|
-
break;
|
63
|
-
default:
|
64
|
-
copen = 0;
|
65
|
-
}
|
66
|
-
|
67
|
-
if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL)
|
68
|
-
link_end--;
|
69
|
-
|
70
|
-
else if (data[link_end - 1] == ';') {
|
71
|
-
size_t new_end = link_end - 2;
|
72
|
-
|
73
|
-
while (new_end > 0 && cmark_isalpha(data[new_end]))
|
74
|
-
new_end--;
|
75
|
-
|
76
|
-
if (new_end < link_end - 2 && data[new_end] == '&')
|
77
|
-
link_end = new_end;
|
78
|
-
else
|
79
|
-
link_end--;
|
80
|
-
} else if (copen != 0) {
|
81
|
-
size_t closing = 0;
|
82
|
-
size_t opening = 0;
|
83
|
-
i = 0;
|
84
|
-
|
85
67
|
/* Allow any number of matching brackets (as recognised in copen/cclose)
|
86
68
|
* at the end of the URL. If there is a greater number of closing
|
87
69
|
* brackets than opening ones, we remove one character from the end of
|
@@ -89,34 +71,52 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
|
|
89
71
|
*
|
90
72
|
* Examples (input text => output linked portion):
|
91
73
|
*
|
92
|
-
*
|
93
|
-
*
|
74
|
+
* http://www.pokemon.com/Pikachu_(Electric)
|
75
|
+
* => http://www.pokemon.com/Pikachu_(Electric)
|
94
76
|
*
|
95
|
-
*
|
96
|
-
*
|
77
|
+
* http://www.pokemon.com/Pikachu_((Electric)
|
78
|
+
* => http://www.pokemon.com/Pikachu_((Electric)
|
97
79
|
*
|
98
|
-
*
|
99
|
-
*
|
80
|
+
* http://www.pokemon.com/Pikachu_(Electric))
|
81
|
+
* => http://www.pokemon.com/Pikachu_(Electric)
|
100
82
|
*
|
101
|
-
*
|
102
|
-
*
|
83
|
+
* http://www.pokemon.com/Pikachu_((Electric))
|
84
|
+
* => http://www.pokemon.com/Pikachu_((Electric))
|
103
85
|
*/
|
104
|
-
|
105
|
-
|
106
|
-
if (data[i] == copen)
|
107
|
-
opening++;
|
108
|
-
else if (data[i] == cclose)
|
109
|
-
closing++;
|
110
|
-
|
111
|
-
i++;
|
86
|
+
if (closing <= opening) {
|
87
|
+
return link_end;
|
112
88
|
}
|
89
|
+
closing--;
|
90
|
+
link_end--;
|
91
|
+
break;
|
92
|
+
case '?':
|
93
|
+
case '!':
|
94
|
+
case '.':
|
95
|
+
case ',':
|
96
|
+
case ':':
|
97
|
+
case '*':
|
98
|
+
case '_':
|
99
|
+
case '~':
|
100
|
+
case '\'':
|
101
|
+
case '"':
|
102
|
+
link_end--;
|
103
|
+
break;
|
104
|
+
case ';': {
|
105
|
+
size_t new_end = link_end - 2;
|
113
106
|
|
114
|
-
|
115
|
-
|
107
|
+
while (new_end > 0 && cmark_isalpha(data[new_end]))
|
108
|
+
new_end--;
|
116
109
|
|
117
|
-
link_end
|
118
|
-
|
110
|
+
if (new_end < link_end - 2 && data[new_end] == '&')
|
111
|
+
link_end = new_end;
|
112
|
+
else
|
113
|
+
link_end--;
|
119
114
|
break;
|
115
|
+
}
|
116
|
+
|
117
|
+
default:
|
118
|
+
return link_end;
|
119
|
+
}
|
120
120
|
}
|
121
121
|
|
122
122
|
return link_end;
|
@@ -125,7 +125,20 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
|
|
125
125
|
static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
|
126
126
|
size_t i, np = 0, uscore1 = 0, uscore2 = 0;
|
127
127
|
|
128
|
+
/* The purpose of this code is to reject urls that contain an underscore
|
129
|
+
* in one of the last two segments. Examples:
|
130
|
+
*
|
131
|
+
* www.xxx.yyy.zzz autolinked
|
132
|
+
* www.xxx.yyy._zzz not autolinked
|
133
|
+
* www.xxx._yyy.zzz not autolinked
|
134
|
+
* www._xxx.yyy.zzz autolinked
|
135
|
+
*
|
136
|
+
* The reason is that domain names are allowed to include underscores,
|
137
|
+
* but host names are not. See: https://stackoverflow.com/a/2183140
|
138
|
+
*/
|
128
139
|
for (i = 1; i < size - 1; i++) {
|
140
|
+
if (data[i] == '\\' && i < size - 2)
|
141
|
+
i++;
|
129
142
|
if (data[i] == '_')
|
130
143
|
uscore2++;
|
131
144
|
else if (data[i] == '.') {
|
@@ -136,8 +149,17 @@ static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
|
|
136
149
|
break;
|
137
150
|
}
|
138
151
|
|
139
|
-
if (uscore1 > 0 || uscore2 > 0)
|
140
|
-
|
152
|
+
if (uscore1 > 0 || uscore2 > 0) {
|
153
|
+
/* If the url is very long then accept it despite the underscores,
|
154
|
+
* to avoid quadratic behavior causing a denial of service. See:
|
155
|
+
* https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
|
156
|
+
* Reasonable urls are unlikely to have more than 10 segments, so
|
157
|
+
* this extra condition shouldn't have any impact on normal usage.
|
158
|
+
*/
|
159
|
+
if (np <= 10) {
|
160
|
+
return 0;
|
161
|
+
}
|
162
|
+
}
|
141
163
|
|
142
164
|
if (allow_short) {
|
143
165
|
/* We don't need a valid domain in the strict sense (with
|
@@ -175,7 +197,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
|
|
175
197
|
if (link_end == 0)
|
176
198
|
return NULL;
|
177
199
|
|
178
|
-
while (link_end < size && !cmark_isspace(data[link_end]))
|
200
|
+
while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
|
179
201
|
link_end++;
|
180
202
|
|
181
203
|
link_end = autolink_delim(data, link_end);
|
@@ -239,7 +261,7 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
|
|
239
261
|
return 0;
|
240
262
|
|
241
263
|
link_end += domain_len;
|
242
|
-
while (link_end < size && !cmark_isspace(data[link_end]))
|
264
|
+
while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
|
243
265
|
link_end++;
|
244
266
|
|
245
267
|
link_end = autolink_delim(data, link_end);
|
@@ -262,6 +284,11 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
|
|
262
284
|
cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
263
285
|
text->as.literal = url;
|
264
286
|
cmark_node_append_child(node, text);
|
287
|
+
|
288
|
+
node->start_line = text->start_line = node->end_line = text->end_line = cmark_inline_parser_get_line(inline_parser);
|
289
|
+
|
290
|
+
node->start_column = text->start_column = max_rewind - rewind;
|
291
|
+
node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
|
265
292
|
|
266
293
|
return node;
|
267
294
|
}
|
@@ -286,148 +313,171 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
|
|
286
313
|
// inline was finished in inlines.c.
|
287
314
|
}
|
288
315
|
|
289
|
-
static bool validate_protocol(char protocol[], uint8_t *data,
|
316
|
+
static bool validate_protocol(char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) {
|
290
317
|
size_t len = strlen(protocol);
|
291
318
|
|
319
|
+
if (len > (max_rewind - rewind)) {
|
320
|
+
return false;
|
321
|
+
}
|
322
|
+
|
292
323
|
// Check that the protocol matches
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
324
|
+
if (memcmp(data - rewind - len, protocol, len) != 0) {
|
325
|
+
return false;
|
326
|
+
}
|
327
|
+
|
328
|
+
if (len == (max_rewind - rewind)) {
|
329
|
+
return true;
|
297
330
|
}
|
298
331
|
|
299
|
-
char prev_char = data[-rewind - len - 1];
|
332
|
+
char prev_char = data[-((ptrdiff_t)rewind) - len - 1];
|
300
333
|
|
301
334
|
// Make sure the character before the protocol is non-alphanumeric
|
302
335
|
return !cmark_isalnum(prev_char);
|
303
336
|
}
|
304
337
|
|
305
|
-
static void postprocess_text(cmark_parser *parser, cmark_node *text,
|
306
|
-
|
307
|
-
|
308
|
-
//
|
309
|
-
|
338
|
+
static void postprocess_text(cmark_parser *parser, cmark_node *text, cmark_syntax_extension *ext) {
|
339
|
+
size_t start = 0;
|
340
|
+
size_t offset = 0;
|
341
|
+
// `text` is going to be split into a list of nodes containing shorter segments
|
342
|
+
// of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
|
343
|
+
// create references to it. Later, `cmark_chunk_to_cstr` is used to convert
|
344
|
+
// the references into allocated buffers. The detached buffer is freed before we
|
345
|
+
// return.
|
346
|
+
cmark_chunk detached_chunk = text->as.literal;
|
347
|
+
text->as.literal = cmark_chunk_dup(&detached_chunk, 0, detached_chunk.len);
|
348
|
+
|
349
|
+
uint8_t *data = text->as.literal.data;
|
350
|
+
size_t remaining = text->as.literal.len;
|
351
|
+
|
352
|
+
while (true) {
|
353
|
+
size_t link_end;
|
354
|
+
uint8_t *at;
|
355
|
+
bool auto_mailto = true;
|
356
|
+
bool is_xmpp = false;
|
357
|
+
size_t rewind;
|
358
|
+
size_t max_rewind;
|
359
|
+
size_t np = 0;
|
360
|
+
|
361
|
+
if (offset >= remaining)
|
362
|
+
break;
|
310
363
|
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
size_t size = text->as.literal.len;
|
315
|
-
bool auto_mailto = true;
|
316
|
-
bool is_xmpp = false;
|
317
|
-
int rewind, max_rewind,
|
318
|
-
nb = 0, np = 0, ns = 0;
|
364
|
+
at = (uint8_t *)memchr(data + start + offset, '@', remaining - offset);
|
365
|
+
if (!at)
|
366
|
+
break;
|
319
367
|
|
320
|
-
|
321
|
-
return;
|
368
|
+
max_rewind = at - (data + start + offset);
|
322
369
|
|
323
|
-
|
324
|
-
|
370
|
+
found_at:
|
371
|
+
for (rewind = 0; rewind < max_rewind; ++rewind) {
|
372
|
+
uint8_t c = data[start + offset + max_rewind - rewind - 1];
|
325
373
|
|
326
|
-
|
327
|
-
|
328
|
-
return;
|
374
|
+
if (cmark_isalnum(c))
|
375
|
+
continue;
|
329
376
|
|
330
|
-
|
331
|
-
|
332
|
-
size -= max_rewind;
|
377
|
+
if (strchr(".+-_", c) != NULL)
|
378
|
+
continue;
|
333
379
|
|
334
|
-
|
335
|
-
|
380
|
+
if (strchr(":", c) != NULL) {
|
381
|
+
if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) {
|
382
|
+
auto_mailto = false;
|
383
|
+
continue;
|
384
|
+
}
|
385
|
+
|
386
|
+
if (validate_protocol("xmpp:", data + start + offset + max_rewind, rewind, max_rewind)) {
|
387
|
+
auto_mailto = false;
|
388
|
+
is_xmpp = true;
|
389
|
+
continue;
|
390
|
+
}
|
391
|
+
}
|
336
392
|
|
337
|
-
|
338
|
-
|
393
|
+
break;
|
394
|
+
}
|
339
395
|
|
340
|
-
if (
|
396
|
+
if (rewind == 0) {
|
397
|
+
offset += max_rewind + 1;
|
341
398
|
continue;
|
399
|
+
}
|
342
400
|
|
343
|
-
|
344
|
-
|
345
|
-
|
401
|
+
assert(data[start + offset + max_rewind] == '@');
|
402
|
+
for (link_end = 1; link_end < remaining - offset - max_rewind; ++link_end) {
|
403
|
+
uint8_t c = data[start + offset + max_rewind + link_end];
|
404
|
+
|
405
|
+
if (cmark_isalnum(c))
|
346
406
|
continue;
|
347
|
-
}
|
348
407
|
|
349
|
-
if (
|
350
|
-
|
351
|
-
|
408
|
+
if (c == '@') {
|
409
|
+
// Found another '@', so go back and try again with an updated offset and max_rewind.
|
410
|
+
offset += max_rewind + 1;
|
411
|
+
max_rewind = link_end - 1;
|
412
|
+
goto found_at;
|
413
|
+
} else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
|
414
|
+
cmark_isalnum(data[start + offset + max_rewind + link_end + 1]))
|
415
|
+
np++;
|
416
|
+
else if (c == '/' && is_xmpp)
|
352
417
|
continue;
|
353
|
-
|
418
|
+
else if (c != '-' && c != '_')
|
419
|
+
break;
|
354
420
|
}
|
355
421
|
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
}
|
422
|
+
if (link_end < 2 || np == 0 ||
|
423
|
+
(!cmark_isalpha(data[start + offset + max_rewind + link_end - 1]) &&
|
424
|
+
data[start + offset + max_rewind + link_end - 1] != '.')) {
|
425
|
+
offset += max_rewind + link_end;
|
426
|
+
continue;
|
427
|
+
}
|
363
428
|
|
364
|
-
|
365
|
-
uint8_t c = data[link_end];
|
429
|
+
link_end = autolink_delim(data + start + offset + max_rewind, link_end);
|
366
430
|
|
367
|
-
if (
|
431
|
+
if (link_end == 0) {
|
432
|
+
offset += max_rewind + 1;
|
368
433
|
continue;
|
434
|
+
}
|
369
435
|
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
436
|
+
cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
|
437
|
+
if (parser->options & CMARK_OPT_AUTOLINK_CLASS_NAME) {
|
438
|
+
cmark_node_set_syntax_extension(link_node, ext);
|
439
|
+
}
|
440
|
+
cmark_strbuf buf;
|
441
|
+
cmark_strbuf_init(parser->mem, &buf, 10);
|
442
|
+
if (auto_mailto)
|
443
|
+
cmark_strbuf_puts(&buf, "mailto:");
|
444
|
+
cmark_strbuf_put(&buf, data + start + offset + max_rewind - rewind, (bufsize_t)(link_end + rewind));
|
445
|
+
link_node->as.link.url = cmark_chunk_buf_detach(&buf);
|
446
|
+
|
447
|
+
cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
448
|
+
cmark_chunk email = cmark_chunk_dup(
|
449
|
+
&detached_chunk,
|
450
|
+
(bufsize_t)(start + offset + max_rewind - rewind),
|
451
|
+
(bufsize_t)(link_end + rewind));
|
452
|
+
cmark_chunk_to_cstr(parser->mem, &email);
|
453
|
+
link_text->as.literal = email;
|
454
|
+
cmark_node_append_child(link_node, link_text);
|
379
455
|
|
380
|
-
|
381
|
-
(!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) {
|
382
|
-
postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1, ext);
|
383
|
-
return;
|
384
|
-
}
|
456
|
+
cmark_node_insert_after(text, link_node);
|
385
457
|
|
386
|
-
|
458
|
+
cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
459
|
+
post->as.literal = cmark_chunk_dup(&detached_chunk,
|
460
|
+
(bufsize_t)(start + offset + max_rewind + link_end),
|
461
|
+
(bufsize_t)(remaining - offset - max_rewind - link_end));
|
387
462
|
|
388
|
-
|
389
|
-
postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1, ext);
|
390
|
-
return;
|
391
|
-
}
|
463
|
+
cmark_node_insert_after(link_node, post);
|
392
464
|
|
393
|
-
|
465
|
+
text->as.literal = cmark_chunk_dup(&detached_chunk, (bufsize_t)start, (bufsize_t)(offset + max_rewind - rewind));
|
466
|
+
cmark_chunk_to_cstr(parser->mem, &text->as.literal);
|
394
467
|
|
395
|
-
|
396
|
-
|
397
|
-
|
468
|
+
text = post;
|
469
|
+
start += offset + max_rewind + link_end;
|
470
|
+
remaining -= offset + max_rewind + link_end;
|
471
|
+
offset = 0;
|
398
472
|
}
|
399
|
-
cmark_strbuf buf;
|
400
|
-
cmark_strbuf_init(parser->mem, &buf, 10);
|
401
|
-
if (auto_mailto)
|
402
|
-
cmark_strbuf_puts(&buf, "mailto:");
|
403
|
-
cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind));
|
404
|
-
link_node->as.link.url = cmark_chunk_buf_detach(&buf);
|
405
|
-
|
406
|
-
cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
407
|
-
cmark_chunk email = cmark_chunk_dup(
|
408
|
-
&text->as.literal,
|
409
|
-
offset + max_rewind - rewind,
|
410
|
-
(bufsize_t)(link_end + rewind));
|
411
|
-
cmark_chunk_to_cstr(parser->mem, &email);
|
412
|
-
link_text->as.literal = email;
|
413
|
-
cmark_node_append_child(link_node, link_text);
|
414
473
|
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
post->as.literal = cmark_chunk_dup(&text->as.literal,
|
419
|
-
(bufsize_t)(offset + max_rewind + link_end),
|
420
|
-
(bufsize_t)(size - link_end));
|
421
|
-
cmark_chunk_to_cstr(parser->mem, &post->as.literal);
|
422
|
-
|
423
|
-
cmark_node_insert_after(link_node, post);
|
424
|
-
|
425
|
-
text->as.literal.len = offset + max_rewind - rewind;
|
426
|
-
text->as.literal.data[text->as.literal.len] = 0;
|
474
|
+
// Convert the reference to allocated memory.
|
475
|
+
assert(!text->as.literal.alloc);
|
476
|
+
cmark_chunk_to_cstr(parser->mem, &text->as.literal);
|
427
477
|
|
428
|
-
|
478
|
+
// Free the detached buffer.
|
479
|
+
cmark_chunk_free(parser->mem, &detached_chunk);
|
429
480
|
}
|
430
|
-
|
431
481
|
static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
|
432
482
|
cmark_iter *iter;
|
433
483
|
cmark_event_type ev;
|
@@ -452,7 +502,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser
|
|
452
502
|
}
|
453
503
|
|
454
504
|
if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) {
|
455
|
-
postprocess_text(parser, node,
|
505
|
+
postprocess_text(parser, node, ext);
|
456
506
|
}
|
457
507
|
}
|
458
508
|
|
data/ext/qiita_marker/blocks.c
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
#include <stdlib.h>
|
9
9
|
#include <assert.h>
|
10
10
|
#include <stdio.h>
|
11
|
+
#include <limits.h>
|
11
12
|
|
12
13
|
#include "cmark_ctype.h"
|
13
14
|
#include "syntax_extension.h"
|
@@ -27,6 +28,14 @@
|
|
27
28
|
#define CODE_INDENT 4
|
28
29
|
#define TAB_STOP 4
|
29
30
|
|
31
|
+
/**
|
32
|
+
* Very deeply nested lists can cause quadratic performance issues.
|
33
|
+
* This constant is used in open_new_blocks() to limit the nesting
|
34
|
+
* depth. It is unlikely that a non-contrived markdown document will
|
35
|
+
* be nested this deeply.
|
36
|
+
*/
|
37
|
+
#define MAX_LIST_DEPTH 100
|
38
|
+
|
30
39
|
#ifndef MIN
|
31
40
|
#define MIN(x, y) ((x < y) ? x : y)
|
32
41
|
#endif
|
@@ -642,6 +651,14 @@ static cmark_node *finalize_document(cmark_parser *parser) {
|
|
642
651
|
}
|
643
652
|
|
644
653
|
finalize(parser, parser->root);
|
654
|
+
|
655
|
+
// Limit total size of extra content created from reference links to
|
656
|
+
// document size to avoid superlinear growth. Always allow 100KB.
|
657
|
+
if (parser->total_size > 100000)
|
658
|
+
parser->refmap->max_ref_size = parser->total_size;
|
659
|
+
else
|
660
|
+
parser->refmap->max_ref_size = 100000;
|
661
|
+
|
645
662
|
process_inlines(parser, parser->refmap, parser->options);
|
646
663
|
if (parser->options & CMARK_OPT_FOOTNOTES)
|
647
664
|
process_footnotes(parser);
|
@@ -701,6 +718,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
|
|
701
718
|
const unsigned char *end = buffer + len;
|
702
719
|
static const uint8_t repl[] = {239, 191, 189};
|
703
720
|
|
721
|
+
if (len > UINT_MAX - parser->total_size)
|
722
|
+
parser->total_size = UINT_MAX;
|
723
|
+
else
|
724
|
+
parser->total_size += len;
|
725
|
+
|
704
726
|
if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
|
705
727
|
// skip NL if last buffer ended with CR ; see #117
|
706
728
|
buffer++;
|
@@ -1108,10 +1130,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
|
|
1108
1130
|
bool has_content;
|
1109
1131
|
int save_offset;
|
1110
1132
|
int save_column;
|
1133
|
+
size_t depth = 0;
|
1111
1134
|
|
1112
1135
|
while (cont_type != CMARK_NODE_CODE_BLOCK &&
|
1113
1136
|
cont_type != CMARK_NODE_HTML_BLOCK) {
|
1114
|
-
|
1137
|
+
depth++;
|
1115
1138
|
S_find_first_nonspace(parser, input);
|
1116
1139
|
indented = parser->indent >= CODE_INDENT;
|
1117
1140
|
|
@@ -1213,6 +1236,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
|
|
1213
1236
|
(*container)->internal_offset = matched;
|
1214
1237
|
} else if ((!indented || cont_type == CMARK_NODE_LIST) &&
|
1215
1238
|
parser->indent < 4 &&
|
1239
|
+
depth < MAX_LIST_DEPTH &&
|
1216
1240
|
(matched = parse_list_marker(
|
1217
1241
|
parser->mem, input, parser->first_nonspace,
|
1218
1242
|
(*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
|
data/ext/qiita_marker/cmark-gfm-core-extensions.h
CHANGED
@@ -6,45 +6,45 @@ extern "C" {
|
|
6
6
|
#endif
|
7
7
|
|
8
8
|
#include "cmark-gfm-extension_api.h"
|
9
|
-
#include "cmark-
|
10
|
-
#include
|
9
|
+
#include "cmark-gfm_export.h"
|
10
|
+
#include <stdbool.h>
|
11
11
|
#include <stdint.h>
|
12
12
|
|
13
|
-
|
13
|
+
CMARK_GFM_EXPORT
|
14
14
|
void cmark_gfm_core_extensions_ensure_registered(void);
|
15
15
|
|
16
|
-
|
16
|
+
CMARK_GFM_EXPORT
|
17
17
|
uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node);
|
18
18
|
|
19
19
|
/** Sets the number of columns for the table, returning 1 on success and 0 on error.
|
20
20
|
*/
|
21
|
-
|
21
|
+
CMARK_GFM_EXPORT
|
22
22
|
int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns);
|
23
23
|
|
24
|
-
|
24
|
+
CMARK_GFM_EXPORT
|
25
25
|
uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node);
|
26
26
|
|
27
27
|
/** Sets the alignments for the table, returning 1 on success and 0 on error.
|
28
28
|
*/
|
29
|
-
|
29
|
+
CMARK_GFM_EXPORT
|
30
30
|
int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments);
|
31
31
|
|
32
|
-
|
32
|
+
CMARK_GFM_EXPORT
|
33
33
|
int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node);
|
34
34
|
|
35
35
|
/** Sets whether the node is a table header row, returning 1 on success and 0 on error.
|
36
36
|
*/
|
37
|
-
|
37
|
+
CMARK_GFM_EXPORT
|
38
38
|
int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header);
|
39
39
|
|
40
|
-
|
40
|
+
CMARK_GFM_EXPORT
|
41
41
|
bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node);
|
42
42
|
/* For backwards compatibility */
|
43
43
|
#define cmark_gfm_extensions_tasklist_is_checked cmark_gfm_extensions_get_tasklist_item_checked
|
44
44
|
|
45
45
|
/** Sets whether a tasklist item is "checked" (completed), returning 1 on success and 0 on error.
|
46
46
|
*/
|
47
|
-
|
47
|
+
CMARK_GFM_EXPORT
|
48
48
|
int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked);
|
49
49
|
|
50
50
|
#ifdef __cplusplus
|