qiita_marker 0.23.6.2 → 0.23.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 33ffa83efbb04e12670414e88754dd011545a84e3f33d8b6cd485e9cb93852d6
4
- data.tar.gz: bc620d5b55aeb110636550b01a0bbe2a49c3e18ba1d8023086cdaab4cb1e4b68
3
+ metadata.gz: 32eb3d3a4da3ce6d72df6417f6b307e2117958939163446e38ded1a248c495fb
4
+ data.tar.gz: f3daa3a44fb40d856ce1994c94a1db9ea93491a2c02ba3d61bdca74694b87331
5
5
  SHA512:
6
- metadata.gz: 33a5fc0e5676ae1669b16fefe6f6afff49b87fcc5276371ad5aeac833385df71e93413f39287109430de433fb3b874192a559a6783090f1b8d786ac115fd70b2
7
- data.tar.gz: 7fd8db1b4369982a7206a4be403843dfc765893dacf9703a349719f379ff6cd547b1897799b4421cd7518a3f423dc03ada44636de86d0b7003cb55eadcc863ae
6
+ metadata.gz: 8cd8a157a896d9fc67ed1810cd1964ac967b4d6034d0db3009772c2429ea5ee11929689bc5bbdddeff0c30a8f032bcbb71c4cb3b0d81bee8daf08acd0732e72d
7
+ data.tar.gz: 03010f6c8bfec94564d01d5cec3f301552b63366bd0b8748d10a5b4b1cb2921d457ce1ca9e60fbedc41375cc89fc1a81ffc64c88a8c1952c6547c95904c46a3c
@@ -68,15 +68,16 @@ static void *arena_calloc(size_t nmem, size_t size) {
68
68
  const size_t align = sizeof(size_t) - 1;
69
69
  sz = (sz + align) & ~align;
70
70
 
71
+ struct arena_chunk *chunk;
71
72
  if (sz > A->sz) {
72
- A->prev = alloc_arena_chunk(sz, A->prev);
73
- return (uint8_t *) A->prev->ptr + sizeof(size_t);
73
+ A->prev = chunk = alloc_arena_chunk(sz, A->prev);
74
+ } else if (sz > A->sz - A->used) {
75
+ A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
76
+ } else {
77
+ chunk = A;
74
78
  }
75
- if (sz > A->sz - A->used) {
76
- A = alloc_arena_chunk(A->sz + A->sz / 2, A);
77
- }
78
- void *ptr = (uint8_t *) A->ptr + A->used;
79
- A->used += sz;
79
+ void *ptr = (uint8_t *) chunk->ptr + chunk->used;
80
+ chunk->used += sz;
80
81
  *((size_t *) ptr) = sz - sizeof(size_t);
81
82
  return (uint8_t *) ptr + sizeof(size_t);
82
83
  }
@@ -98,6 +99,6 @@ static void arena_free(void *ptr) {
98
99
 
99
100
  cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
100
101
 
101
- cmark_mem *cmark_get_arena_mem_allocator() {
102
+ cmark_mem *cmark_get_arena_mem_allocator(void) {
102
103
  return &CMARK_ARENA_MEM_ALLOCATOR;
103
104
  }
@@ -6,6 +6,7 @@
6
6
  #include <render.h>
7
7
  #include <string.h>
8
8
  #include <utf8.h>
9
+ #include <stddef.h>
9
10
 
10
11
  #if defined(_WIN32)
11
12
  #define strncasecmp _strnicmp
@@ -44,44 +45,25 @@ static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
44
45
  }
45
46
 
46
47
  static size_t autolink_delim(uint8_t *data, size_t link_end) {
47
- uint8_t cclose, copen;
48
48
  size_t i;
49
+ size_t closing = 0;
50
+ size_t opening = 0;
49
51
 
50
- for (i = 0; i < link_end; ++i)
51
- if (data[i] == '<') {
52
+ for (i = 0; i < link_end; ++i) {
53
+ const uint8_t c = data[i];
54
+ if (c == '<') {
52
55
  link_end = i;
53
56
  break;
57
+ } else if (c == '(') {
58
+ opening++;
59
+ } else if (c == ')') {
60
+ closing++;
54
61
  }
62
+ }
55
63
 
56
64
  while (link_end > 0) {
57
- cclose = data[link_end - 1];
58
-
59
- switch (cclose) {
65
+ switch (data[link_end - 1]) {
60
66
  case ')':
61
- copen = '(';
62
- break;
63
- default:
64
- copen = 0;
65
- }
66
-
67
- if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL)
68
- link_end--;
69
-
70
- else if (data[link_end - 1] == ';') {
71
- size_t new_end = link_end - 2;
72
-
73
- while (new_end > 0 && cmark_isalpha(data[new_end]))
74
- new_end--;
75
-
76
- if (new_end < link_end - 2 && data[new_end] == '&')
77
- link_end = new_end;
78
- else
79
- link_end--;
80
- } else if (copen != 0) {
81
- size_t closing = 0;
82
- size_t opening = 0;
83
- i = 0;
84
-
85
67
  /* Allow any number of matching brackets (as recognised in copen/cclose)
86
68
  * at the end of the URL. If there is a greater number of closing
87
69
  * brackets than opening ones, we remove one character from the end of
@@ -89,34 +71,52 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
89
71
  *
90
72
  * Examples (input text => output linked portion):
91
73
  *
92
- * http://www.pokemon.com/Pikachu_(Electric)
93
- * => http://www.pokemon.com/Pikachu_(Electric)
74
+ * http://www.pokemon.com/Pikachu_(Electric)
75
+ * => http://www.pokemon.com/Pikachu_(Electric)
94
76
  *
95
- * http://www.pokemon.com/Pikachu_((Electric)
96
- * => http://www.pokemon.com/Pikachu_((Electric)
77
+ * http://www.pokemon.com/Pikachu_((Electric)
78
+ * => http://www.pokemon.com/Pikachu_((Electric)
97
79
  *
98
- * http://www.pokemon.com/Pikachu_(Electric))
99
- * => http://www.pokemon.com/Pikachu_(Electric)
80
+ * http://www.pokemon.com/Pikachu_(Electric))
81
+ * => http://www.pokemon.com/Pikachu_(Electric)
100
82
  *
101
- * http://www.pokemon.com/Pikachu_((Electric))
102
- * => http://www.pokemon.com/Pikachu_((Electric))
83
+ * http://www.pokemon.com/Pikachu_((Electric))
84
+ * => http://www.pokemon.com/Pikachu_((Electric))
103
85
  */
104
-
105
- while (i < link_end) {
106
- if (data[i] == copen)
107
- opening++;
108
- else if (data[i] == cclose)
109
- closing++;
110
-
111
- i++;
86
+ if (closing <= opening) {
87
+ return link_end;
112
88
  }
89
+ closing--;
90
+ link_end--;
91
+ break;
92
+ case '?':
93
+ case '!':
94
+ case '.':
95
+ case ',':
96
+ case ':':
97
+ case '*':
98
+ case '_':
99
+ case '~':
100
+ case '\'':
101
+ case '"':
102
+ link_end--;
103
+ break;
104
+ case ';': {
105
+ size_t new_end = link_end - 2;
113
106
 
114
- if (closing <= opening)
115
- break;
107
+ while (new_end > 0 && cmark_isalpha(data[new_end]))
108
+ new_end--;
116
109
 
117
- link_end--;
118
- } else
110
+ if (new_end < link_end - 2 && data[new_end] == '&')
111
+ link_end = new_end;
112
+ else
113
+ link_end--;
119
114
  break;
115
+ }
116
+
117
+ default:
118
+ return link_end;
119
+ }
120
120
  }
121
121
 
122
122
  return link_end;
@@ -125,7 +125,20 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
125
125
  static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
126
126
  size_t i, np = 0, uscore1 = 0, uscore2 = 0;
127
127
 
128
+ /* The purpose of this code is to reject urls that contain an underscore
129
+ * in one of the last two segments. Examples:
130
+ *
131
+ * www.xxx.yyy.zzz autolinked
132
+ * www.xxx.yyy._zzz not autolinked
133
+ * www.xxx._yyy.zzz not autolinked
134
+ * www._xxx.yyy.zzz autolinked
135
+ *
136
+ * The reason is that domain names are allowed to include underscores,
137
+ * but host names are not. See: https://stackoverflow.com/a/2183140
138
+ */
128
139
  for (i = 1; i < size - 1; i++) {
140
+ if (data[i] == '\\' && i < size - 2)
141
+ i++;
129
142
  if (data[i] == '_')
130
143
  uscore2++;
131
144
  else if (data[i] == '.') {
@@ -136,8 +149,17 @@ static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
136
149
  break;
137
150
  }
138
151
 
139
- if (uscore1 > 0 || uscore2 > 0)
140
- return 0;
152
+ if (uscore1 > 0 || uscore2 > 0) {
153
+ /* If the url is very long then accept it despite the underscores,
154
+ * to avoid quadratic behavior causing a denial of service. See:
155
+ * https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
156
+ * Reasonable urls are unlikely to have more than 10 segments, so
157
+ * this extra condition shouldn't have any impact on normal usage.
158
+ */
159
+ if (np <= 10) {
160
+ return 0;
161
+ }
162
+ }
141
163
 
142
164
  if (allow_short) {
143
165
  /* We don't need a valid domain in the strict sense (with
@@ -175,7 +197,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
175
197
  if (link_end == 0)
176
198
  return NULL;
177
199
 
178
- while (link_end < size && !cmark_isspace(data[link_end]))
200
+ while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
179
201
  link_end++;
180
202
 
181
203
  link_end = autolink_delim(data, link_end);
@@ -239,7 +261,7 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
239
261
  return 0;
240
262
 
241
263
  link_end += domain_len;
242
- while (link_end < size && !cmark_isspace(data[link_end]))
264
+ while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
243
265
  link_end++;
244
266
 
245
267
  link_end = autolink_delim(data, link_end);
@@ -262,6 +284,11 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
262
284
  cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
263
285
  text->as.literal = url;
264
286
  cmark_node_append_child(node, text);
287
+
288
+ node->start_line = text->start_line = node->end_line = text->end_line = cmark_inline_parser_get_line(inline_parser);
289
+
290
+ node->start_column = text->start_column = max_rewind - rewind;
291
+ node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
265
292
 
266
293
  return node;
267
294
  }
@@ -286,148 +313,171 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
286
313
  // inline was finished in inlines.c.
287
314
  }
288
315
 
289
- static bool validate_protocol(char protocol[], uint8_t *data, int rewind) {
316
+ static bool validate_protocol(char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) {
290
317
  size_t len = strlen(protocol);
291
318
 
319
+ if (len > (max_rewind - rewind)) {
320
+ return false;
321
+ }
322
+
292
323
  // Check that the protocol matches
293
- for (int i = 1; i <= len; i++) {
294
- if (data[-rewind - i] != protocol[len - i]) {
295
- return false;
296
- }
324
+ if (memcmp(data - rewind - len, protocol, len) != 0) {
325
+ return false;
326
+ }
327
+
328
+ if (len == (max_rewind - rewind)) {
329
+ return true;
297
330
  }
298
331
 
299
- char prev_char = data[-rewind - len - 1];
332
+ char prev_char = data[-((ptrdiff_t)rewind) - len - 1];
300
333
 
301
334
  // Make sure the character before the protocol is non-alphanumeric
302
335
  return !cmark_isalnum(prev_char);
303
336
  }
304
337
 
305
- static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset,
306
- int depth, cmark_syntax_extension *ext) {
307
- // postprocess_text can recurse very deeply if there is a very long line of
308
- // '@' only. Stop at a reasonable depth to ensure it cannot crash.
309
- if (depth > 1000) return;
338
+ static void postprocess_text(cmark_parser *parser, cmark_node *text, cmark_syntax_extension *ext) {
339
+ size_t start = 0;
340
+ size_t offset = 0;
341
+ // `text` is going to be split into a list of nodes containing shorter segments
342
+ // of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
343
+ // create references to it. Later, `cmark_chunk_to_cstr` is used to convert
344
+ // the references into allocated buffers. The detached buffer is freed before we
345
+ // return.
346
+ cmark_chunk detached_chunk = text->as.literal;
347
+ text->as.literal = cmark_chunk_dup(&detached_chunk, 0, detached_chunk.len);
348
+
349
+ uint8_t *data = text->as.literal.data;
350
+ size_t remaining = text->as.literal.len;
351
+
352
+ while (true) {
353
+ size_t link_end;
354
+ uint8_t *at;
355
+ bool auto_mailto = true;
356
+ bool is_xmpp = false;
357
+ size_t rewind;
358
+ size_t max_rewind;
359
+ size_t np = 0;
360
+
361
+ if (offset >= remaining)
362
+ break;
310
363
 
311
- size_t link_end;
312
- uint8_t *data = text->as.literal.data,
313
- *at;
314
- size_t size = text->as.literal.len;
315
- bool auto_mailto = true;
316
- bool is_xmpp = false;
317
- int rewind, max_rewind,
318
- nb = 0, np = 0, ns = 0;
364
+ at = (uint8_t *)memchr(data + start + offset, '@', remaining - offset);
365
+ if (!at)
366
+ break;
319
367
 
320
- if (offset < 0 || (size_t)offset >= size)
321
- return;
368
+ max_rewind = at - (data + start + offset);
322
369
 
323
- data += offset;
324
- size -= offset;
370
+ found_at:
371
+ for (rewind = 0; rewind < max_rewind; ++rewind) {
372
+ uint8_t c = data[start + offset + max_rewind - rewind - 1];
325
373
 
326
- at = (uint8_t *)memchr(data, '@', size);
327
- if (!at)
328
- return;
374
+ if (cmark_isalnum(c))
375
+ continue;
329
376
 
330
- max_rewind = (int)(at - data);
331
- data += max_rewind;
332
- size -= max_rewind;
377
+ if (strchr(".+-_", c) != NULL)
378
+ continue;
333
379
 
334
- for (rewind = 0; rewind < max_rewind; ++rewind) {
335
- uint8_t c = data[-rewind - 1];
380
+ if (strchr(":", c) != NULL) {
381
+ if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) {
382
+ auto_mailto = false;
383
+ continue;
384
+ }
385
+
386
+ if (validate_protocol("xmpp:", data + start + offset + max_rewind, rewind, max_rewind)) {
387
+ auto_mailto = false;
388
+ is_xmpp = true;
389
+ continue;
390
+ }
391
+ }
336
392
 
337
- if (cmark_isalnum(c))
338
- continue;
393
+ break;
394
+ }
339
395
 
340
- if (strchr(".+-_", c) != NULL)
396
+ if (rewind == 0) {
397
+ offset += max_rewind + 1;
341
398
  continue;
399
+ }
342
400
 
343
- if (strchr(":", c) != NULL) {
344
- if (validate_protocol("mailto:", data, rewind)) {
345
- auto_mailto = false;
401
+ assert(data[start + offset + max_rewind] == '@');
402
+ for (link_end = 1; link_end < remaining - offset - max_rewind; ++link_end) {
403
+ uint8_t c = data[start + offset + max_rewind + link_end];
404
+
405
+ if (cmark_isalnum(c))
346
406
  continue;
347
- }
348
407
 
349
- if (validate_protocol("xmpp:", data, rewind)) {
350
- auto_mailto = false;
351
- is_xmpp = true;
408
+ if (c == '@') {
409
+ // Found another '@', so go back and try again with an updated offset and max_rewind.
410
+ offset += max_rewind + 1;
411
+ max_rewind = link_end - 1;
412
+ goto found_at;
413
+ } else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
414
+ cmark_isalnum(data[start + offset + max_rewind + link_end + 1]))
415
+ np++;
416
+ else if (c == '/' && is_xmpp)
352
417
  continue;
353
- }
418
+ else if (c != '-' && c != '_')
419
+ break;
354
420
  }
355
421
 
356
- break;
357
- }
358
-
359
- if (rewind == 0 || ns > 0) {
360
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1, ext);
361
- return;
362
- }
422
+ if (link_end < 2 || np == 0 ||
423
+ (!cmark_isalpha(data[start + offset + max_rewind + link_end - 1]) &&
424
+ data[start + offset + max_rewind + link_end - 1] != '.')) {
425
+ offset += max_rewind + link_end;
426
+ continue;
427
+ }
363
428
 
364
- for (link_end = 0; link_end < size; ++link_end) {
365
- uint8_t c = data[link_end];
429
+ link_end = autolink_delim(data + start + offset + max_rewind, link_end);
366
430
 
367
- if (cmark_isalnum(c))
431
+ if (link_end == 0) {
432
+ offset += max_rewind + 1;
368
433
  continue;
434
+ }
369
435
 
370
- if (c == '@')
371
- nb++;
372
- else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
373
- np++;
374
- else if (c == '/' && is_xmpp)
375
- continue;
376
- else if (c != '-' && c != '_')
377
- break;
378
- }
436
+ cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
437
+ if (parser->options & CMARK_OPT_AUTOLINK_CLASS_NAME) {
438
+ cmark_node_set_syntax_extension(link_node, ext);
439
+ }
440
+ cmark_strbuf buf;
441
+ cmark_strbuf_init(parser->mem, &buf, 10);
442
+ if (auto_mailto)
443
+ cmark_strbuf_puts(&buf, "mailto:");
444
+ cmark_strbuf_put(&buf, data + start + offset + max_rewind - rewind, (bufsize_t)(link_end + rewind));
445
+ link_node->as.link.url = cmark_chunk_buf_detach(&buf);
446
+
447
+ cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
448
+ cmark_chunk email = cmark_chunk_dup(
449
+ &detached_chunk,
450
+ (bufsize_t)(start + offset + max_rewind - rewind),
451
+ (bufsize_t)(link_end + rewind));
452
+ cmark_chunk_to_cstr(parser->mem, &email);
453
+ link_text->as.literal = email;
454
+ cmark_node_append_child(link_node, link_text);
379
455
 
380
- if (link_end < 2 || nb != 1 || np == 0 ||
381
- (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) {
382
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1, ext);
383
- return;
384
- }
456
+ cmark_node_insert_after(text, link_node);
385
457
 
386
- link_end = autolink_delim(data, link_end);
458
+ cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
459
+ post->as.literal = cmark_chunk_dup(&detached_chunk,
460
+ (bufsize_t)(start + offset + max_rewind + link_end),
461
+ (bufsize_t)(remaining - offset - max_rewind - link_end));
387
462
 
388
- if (link_end == 0) {
389
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1, ext);
390
- return;
391
- }
463
+ cmark_node_insert_after(link_node, post);
392
464
 
393
- cmark_chunk_to_cstr(parser->mem, &text->as.literal);
465
+ text->as.literal = cmark_chunk_dup(&detached_chunk, (bufsize_t)start, (bufsize_t)(offset + max_rewind - rewind));
466
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
394
467
 
395
- cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
396
- if (parser->options & CMARK_OPT_AUTOLINK_CLASS_NAME) {
397
- cmark_node_set_syntax_extension(link_node, ext);
468
+ text = post;
469
+ start += offset + max_rewind + link_end;
470
+ remaining -= offset + max_rewind + link_end;
471
+ offset = 0;
398
472
  }
399
- cmark_strbuf buf;
400
- cmark_strbuf_init(parser->mem, &buf, 10);
401
- if (auto_mailto)
402
- cmark_strbuf_puts(&buf, "mailto:");
403
- cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind));
404
- link_node->as.link.url = cmark_chunk_buf_detach(&buf);
405
-
406
- cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
407
- cmark_chunk email = cmark_chunk_dup(
408
- &text->as.literal,
409
- offset + max_rewind - rewind,
410
- (bufsize_t)(link_end + rewind));
411
- cmark_chunk_to_cstr(parser->mem, &email);
412
- link_text->as.literal = email;
413
- cmark_node_append_child(link_node, link_text);
414
473
 
415
- cmark_node_insert_after(text, link_node);
416
-
417
- cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
418
- post->as.literal = cmark_chunk_dup(&text->as.literal,
419
- (bufsize_t)(offset + max_rewind + link_end),
420
- (bufsize_t)(size - link_end));
421
- cmark_chunk_to_cstr(parser->mem, &post->as.literal);
422
-
423
- cmark_node_insert_after(link_node, post);
424
-
425
- text->as.literal.len = offset + max_rewind - rewind;
426
- text->as.literal.data[text->as.literal.len] = 0;
474
+ // Convert the reference to allocated memory.
475
+ assert(!text->as.literal.alloc);
476
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
427
477
 
428
- postprocess_text(parser, post, 0, depth + 1, ext);
478
+ // Free the detached buffer.
479
+ cmark_chunk_free(parser->mem, &detached_chunk);
429
480
  }
430
-
431
481
  static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
432
482
  cmark_iter *iter;
433
483
  cmark_event_type ev;
@@ -452,7 +502,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser
452
502
  }
453
503
 
454
504
  if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) {
455
- postprocess_text(parser, node, 0, /*depth*/ 0, ext);
505
+ postprocess_text(parser, node, ext);
456
506
  }
457
507
  }
458
508
 
@@ -8,6 +8,7 @@
8
8
  #include <stdlib.h>
9
9
  #include <assert.h>
10
10
  #include <stdio.h>
11
+ #include <limits.h>
11
12
 
12
13
  #include "cmark_ctype.h"
13
14
  #include "syntax_extension.h"
@@ -27,6 +28,14 @@
27
28
  #define CODE_INDENT 4
28
29
  #define TAB_STOP 4
29
30
 
31
+ /**
32
+ * Very deeply nested lists can cause quadratic performance issues.
33
+ * This constant is used in open_new_blocks() to limit the nesting
34
+ * depth. It is unlikely that a non-contrived markdown document will
35
+ * be nested this deeply.
36
+ */
37
+ #define MAX_LIST_DEPTH 100
38
+
30
39
  #ifndef MIN
31
40
  #define MIN(x, y) ((x < y) ? x : y)
32
41
  #endif
@@ -642,6 +651,14 @@ static cmark_node *finalize_document(cmark_parser *parser) {
642
651
  }
643
652
 
644
653
  finalize(parser, parser->root);
654
+
655
+ // Limit total size of extra content created from reference links to
656
+ // document size to avoid superlinear growth. Always allow 100KB.
657
+ if (parser->total_size > 100000)
658
+ parser->refmap->max_ref_size = parser->total_size;
659
+ else
660
+ parser->refmap->max_ref_size = 100000;
661
+
645
662
  process_inlines(parser, parser->refmap, parser->options);
646
663
  if (parser->options & CMARK_OPT_FOOTNOTES)
647
664
  process_footnotes(parser);
@@ -701,6 +718,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
701
718
  const unsigned char *end = buffer + len;
702
719
  static const uint8_t repl[] = {239, 191, 189};
703
720
 
721
+ if (len > UINT_MAX - parser->total_size)
722
+ parser->total_size = UINT_MAX;
723
+ else
724
+ parser->total_size += len;
725
+
704
726
  if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
705
727
  // skip NL if last buffer ended with CR ; see #117
706
728
  buffer++;
@@ -1108,10 +1130,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1108
1130
  bool has_content;
1109
1131
  int save_offset;
1110
1132
  int save_column;
1133
+ size_t depth = 0;
1111
1134
 
1112
1135
  while (cont_type != CMARK_NODE_CODE_BLOCK &&
1113
1136
  cont_type != CMARK_NODE_HTML_BLOCK) {
1114
-
1137
+ depth++;
1115
1138
  S_find_first_nonspace(parser, input);
1116
1139
  indented = parser->indent >= CODE_INDENT;
1117
1140
 
@@ -1213,6 +1236,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1213
1236
  (*container)->internal_offset = matched;
1214
1237
  } else if ((!indented || cont_type == CMARK_NODE_LIST) &&
1215
1238
  parser->indent < 4 &&
1239
+ depth < MAX_LIST_DEPTH &&
1216
1240
  (matched = parse_list_marker(
1217
1241
  parser->mem, input, parser->first_nonspace,
1218
1242
  (*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
@@ -6,45 +6,45 @@ extern "C" {
6
6
  #endif
7
7
 
8
8
  #include "cmark-gfm-extension_api.h"
9
- #include "cmark-gfm-extensions_export.h"
10
- #include "config.h" // for bool
9
+ #include "cmark-gfm_export.h"
10
+ #include <stdbool.h>
11
11
  #include <stdint.h>
12
12
 
13
- CMARK_GFM_EXTENSIONS_EXPORT
13
+ CMARK_GFM_EXPORT
14
14
  void cmark_gfm_core_extensions_ensure_registered(void);
15
15
 
16
- CMARK_GFM_EXTENSIONS_EXPORT
16
+ CMARK_GFM_EXPORT
17
17
  uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node);
18
18
 
19
19
  /** Sets the number of columns for the table, returning 1 on success and 0 on error.
20
20
  */
21
- CMARK_GFM_EXTENSIONS_EXPORT
21
+ CMARK_GFM_EXPORT
22
22
  int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns);
23
23
 
24
- CMARK_GFM_EXTENSIONS_EXPORT
24
+ CMARK_GFM_EXPORT
25
25
  uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node);
26
26
 
27
27
  /** Sets the alignments for the table, returning 1 on success and 0 on error.
28
28
  */
29
- CMARK_GFM_EXTENSIONS_EXPORT
29
+ CMARK_GFM_EXPORT
30
30
  int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments);
31
31
 
32
- CMARK_GFM_EXTENSIONS_EXPORT
32
+ CMARK_GFM_EXPORT
33
33
  int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node);
34
34
 
35
35
  /** Sets whether the node is a table header row, returning 1 on success and 0 on error.
36
36
  */
37
- CMARK_GFM_EXTENSIONS_EXPORT
37
+ CMARK_GFM_EXPORT
38
38
  int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header);
39
39
 
40
- CMARK_GFM_EXTENSIONS_EXPORT
40
+ CMARK_GFM_EXPORT
41
41
  bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node);
42
42
  /* For backwards compatibility */
43
43
  #define cmark_gfm_extensions_tasklist_is_checked cmark_gfm_extensions_get_tasklist_item_checked
44
44
 
45
45
  /** Sets whether a tasklist item is "checked" (completed), returning 1 on success and 0 on error.
46
46
  */
47
- CMARK_GFM_EXTENSIONS_EXPORT
47
+ CMARK_GFM_EXPORT
48
48
  int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked);
49
49
 
50
50
  #ifdef __cplusplus
@@ -114,6 +114,7 @@ typedef struct delimiter {
114
114
  struct delimiter *previous;
115
115
  struct delimiter *next;
116
116
  cmark_node *inl_text;
117
+ bufsize_t position;
117
118
  bufsize_t length;
118
119
  unsigned char delim_char;
119
120
  int can_open;