qiita_marker 0.23.6.2 → 0.23.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 33ffa83efbb04e12670414e88754dd011545a84e3f33d8b6cd485e9cb93852d6
4
- data.tar.gz: bc620d5b55aeb110636550b01a0bbe2a49c3e18ba1d8023086cdaab4cb1e4b68
3
+ metadata.gz: 32eb3d3a4da3ce6d72df6417f6b307e2117958939163446e38ded1a248c495fb
4
+ data.tar.gz: f3daa3a44fb40d856ce1994c94a1db9ea93491a2c02ba3d61bdca74694b87331
5
5
  SHA512:
6
- metadata.gz: 33a5fc0e5676ae1669b16fefe6f6afff49b87fcc5276371ad5aeac833385df71e93413f39287109430de433fb3b874192a559a6783090f1b8d786ac115fd70b2
7
- data.tar.gz: 7fd8db1b4369982a7206a4be403843dfc765893dacf9703a349719f379ff6cd547b1897799b4421cd7518a3f423dc03ada44636de86d0b7003cb55eadcc863ae
6
+ metadata.gz: 8cd8a157a896d9fc67ed1810cd1964ac967b4d6034d0db3009772c2429ea5ee11929689bc5bbdddeff0c30a8f032bcbb71c4cb3b0d81bee8daf08acd0732e72d
7
+ data.tar.gz: 03010f6c8bfec94564d01d5cec3f301552b63366bd0b8748d10a5b4b1cb2921d457ce1ca9e60fbedc41375cc89fc1a81ffc64c88a8c1952c6547c95904c46a3c
@@ -68,15 +68,16 @@ static void *arena_calloc(size_t nmem, size_t size) {
68
68
  const size_t align = sizeof(size_t) - 1;
69
69
  sz = (sz + align) & ~align;
70
70
 
71
+ struct arena_chunk *chunk;
71
72
  if (sz > A->sz) {
72
- A->prev = alloc_arena_chunk(sz, A->prev);
73
- return (uint8_t *) A->prev->ptr + sizeof(size_t);
73
+ A->prev = chunk = alloc_arena_chunk(sz, A->prev);
74
+ } else if (sz > A->sz - A->used) {
75
+ A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
76
+ } else {
77
+ chunk = A;
74
78
  }
75
- if (sz > A->sz - A->used) {
76
- A = alloc_arena_chunk(A->sz + A->sz / 2, A);
77
- }
78
- void *ptr = (uint8_t *) A->ptr + A->used;
79
- A->used += sz;
79
+ void *ptr = (uint8_t *) chunk->ptr + chunk->used;
80
+ chunk->used += sz;
80
81
  *((size_t *) ptr) = sz - sizeof(size_t);
81
82
  return (uint8_t *) ptr + sizeof(size_t);
82
83
  }
@@ -98,6 +99,6 @@ static void arena_free(void *ptr) {
98
99
 
99
100
  cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
100
101
 
101
- cmark_mem *cmark_get_arena_mem_allocator() {
102
+ cmark_mem *cmark_get_arena_mem_allocator(void) {
102
103
  return &CMARK_ARENA_MEM_ALLOCATOR;
103
104
  }
@@ -6,6 +6,7 @@
6
6
  #include <render.h>
7
7
  #include <string.h>
8
8
  #include <utf8.h>
9
+ #include <stddef.h>
9
10
 
10
11
  #if defined(_WIN32)
11
12
  #define strncasecmp _strnicmp
@@ -44,44 +45,25 @@ static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
44
45
  }
45
46
 
46
47
  static size_t autolink_delim(uint8_t *data, size_t link_end) {
47
- uint8_t cclose, copen;
48
48
  size_t i;
49
+ size_t closing = 0;
50
+ size_t opening = 0;
49
51
 
50
- for (i = 0; i < link_end; ++i)
51
- if (data[i] == '<') {
52
+ for (i = 0; i < link_end; ++i) {
53
+ const uint8_t c = data[i];
54
+ if (c == '<') {
52
55
  link_end = i;
53
56
  break;
57
+ } else if (c == '(') {
58
+ opening++;
59
+ } else if (c == ')') {
60
+ closing++;
54
61
  }
62
+ }
55
63
 
56
64
  while (link_end > 0) {
57
- cclose = data[link_end - 1];
58
-
59
- switch (cclose) {
65
+ switch (data[link_end - 1]) {
60
66
  case ')':
61
- copen = '(';
62
- break;
63
- default:
64
- copen = 0;
65
- }
66
-
67
- if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL)
68
- link_end--;
69
-
70
- else if (data[link_end - 1] == ';') {
71
- size_t new_end = link_end - 2;
72
-
73
- while (new_end > 0 && cmark_isalpha(data[new_end]))
74
- new_end--;
75
-
76
- if (new_end < link_end - 2 && data[new_end] == '&')
77
- link_end = new_end;
78
- else
79
- link_end--;
80
- } else if (copen != 0) {
81
- size_t closing = 0;
82
- size_t opening = 0;
83
- i = 0;
84
-
85
67
  /* Allow any number of matching brackets (as recognised in copen/cclose)
86
68
  * at the end of the URL. If there is a greater number of closing
87
69
  * brackets than opening ones, we remove one character from the end of
@@ -89,34 +71,52 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
89
71
  *
90
72
  * Examples (input text => output linked portion):
91
73
  *
92
- * http://www.pokemon.com/Pikachu_(Electric)
93
- * => http://www.pokemon.com/Pikachu_(Electric)
74
+ * http://www.pokemon.com/Pikachu_(Electric)
75
+ * => http://www.pokemon.com/Pikachu_(Electric)
94
76
  *
95
- * http://www.pokemon.com/Pikachu_((Electric)
96
- * => http://www.pokemon.com/Pikachu_((Electric)
77
+ * http://www.pokemon.com/Pikachu_((Electric)
78
+ * => http://www.pokemon.com/Pikachu_((Electric)
97
79
  *
98
- * http://www.pokemon.com/Pikachu_(Electric))
99
- * => http://www.pokemon.com/Pikachu_(Electric)
80
+ * http://www.pokemon.com/Pikachu_(Electric))
81
+ * => http://www.pokemon.com/Pikachu_(Electric)
100
82
  *
101
- * http://www.pokemon.com/Pikachu_((Electric))
102
- * => http://www.pokemon.com/Pikachu_((Electric))
83
+ * http://www.pokemon.com/Pikachu_((Electric))
84
+ * => http://www.pokemon.com/Pikachu_((Electric))
103
85
  */
104
-
105
- while (i < link_end) {
106
- if (data[i] == copen)
107
- opening++;
108
- else if (data[i] == cclose)
109
- closing++;
110
-
111
- i++;
86
+ if (closing <= opening) {
87
+ return link_end;
112
88
  }
89
+ closing--;
90
+ link_end--;
91
+ break;
92
+ case '?':
93
+ case '!':
94
+ case '.':
95
+ case ',':
96
+ case ':':
97
+ case '*':
98
+ case '_':
99
+ case '~':
100
+ case '\'':
101
+ case '"':
102
+ link_end--;
103
+ break;
104
+ case ';': {
105
+ size_t new_end = link_end - 2;
113
106
 
114
- if (closing <= opening)
115
- break;
107
+ while (new_end > 0 && cmark_isalpha(data[new_end]))
108
+ new_end--;
116
109
 
117
- link_end--;
118
- } else
110
+ if (new_end < link_end - 2 && data[new_end] == '&')
111
+ link_end = new_end;
112
+ else
113
+ link_end--;
119
114
  break;
115
+ }
116
+
117
+ default:
118
+ return link_end;
119
+ }
120
120
  }
121
121
 
122
122
  return link_end;
@@ -125,7 +125,20 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
125
125
  static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
126
126
  size_t i, np = 0, uscore1 = 0, uscore2 = 0;
127
127
 
128
+ /* The purpose of this code is to reject urls that contain an underscore
129
+ * in one of the last two segments. Examples:
130
+ *
131
+ * www.xxx.yyy.zzz autolinked
132
+ * www.xxx.yyy._zzz not autolinked
133
+ * www.xxx._yyy.zzz not autolinked
134
+ * www._xxx.yyy.zzz autolinked
135
+ *
136
+ * The reason is that domain names are allowed to include underscores,
137
+ * but host names are not. See: https://stackoverflow.com/a/2183140
138
+ */
128
139
  for (i = 1; i < size - 1; i++) {
140
+ if (data[i] == '\\' && i < size - 2)
141
+ i++;
129
142
  if (data[i] == '_')
130
143
  uscore2++;
131
144
  else if (data[i] == '.') {
@@ -136,8 +149,17 @@ static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
136
149
  break;
137
150
  }
138
151
 
139
- if (uscore1 > 0 || uscore2 > 0)
140
- return 0;
152
+ if (uscore1 > 0 || uscore2 > 0) {
153
+ /* If the url is very long then accept it despite the underscores,
154
+ * to avoid quadratic behavior causing a denial of service. See:
155
+ * https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
156
+ * Reasonable urls are unlikely to have more than 10 segments, so
157
+ * this extra condition shouldn't have any impact on normal usage.
158
+ */
159
+ if (np <= 10) {
160
+ return 0;
161
+ }
162
+ }
141
163
 
142
164
  if (allow_short) {
143
165
  /* We don't need a valid domain in the strict sense (with
@@ -175,7 +197,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
175
197
  if (link_end == 0)
176
198
  return NULL;
177
199
 
178
- while (link_end < size && !cmark_isspace(data[link_end]))
200
+ while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
179
201
  link_end++;
180
202
 
181
203
  link_end = autolink_delim(data, link_end);
@@ -239,7 +261,7 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
239
261
  return 0;
240
262
 
241
263
  link_end += domain_len;
242
- while (link_end < size && !cmark_isspace(data[link_end]))
264
+ while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
243
265
  link_end++;
244
266
 
245
267
  link_end = autolink_delim(data, link_end);
@@ -262,6 +284,11 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
262
284
  cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
263
285
  text->as.literal = url;
264
286
  cmark_node_append_child(node, text);
287
+
288
+ node->start_line = text->start_line = node->end_line = text->end_line = cmark_inline_parser_get_line(inline_parser);
289
+
290
+ node->start_column = text->start_column = max_rewind - rewind;
291
+ node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
265
292
 
266
293
  return node;
267
294
  }
@@ -286,148 +313,171 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
286
313
  // inline was finished in inlines.c.
287
314
  }
288
315
 
289
- static bool validate_protocol(char protocol[], uint8_t *data, int rewind) {
316
+ static bool validate_protocol(char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) {
290
317
  size_t len = strlen(protocol);
291
318
 
319
+ if (len > (max_rewind - rewind)) {
320
+ return false;
321
+ }
322
+
292
323
  // Check that the protocol matches
293
- for (int i = 1; i <= len; i++) {
294
- if (data[-rewind - i] != protocol[len - i]) {
295
- return false;
296
- }
324
+ if (memcmp(data - rewind - len, protocol, len) != 0) {
325
+ return false;
326
+ }
327
+
328
+ if (len == (max_rewind - rewind)) {
329
+ return true;
297
330
  }
298
331
 
299
- char prev_char = data[-rewind - len - 1];
332
+ char prev_char = data[-((ptrdiff_t)rewind) - len - 1];
300
333
 
301
334
  // Make sure the character before the protocol is non-alphanumeric
302
335
  return !cmark_isalnum(prev_char);
303
336
  }
304
337
 
305
- static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset,
306
- int depth, cmark_syntax_extension *ext) {
307
- // postprocess_text can recurse very deeply if there is a very long line of
308
- // '@' only. Stop at a reasonable depth to ensure it cannot crash.
309
- if (depth > 1000) return;
338
+ static void postprocess_text(cmark_parser *parser, cmark_node *text, cmark_syntax_extension *ext) {
339
+ size_t start = 0;
340
+ size_t offset = 0;
341
+ // `text` is going to be split into a list of nodes containing shorter segments
342
+ // of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
343
+ // create references to it. Later, `cmark_chunk_to_cstr` is used to convert
344
+ // the references into allocated buffers. The detached buffer is freed before we
345
+ // return.
346
+ cmark_chunk detached_chunk = text->as.literal;
347
+ text->as.literal = cmark_chunk_dup(&detached_chunk, 0, detached_chunk.len);
348
+
349
+ uint8_t *data = text->as.literal.data;
350
+ size_t remaining = text->as.literal.len;
351
+
352
+ while (true) {
353
+ size_t link_end;
354
+ uint8_t *at;
355
+ bool auto_mailto = true;
356
+ bool is_xmpp = false;
357
+ size_t rewind;
358
+ size_t max_rewind;
359
+ size_t np = 0;
360
+
361
+ if (offset >= remaining)
362
+ break;
310
363
 
311
- size_t link_end;
312
- uint8_t *data = text->as.literal.data,
313
- *at;
314
- size_t size = text->as.literal.len;
315
- bool auto_mailto = true;
316
- bool is_xmpp = false;
317
- int rewind, max_rewind,
318
- nb = 0, np = 0, ns = 0;
364
+ at = (uint8_t *)memchr(data + start + offset, '@', remaining - offset);
365
+ if (!at)
366
+ break;
319
367
 
320
- if (offset < 0 || (size_t)offset >= size)
321
- return;
368
+ max_rewind = at - (data + start + offset);
322
369
 
323
- data += offset;
324
- size -= offset;
370
+ found_at:
371
+ for (rewind = 0; rewind < max_rewind; ++rewind) {
372
+ uint8_t c = data[start + offset + max_rewind - rewind - 1];
325
373
 
326
- at = (uint8_t *)memchr(data, '@', size);
327
- if (!at)
328
- return;
374
+ if (cmark_isalnum(c))
375
+ continue;
329
376
 
330
- max_rewind = (int)(at - data);
331
- data += max_rewind;
332
- size -= max_rewind;
377
+ if (strchr(".+-_", c) != NULL)
378
+ continue;
333
379
 
334
- for (rewind = 0; rewind < max_rewind; ++rewind) {
335
- uint8_t c = data[-rewind - 1];
380
+ if (strchr(":", c) != NULL) {
381
+ if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) {
382
+ auto_mailto = false;
383
+ continue;
384
+ }
385
+
386
+ if (validate_protocol("xmpp:", data + start + offset + max_rewind, rewind, max_rewind)) {
387
+ auto_mailto = false;
388
+ is_xmpp = true;
389
+ continue;
390
+ }
391
+ }
336
392
 
337
- if (cmark_isalnum(c))
338
- continue;
393
+ break;
394
+ }
339
395
 
340
- if (strchr(".+-_", c) != NULL)
396
+ if (rewind == 0) {
397
+ offset += max_rewind + 1;
341
398
  continue;
399
+ }
342
400
 
343
- if (strchr(":", c) != NULL) {
344
- if (validate_protocol("mailto:", data, rewind)) {
345
- auto_mailto = false;
401
+ assert(data[start + offset + max_rewind] == '@');
402
+ for (link_end = 1; link_end < remaining - offset - max_rewind; ++link_end) {
403
+ uint8_t c = data[start + offset + max_rewind + link_end];
404
+
405
+ if (cmark_isalnum(c))
346
406
  continue;
347
- }
348
407
 
349
- if (validate_protocol("xmpp:", data, rewind)) {
350
- auto_mailto = false;
351
- is_xmpp = true;
408
+ if (c == '@') {
409
+ // Found another '@', so go back and try again with an updated offset and max_rewind.
410
+ offset += max_rewind + 1;
411
+ max_rewind = link_end - 1;
412
+ goto found_at;
413
+ } else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
414
+ cmark_isalnum(data[start + offset + max_rewind + link_end + 1]))
415
+ np++;
416
+ else if (c == '/' && is_xmpp)
352
417
  continue;
353
- }
418
+ else if (c != '-' && c != '_')
419
+ break;
354
420
  }
355
421
 
356
- break;
357
- }
358
-
359
- if (rewind == 0 || ns > 0) {
360
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1, ext);
361
- return;
362
- }
422
+ if (link_end < 2 || np == 0 ||
423
+ (!cmark_isalpha(data[start + offset + max_rewind + link_end - 1]) &&
424
+ data[start + offset + max_rewind + link_end - 1] != '.')) {
425
+ offset += max_rewind + link_end;
426
+ continue;
427
+ }
363
428
 
364
- for (link_end = 0; link_end < size; ++link_end) {
365
- uint8_t c = data[link_end];
429
+ link_end = autolink_delim(data + start + offset + max_rewind, link_end);
366
430
 
367
- if (cmark_isalnum(c))
431
+ if (link_end == 0) {
432
+ offset += max_rewind + 1;
368
433
  continue;
434
+ }
369
435
 
370
- if (c == '@')
371
- nb++;
372
- else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
373
- np++;
374
- else if (c == '/' && is_xmpp)
375
- continue;
376
- else if (c != '-' && c != '_')
377
- break;
378
- }
436
+ cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
437
+ if (parser->options & CMARK_OPT_AUTOLINK_CLASS_NAME) {
438
+ cmark_node_set_syntax_extension(link_node, ext);
439
+ }
440
+ cmark_strbuf buf;
441
+ cmark_strbuf_init(parser->mem, &buf, 10);
442
+ if (auto_mailto)
443
+ cmark_strbuf_puts(&buf, "mailto:");
444
+ cmark_strbuf_put(&buf, data + start + offset + max_rewind - rewind, (bufsize_t)(link_end + rewind));
445
+ link_node->as.link.url = cmark_chunk_buf_detach(&buf);
446
+
447
+ cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
448
+ cmark_chunk email = cmark_chunk_dup(
449
+ &detached_chunk,
450
+ (bufsize_t)(start + offset + max_rewind - rewind),
451
+ (bufsize_t)(link_end + rewind));
452
+ cmark_chunk_to_cstr(parser->mem, &email);
453
+ link_text->as.literal = email;
454
+ cmark_node_append_child(link_node, link_text);
379
455
 
380
- if (link_end < 2 || nb != 1 || np == 0 ||
381
- (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) {
382
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1, ext);
383
- return;
384
- }
456
+ cmark_node_insert_after(text, link_node);
385
457
 
386
- link_end = autolink_delim(data, link_end);
458
+ cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
459
+ post->as.literal = cmark_chunk_dup(&detached_chunk,
460
+ (bufsize_t)(start + offset + max_rewind + link_end),
461
+ (bufsize_t)(remaining - offset - max_rewind - link_end));
387
462
 
388
- if (link_end == 0) {
389
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1, ext);
390
- return;
391
- }
463
+ cmark_node_insert_after(link_node, post);
392
464
 
393
- cmark_chunk_to_cstr(parser->mem, &text->as.literal);
465
+ text->as.literal = cmark_chunk_dup(&detached_chunk, (bufsize_t)start, (bufsize_t)(offset + max_rewind - rewind));
466
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
394
467
 
395
- cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
396
- if (parser->options & CMARK_OPT_AUTOLINK_CLASS_NAME) {
397
- cmark_node_set_syntax_extension(link_node, ext);
468
+ text = post;
469
+ start += offset + max_rewind + link_end;
470
+ remaining -= offset + max_rewind + link_end;
471
+ offset = 0;
398
472
  }
399
- cmark_strbuf buf;
400
- cmark_strbuf_init(parser->mem, &buf, 10);
401
- if (auto_mailto)
402
- cmark_strbuf_puts(&buf, "mailto:");
403
- cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind));
404
- link_node->as.link.url = cmark_chunk_buf_detach(&buf);
405
-
406
- cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
407
- cmark_chunk email = cmark_chunk_dup(
408
- &text->as.literal,
409
- offset + max_rewind - rewind,
410
- (bufsize_t)(link_end + rewind));
411
- cmark_chunk_to_cstr(parser->mem, &email);
412
- link_text->as.literal = email;
413
- cmark_node_append_child(link_node, link_text);
414
473
 
415
- cmark_node_insert_after(text, link_node);
416
-
417
- cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
418
- post->as.literal = cmark_chunk_dup(&text->as.literal,
419
- (bufsize_t)(offset + max_rewind + link_end),
420
- (bufsize_t)(size - link_end));
421
- cmark_chunk_to_cstr(parser->mem, &post->as.literal);
422
-
423
- cmark_node_insert_after(link_node, post);
424
-
425
- text->as.literal.len = offset + max_rewind - rewind;
426
- text->as.literal.data[text->as.literal.len] = 0;
474
+ // Convert the reference to allocated memory.
475
+ assert(!text->as.literal.alloc);
476
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
427
477
 
428
- postprocess_text(parser, post, 0, depth + 1, ext);
478
+ // Free the detached buffer.
479
+ cmark_chunk_free(parser->mem, &detached_chunk);
429
480
  }
430
-
431
481
  static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
432
482
  cmark_iter *iter;
433
483
  cmark_event_type ev;
@@ -452,7 +502,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser
452
502
  }
453
503
 
454
504
  if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) {
455
- postprocess_text(parser, node, 0, /*depth*/ 0, ext);
505
+ postprocess_text(parser, node, ext);
456
506
  }
457
507
  }
458
508
 
@@ -8,6 +8,7 @@
8
8
  #include <stdlib.h>
9
9
  #include <assert.h>
10
10
  #include <stdio.h>
11
+ #include <limits.h>
11
12
 
12
13
  #include "cmark_ctype.h"
13
14
  #include "syntax_extension.h"
@@ -27,6 +28,14 @@
27
28
  #define CODE_INDENT 4
28
29
  #define TAB_STOP 4
29
30
 
31
+ /**
32
+ * Very deeply nested lists can cause quadratic performance issues.
33
+ * This constant is used in open_new_blocks() to limit the nesting
34
+ * depth. It is unlikely that a non-contrived markdown document will
35
+ * be nested this deeply.
36
+ */
37
+ #define MAX_LIST_DEPTH 100
38
+
30
39
  #ifndef MIN
31
40
  #define MIN(x, y) ((x < y) ? x : y)
32
41
  #endif
@@ -642,6 +651,14 @@ static cmark_node *finalize_document(cmark_parser *parser) {
642
651
  }
643
652
 
644
653
  finalize(parser, parser->root);
654
+
655
+ // Limit total size of extra content created from reference links to
656
+ // document size to avoid superlinear growth. Always allow 100KB.
657
+ if (parser->total_size > 100000)
658
+ parser->refmap->max_ref_size = parser->total_size;
659
+ else
660
+ parser->refmap->max_ref_size = 100000;
661
+
645
662
  process_inlines(parser, parser->refmap, parser->options);
646
663
  if (parser->options & CMARK_OPT_FOOTNOTES)
647
664
  process_footnotes(parser);
@@ -701,6 +718,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
701
718
  const unsigned char *end = buffer + len;
702
719
  static const uint8_t repl[] = {239, 191, 189};
703
720
 
721
+ if (len > UINT_MAX - parser->total_size)
722
+ parser->total_size = UINT_MAX;
723
+ else
724
+ parser->total_size += len;
725
+
704
726
  if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
705
727
  // skip NL if last buffer ended with CR ; see #117
706
728
  buffer++;
@@ -1108,10 +1130,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1108
1130
  bool has_content;
1109
1131
  int save_offset;
1110
1132
  int save_column;
1133
+ size_t depth = 0;
1111
1134
 
1112
1135
  while (cont_type != CMARK_NODE_CODE_BLOCK &&
1113
1136
  cont_type != CMARK_NODE_HTML_BLOCK) {
1114
-
1137
+ depth++;
1115
1138
  S_find_first_nonspace(parser, input);
1116
1139
  indented = parser->indent >= CODE_INDENT;
1117
1140
 
@@ -1213,6 +1236,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1213
1236
  (*container)->internal_offset = matched;
1214
1237
  } else if ((!indented || cont_type == CMARK_NODE_LIST) &&
1215
1238
  parser->indent < 4 &&
1239
+ depth < MAX_LIST_DEPTH &&
1216
1240
  (matched = parse_list_marker(
1217
1241
  parser->mem, input, parser->first_nonspace,
1218
1242
  (*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
@@ -6,45 +6,45 @@ extern "C" {
6
6
  #endif
7
7
 
8
8
  #include "cmark-gfm-extension_api.h"
9
- #include "cmark-gfm-extensions_export.h"
10
- #include "config.h" // for bool
9
+ #include "cmark-gfm_export.h"
10
+ #include <stdbool.h>
11
11
  #include <stdint.h>
12
12
 
13
- CMARK_GFM_EXTENSIONS_EXPORT
13
+ CMARK_GFM_EXPORT
14
14
  void cmark_gfm_core_extensions_ensure_registered(void);
15
15
 
16
- CMARK_GFM_EXTENSIONS_EXPORT
16
+ CMARK_GFM_EXPORT
17
17
  uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node);
18
18
 
19
19
  /** Sets the number of columns for the table, returning 1 on success and 0 on error.
20
20
  */
21
- CMARK_GFM_EXTENSIONS_EXPORT
21
+ CMARK_GFM_EXPORT
22
22
  int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns);
23
23
 
24
- CMARK_GFM_EXTENSIONS_EXPORT
24
+ CMARK_GFM_EXPORT
25
25
  uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node);
26
26
 
27
27
  /** Sets the alignments for the table, returning 1 on success and 0 on error.
28
28
  */
29
- CMARK_GFM_EXTENSIONS_EXPORT
29
+ CMARK_GFM_EXPORT
30
30
  int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments);
31
31
 
32
- CMARK_GFM_EXTENSIONS_EXPORT
32
+ CMARK_GFM_EXPORT
33
33
  int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node);
34
34
 
35
35
  /** Sets whether the node is a table header row, returning 1 on success and 0 on error.
36
36
  */
37
- CMARK_GFM_EXTENSIONS_EXPORT
37
+ CMARK_GFM_EXPORT
38
38
  int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header);
39
39
 
40
- CMARK_GFM_EXTENSIONS_EXPORT
40
+ CMARK_GFM_EXPORT
41
41
  bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node);
42
42
  /* For backwards compatibility */
43
43
  #define cmark_gfm_extensions_tasklist_is_checked cmark_gfm_extensions_get_tasklist_item_checked
44
44
 
45
45
  /** Sets whether a tasklist item is "checked" (completed), returning 1 on success and 0 on error.
46
46
  */
47
- CMARK_GFM_EXTENSIONS_EXPORT
47
+ CMARK_GFM_EXPORT
48
48
  int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked);
49
49
 
50
50
  #ifdef __cplusplus
@@ -114,6 +114,7 @@ typedef struct delimiter {
114
114
  struct delimiter *previous;
115
115
  struct delimiter *next;
116
116
  cmark_node *inl_text;
117
+ bufsize_t position;
117
118
  bufsize_t length;
118
119
  unsigned char delim_char;
119
120
  int can_open;