markly 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/conduct.md +133 -0
  4. data/ext/markly/Makefile +270 -0
  5. data/ext/markly/arena.c +9 -8
  6. data/ext/markly/arena.o +0 -0
  7. data/ext/markly/autolink.c +217 -134
  8. data/ext/markly/autolink.o +0 -0
  9. data/ext/markly/blocks.c +27 -2
  10. data/ext/markly/blocks.o +0 -0
  11. data/ext/markly/buffer.o +0 -0
  12. data/ext/markly/cmark-gfm-core-extensions.h +11 -11
  13. data/ext/markly/cmark-gfm-extension_api.h +1 -0
  14. data/ext/markly/cmark-gfm.h +18 -2
  15. data/ext/markly/cmark.c +3 -3
  16. data/ext/markly/cmark.o +0 -0
  17. data/ext/markly/cmark_ctype.o +0 -0
  18. data/ext/markly/commonmark.c +19 -34
  19. data/ext/markly/commonmark.o +0 -0
  20. data/ext/markly/core-extensions.o +0 -0
  21. data/ext/markly/ext_scanners.o +0 -0
  22. data/ext/markly/extconf.rb +8 -1
  23. data/ext/markly/footnotes.o +0 -0
  24. data/ext/markly/houdini_href_e.o +0 -0
  25. data/ext/markly/houdini_html_e.o +0 -0
  26. data/ext/markly/houdini_html_u.o +0 -0
  27. data/ext/markly/html.c +22 -6
  28. data/ext/markly/html.o +0 -0
  29. data/ext/markly/inlines.c +148 -51
  30. data/ext/markly/inlines.o +0 -0
  31. data/ext/markly/iterator.o +0 -0
  32. data/ext/markly/latex.c +6 -4
  33. data/ext/markly/latex.o +0 -0
  34. data/ext/markly/linked_list.o +0 -0
  35. data/ext/markly/man.c +7 -11
  36. data/ext/markly/man.o +0 -0
  37. data/ext/markly/map.c +11 -4
  38. data/ext/markly/map.h +5 -2
  39. data/ext/markly/map.o +0 -0
  40. data/ext/markly/markly.bundle +0 -0
  41. data/ext/markly/markly.c +582 -586
  42. data/ext/markly/markly.h +1 -1
  43. data/ext/markly/markly.o +0 -0
  44. data/ext/markly/node.c +76 -10
  45. data/ext/markly/node.h +42 -1
  46. data/ext/markly/node.o +0 -0
  47. data/ext/markly/parser.h +1 -0
  48. data/ext/markly/plaintext.c +12 -29
  49. data/ext/markly/plaintext.o +0 -0
  50. data/ext/markly/plugin.o +0 -0
  51. data/ext/markly/references.c +1 -0
  52. data/ext/markly/references.o +0 -0
  53. data/ext/markly/registry.o +0 -0
  54. data/ext/markly/render.c +15 -7
  55. data/ext/markly/render.o +0 -0
  56. data/ext/markly/scanners.c +13916 -10380
  57. data/ext/markly/scanners.h +8 -0
  58. data/ext/markly/scanners.o +0 -0
  59. data/ext/markly/scanners.re +47 -8
  60. data/ext/markly/strikethrough.c +1 -1
  61. data/ext/markly/strikethrough.o +0 -0
  62. data/ext/markly/syntax_extension.o +0 -0
  63. data/ext/markly/table.c +81 -31
  64. data/ext/markly/table.o +0 -0
  65. data/ext/markly/tagfilter.o +0 -0
  66. data/ext/markly/tasklist.o +0 -0
  67. data/ext/markly/utf8.o +0 -0
  68. data/ext/markly/xml.c +2 -1
  69. data/ext/markly/xml.o +0 -0
  70. data/lib/markly/flags.rb +16 -0
  71. data/lib/markly/node/inspect.rb +59 -53
  72. data/lib/markly/node.rb +125 -58
  73. data/lib/markly/renderer/generic.rb +129 -124
  74. data/lib/markly/renderer/html.rb +294 -275
  75. data/lib/markly/version.rb +7 -1
  76. data/lib/markly.rb +36 -30
  77. data/license.md +39 -0
  78. data/readme.md +36 -0
  79. data.tar.gz.sig +0 -0
  80. metadata +98 -29
  81. metadata.gz.sig +0 -0
  82. data/bin/markly +0 -94
  83. data/lib/markly/markly.bundle +0 -0
@@ -2,6 +2,7 @@
2
2
  #include <parser.h>
3
3
  #include <string.h>
4
4
  #include <utf8.h>
5
+ #include <stddef.h>
5
6
 
6
7
  #if defined(_WIN32)
7
8
  #define strncasecmp _strnicmp
@@ -35,44 +36,25 @@ static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
35
36
  }
36
37
 
37
38
  static size_t autolink_delim(uint8_t *data, size_t link_end) {
38
- uint8_t cclose, copen;
39
39
  size_t i;
40
+ size_t closing = 0;
41
+ size_t opening = 0;
40
42
 
41
- for (i = 0; i < link_end; ++i)
42
- if (data[i] == '<') {
43
+ for (i = 0; i < link_end; ++i) {
44
+ const uint8_t c = data[i];
45
+ if (c == '<') {
43
46
  link_end = i;
44
47
  break;
48
+ } else if (c == '(') {
49
+ opening++;
50
+ } else if (c == ')') {
51
+ closing++;
45
52
  }
53
+ }
46
54
 
47
55
  while (link_end > 0) {
48
- cclose = data[link_end - 1];
49
-
50
- switch (cclose) {
56
+ switch (data[link_end - 1]) {
51
57
  case ')':
52
- copen = '(';
53
- break;
54
- default:
55
- copen = 0;
56
- }
57
-
58
- if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL)
59
- link_end--;
60
-
61
- else if (data[link_end - 1] == ';') {
62
- size_t new_end = link_end - 2;
63
-
64
- while (new_end > 0 && cmark_isalpha(data[new_end]))
65
- new_end--;
66
-
67
- if (new_end < link_end - 2 && data[new_end] == '&')
68
- link_end = new_end;
69
- else
70
- link_end--;
71
- } else if (copen != 0) {
72
- size_t closing = 0;
73
- size_t opening = 0;
74
- i = 0;
75
-
76
58
  /* Allow any number of matching brackets (as recognised in copen/cclose)
77
59
  * at the end of the URL. If there is a greater number of closing
78
60
  * brackets than opening ones, we remove one character from the end of
@@ -80,34 +62,52 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
80
62
  *
81
63
  * Examples (input text => output linked portion):
82
64
  *
83
- * http://www.pokemon.com/Pikachu_(Electric)
84
- * => http://www.pokemon.com/Pikachu_(Electric)
65
+ * http://www.pokemon.com/Pikachu_(Electric)
66
+ * => http://www.pokemon.com/Pikachu_(Electric)
85
67
  *
86
- * http://www.pokemon.com/Pikachu_((Electric)
87
- * => http://www.pokemon.com/Pikachu_((Electric)
68
+ * http://www.pokemon.com/Pikachu_((Electric)
69
+ * => http://www.pokemon.com/Pikachu_((Electric)
88
70
  *
89
- * http://www.pokemon.com/Pikachu_(Electric))
90
- * => http://www.pokemon.com/Pikachu_(Electric)
71
+ * http://www.pokemon.com/Pikachu_(Electric))
72
+ * => http://www.pokemon.com/Pikachu_(Electric)
91
73
  *
92
- * http://www.pokemon.com/Pikachu_((Electric))
93
- * => http://www.pokemon.com/Pikachu_((Electric))
74
+ * http://www.pokemon.com/Pikachu_((Electric))
75
+ * => http://www.pokemon.com/Pikachu_((Electric))
94
76
  */
95
-
96
- while (i < link_end) {
97
- if (data[i] == copen)
98
- opening++;
99
- else if (data[i] == cclose)
100
- closing++;
101
-
102
- i++;
77
+ if (closing <= opening) {
78
+ return link_end;
103
79
  }
80
+ closing--;
81
+ link_end--;
82
+ break;
83
+ case '?':
84
+ case '!':
85
+ case '.':
86
+ case ',':
87
+ case ':':
88
+ case '*':
89
+ case '_':
90
+ case '~':
91
+ case '\'':
92
+ case '"':
93
+ link_end--;
94
+ break;
95
+ case ';': {
96
+ size_t new_end = link_end - 2;
104
97
 
105
- if (closing <= opening)
106
- break;
98
+ while (new_end > 0 && cmark_isalpha(data[new_end]))
99
+ new_end--;
107
100
 
108
- link_end--;
109
- } else
101
+ if (new_end < link_end - 2 && data[new_end] == '&')
102
+ link_end = new_end;
103
+ else
104
+ link_end--;
110
105
  break;
106
+ }
107
+
108
+ default:
109
+ return link_end;
110
+ }
111
111
  }
112
112
 
113
113
  return link_end;
@@ -116,7 +116,20 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
116
116
  static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
117
117
  size_t i, np = 0, uscore1 = 0, uscore2 = 0;
118
118
 
119
+ /* The purpose of this code is to reject urls that contain an underscore
120
+ * in one of the last two segments. Examples:
121
+ *
122
+ * www.xxx.yyy.zzz autolinked
123
+ * www.xxx.yyy._zzz not autolinked
124
+ * www.xxx._yyy.zzz not autolinked
125
+ * www._xxx.yyy.zzz autolinked
126
+ *
127
+ * The reason is that domain names are allowed to include underscores,
128
+ * but host names are not. See: https://stackoverflow.com/a/2183140
129
+ */
119
130
  for (i = 1; i < size - 1; i++) {
131
+ if (data[i] == '\\' && i < size - 2)
132
+ i++;
120
133
  if (data[i] == '_')
121
134
  uscore2++;
122
135
  else if (data[i] == '.') {
@@ -127,8 +140,17 @@ static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
127
140
  break;
128
141
  }
129
142
 
130
- if (uscore1 > 0 || uscore2 > 0)
131
- return 0;
143
+ if (uscore1 > 0 || uscore2 > 0) {
144
+ /* If the url is very long then accept it despite the underscores,
145
+ * to avoid quadratic behavior causing a denial of service. See:
146
+ * https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
147
+ * Reasonable urls are unlikely to have more than 10 segments, so
148
+ * this extra condition shouldn't have any impact on normal usage.
149
+ */
150
+ if (np <= 10) {
151
+ return 0;
152
+ }
153
+ }
132
154
 
133
155
  if (allow_short) {
134
156
  /* We don't need a valid domain in the strict sense (with
@@ -165,7 +187,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
165
187
  if (link_end == 0)
166
188
  return NULL;
167
189
 
168
- while (link_end < size && !cmark_isspace(data[link_end]))
190
+ while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
169
191
  link_end++;
170
192
 
171
193
  link_end = autolink_delim(data, link_end);
@@ -225,7 +247,7 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
225
247
  return 0;
226
248
 
227
249
  link_end += domain_len;
228
- while (link_end < size && !cmark_isspace(data[link_end]))
250
+ while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
229
251
  link_end++;
230
252
 
231
253
  link_end = autolink_delim(data, link_end);
@@ -245,6 +267,11 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
245
267
  cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
246
268
  text->as.literal = url;
247
269
  cmark_node_append_child(node, text);
270
+
271
+ node->start_line = text->start_line = node->end_line = text->end_line = cmark_inline_parser_get_line(inline_parser);
272
+
273
+ node->start_column = text->start_column = max_rewind - rewind;
274
+ node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
248
275
 
249
276
  return node;
250
277
  }
@@ -269,111 +296,167 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
269
296
  // inline was finished in inlines.c.
270
297
  }
271
298
 
272
- static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset, int depth) {
273
- // postprocess_text can recurse very deeply if there is a very long line of
274
- // '@' only. Stop at a reasonable depth to ensure it cannot crash.
275
- if (depth > 1000) return;
299
+ static bool validate_protocol(const char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) {
300
+ size_t len = strlen(protocol);
276
301
 
277
- size_t link_end;
278
- uint8_t *data = text->as.literal.data,
279
- *at;
280
- size_t size = text->as.literal.len;
281
- int rewind, max_rewind,
282
- nb = 0, np = 0, ns = 0;
302
+ if (len > (max_rewind - rewind)) {
303
+ return false;
304
+ }
283
305
 
284
- if (offset < 0 || (size_t)offset >= size)
285
- return;
306
+ // Check that the protocol matches
307
+ if (memcmp(data - rewind - len, protocol, len) != 0) {
308
+ return false;
309
+ }
286
310
 
287
- data += offset;
288
- size -= offset;
311
+ if (len == (max_rewind - rewind)) {
312
+ return true;
313
+ }
289
314
 
290
- at = (uint8_t *)memchr(data, '@', size);
291
- if (!at)
292
- return;
315
+ char prev_char = data[-((ptrdiff_t)rewind) - len - 1];
293
316
 
294
- max_rewind = (int)(at - data);
295
- data += max_rewind;
296
- size -= max_rewind;
317
+ // Make sure the character before the protocol is non-alphanumeric
318
+ return !cmark_isalnum(prev_char);
319
+ }
297
320
 
298
- for (rewind = 0; rewind < max_rewind; ++rewind) {
299
- uint8_t c = data[-rewind - 1];
321
+ static void postprocess_text(cmark_parser *parser, cmark_node *text) {
322
+ size_t start = 0;
323
+ size_t offset = 0;
324
+ // `text` is going to be split into a list of nodes containing shorter segments
325
+ // of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
326
+ // create references to it. Later, `cmark_chunk_to_cstr` is used to convert
327
+ // the references into allocated buffers. The detached buffer is freed before we
328
+ // return.
329
+ cmark_chunk detached_chunk = text->as.literal;
330
+ text->as.literal = cmark_chunk_dup(&detached_chunk, 0, detached_chunk.len);
331
+
332
+ uint8_t *data = text->as.literal.data;
333
+ size_t remaining = text->as.literal.len;
334
+
335
+ while (true) {
336
+ size_t link_end;
337
+ uint8_t *at;
338
+ bool auto_mailto = true;
339
+ bool is_xmpp = false;
340
+ size_t rewind;
341
+ size_t max_rewind;
342
+ size_t np = 0;
343
+
344
+ if (offset >= remaining)
345
+ break;
300
346
 
301
- if (cmark_isalnum(c))
302
- continue;
347
+ at = (uint8_t *)memchr(data + start + offset, '@', remaining - offset);
348
+ if (!at)
349
+ break;
303
350
 
304
- if (strchr(".+-_", c) != NULL)
305
- continue;
351
+ max_rewind = at - (data + start + offset);
306
352
 
307
- if (c == '/')
308
- ns++;
353
+ found_at:
354
+ for (rewind = 0; rewind < max_rewind; ++rewind) {
355
+ uint8_t c = data[start + offset + max_rewind - rewind - 1];
309
356
 
310
- break;
311
- }
357
+ if (cmark_isalnum(c))
358
+ continue;
312
359
 
313
- if (rewind == 0 || ns > 0) {
314
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
315
- return;
316
- }
360
+ if (strchr(".+-_", c) != NULL)
361
+ continue;
317
362
 
318
- for (link_end = 0; link_end < size; ++link_end) {
319
- uint8_t c = data[link_end];
363
+ if (strchr(":", c) != NULL) {
364
+ if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) {
365
+ auto_mailto = false;
366
+ continue;
367
+ }
320
368
 
321
- if (cmark_isalnum(c))
322
- continue;
369
+ if (validate_protocol("xmpp:", data + start + offset + max_rewind, rewind, max_rewind)) {
370
+ auto_mailto = false;
371
+ is_xmpp = true;
372
+ continue;
373
+ }
374
+ }
323
375
 
324
- if (c == '@')
325
- nb++;
326
- else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
327
- np++;
328
- else if (c != '-' && c != '_')
329
376
  break;
330
- }
377
+ }
331
378
 
332
- if (link_end < 2 || nb != 1 || np == 0 ||
333
- (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) {
334
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
335
- return;
336
- }
379
+ if (rewind == 0) {
380
+ offset += max_rewind + 1;
381
+ continue;
382
+ }
337
383
 
338
- link_end = autolink_delim(data, link_end);
384
+ assert(data[start + offset + max_rewind] == '@');
385
+ for (link_end = 1; link_end < remaining - offset - max_rewind; ++link_end) {
386
+ uint8_t c = data[start + offset + max_rewind + link_end];
387
+
388
+ if (cmark_isalnum(c))
389
+ continue;
390
+
391
+ if (c == '@') {
392
+ // Found another '@', so go back and try again with an updated offset and max_rewind.
393
+ offset += max_rewind + 1;
394
+ max_rewind = link_end - 1;
395
+ goto found_at;
396
+ } else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
397
+ cmark_isalnum(data[start + offset + max_rewind + link_end + 1]))
398
+ np++;
399
+ else if (c == '/' && is_xmpp)
400
+ continue;
401
+ else if (c != '-' && c != '_')
402
+ break;
403
+ }
339
404
 
340
- if (link_end == 0) {
341
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
342
- return;
343
- }
405
+ if (link_end < 2 || np == 0 ||
406
+ (!cmark_isalpha(data[start + offset + max_rewind + link_end - 1]) &&
407
+ data[start + offset + max_rewind + link_end - 1] != '.')) {
408
+ offset += max_rewind + link_end;
409
+ continue;
410
+ }
344
411
 
345
- cmark_chunk_to_cstr(parser->mem, &text->as.literal);
412
+ link_end = autolink_delim(data + start + offset + max_rewind, link_end);
346
413
 
347
- cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
348
- cmark_strbuf buf;
349
- cmark_strbuf_init(parser->mem, &buf, 10);
350
- cmark_strbuf_puts(&buf, "mailto:");
351
- cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind));
352
- link_node->as.link.url = cmark_chunk_buf_detach(&buf);
353
-
354
- cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
355
- cmark_chunk email = cmark_chunk_dup(
356
- &text->as.literal,
357
- offset + max_rewind - rewind,
414
+ if (link_end == 0) {
415
+ offset += max_rewind + 1;
416
+ continue;
417
+ }
418
+
419
+ cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
420
+ cmark_strbuf buf;
421
+ cmark_strbuf_init(parser->mem, &buf, 10);
422
+ if (auto_mailto)
423
+ cmark_strbuf_puts(&buf, "mailto:");
424
+ cmark_strbuf_put(&buf, data + start + offset + max_rewind - rewind, (bufsize_t)(link_end + rewind));
425
+ link_node->as.link.url = cmark_chunk_buf_detach(&buf);
426
+
427
+ cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
428
+ cmark_chunk email = cmark_chunk_dup(
429
+ &detached_chunk,
430
+ (bufsize_t)(start + offset + max_rewind - rewind),
358
431
  (bufsize_t)(link_end + rewind));
359
- cmark_chunk_to_cstr(parser->mem, &email);
360
- link_text->as.literal = email;
361
- cmark_node_append_child(link_node, link_text);
432
+ cmark_chunk_to_cstr(parser->mem, &email);
433
+ link_text->as.literal = email;
434
+ cmark_node_append_child(link_node, link_text);
362
435
 
363
- cmark_node_insert_after(text, link_node);
436
+ cmark_node_insert_after(text, link_node);
364
437
 
365
- cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
366
- post->as.literal = cmark_chunk_dup(&text->as.literal,
367
- (bufsize_t)(offset + max_rewind + link_end),
368
- (bufsize_t)(size - link_end));
369
- cmark_chunk_to_cstr(parser->mem, &post->as.literal);
438
+ cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
439
+ post->as.literal = cmark_chunk_dup(&detached_chunk,
440
+ (bufsize_t)(start + offset + max_rewind + link_end),
441
+ (bufsize_t)(remaining - offset - max_rewind - link_end));
370
442
 
371
- cmark_node_insert_after(link_node, post);
443
+ cmark_node_insert_after(link_node, post);
372
444
 
373
- text->as.literal.len = offset + max_rewind - rewind;
374
- text->as.literal.data[text->as.literal.len] = 0;
445
+ text->as.literal = cmark_chunk_dup(&detached_chunk, (bufsize_t)start, (bufsize_t)(offset + max_rewind - rewind));
446
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
447
+
448
+ text = post;
449
+ start += offset + max_rewind + link_end;
450
+ remaining -= offset + max_rewind + link_end;
451
+ offset = 0;
452
+ }
453
+
454
+ // Convert the reference to allocated memory.
455
+ assert(!text->as.literal.alloc);
456
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
375
457
 
376
- postprocess_text(parser, post, 0, depth + 1);
458
+ // Free the detached buffer.
459
+ cmark_chunk_free(parser->mem, &detached_chunk);
377
460
  }
378
461
 
379
462
  static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
@@ -400,7 +483,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser
400
483
  }
401
484
 
402
485
  if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) {
403
- postprocess_text(parser, node, 0, /*depth*/0);
486
+ postprocess_text(parser, node);
404
487
  }
405
488
  }
406
489
 
Binary file
data/ext/markly/blocks.c CHANGED
@@ -8,6 +8,7 @@
8
8
  #include <stdlib.h>
9
9
  #include <assert.h>
10
10
  #include <stdio.h>
11
+ #include <limits.h>
11
12
 
12
13
  #include "cmark_ctype.h"
13
14
  #include "syntax_extension.h"
@@ -26,6 +27,14 @@
26
27
  #define CODE_INDENT 4
27
28
  #define TAB_STOP 4
28
29
 
30
+ /**
31
+ * Very deeply nested lists can cause quadratic performance issues.
32
+ * This constant is used in open_new_blocks() to limit the nesting
33
+ * depth. It is unlikely that a non-contrived markdown document will
34
+ * be nested this deeply.
35
+ */
36
+ #define MAX_LIST_DEPTH 100
37
+
29
38
  #ifndef MIN
30
39
  #define MIN(x, y) ((x < y) ? x : y)
31
40
  #endif
@@ -639,6 +648,14 @@ static cmark_node *finalize_document(cmark_parser *parser) {
639
648
  }
640
649
 
641
650
  finalize(parser, parser->root);
651
+
652
+ // Limit total size of extra content created from reference links to
653
+ // document size to avoid superlinear growth. Always allow 100KB.
654
+ if (parser->total_size > 100000)
655
+ parser->refmap->max_ref_size = parser->total_size;
656
+ else
657
+ parser->refmap->max_ref_size = 100000;
658
+
642
659
  process_inlines(parser, parser->refmap, parser->options);
643
660
  if (parser->options & CMARK_OPT_FOOTNOTES)
644
661
  process_footnotes(parser);
@@ -698,6 +715,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
698
715
  const unsigned char *end = buffer + len;
699
716
  static const uint8_t repl[] = {239, 191, 189};
700
717
 
718
+ if (len > UINT_MAX - parser->total_size)
719
+ parser->total_size = UINT_MAX;
720
+ else
721
+ parser->total_size += len;
722
+
701
723
  if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
702
724
  // skip NL if last buffer ended with CR ; see #117
703
725
  buffer++;
@@ -1105,10 +1127,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1105
1127
  bool has_content;
1106
1128
  int save_offset;
1107
1129
  int save_column;
1130
+ size_t depth = 0;
1108
1131
 
1109
1132
  while (cont_type != CMARK_NODE_CODE_BLOCK &&
1110
1133
  cont_type != CMARK_NODE_HTML_BLOCK) {
1111
-
1134
+ depth++;
1112
1135
  S_find_first_nonspace(parser, input);
1113
1136
  indented = parser->indent >= CODE_INDENT;
1114
1137
 
@@ -1197,12 +1220,13 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1197
1220
  parser->options & CMARK_OPT_FOOTNOTES &&
1198
1221
  (matched = scan_footnote_definition(input, parser->first_nonspace))) {
1199
1222
  cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2);
1200
- cmark_chunk_to_cstr(parser->mem, &c);
1201
1223
 
1202
1224
  while (c.data[c.len - 1] != ']')
1203
1225
  --c.len;
1204
1226
  --c.len;
1205
1227
 
1228
+ cmark_chunk_to_cstr(parser->mem, &c);
1229
+
1206
1230
  S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false);
1207
1231
  *container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1);
1208
1232
  (*container)->as.literal = c;
@@ -1210,6 +1234,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1210
1234
  (*container)->internal_offset = matched;
1211
1235
  } else if ((!indented || cont_type == CMARK_NODE_LIST) &&
1212
1236
  parser->indent < 4 &&
1237
+ depth < MAX_LIST_DEPTH &&
1213
1238
  (matched = parse_list_marker(
1214
1239
  parser->mem, input, parser->first_nonspace,
1215
1240
  (*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
Binary file
Binary file
@@ -6,45 +6,45 @@ extern "C" {
6
6
  #endif
7
7
 
8
8
  #include "cmark-gfm-extension_api.h"
9
- #include "cmark-gfm-extensions_export.h"
10
- #include "config.h" // for bool
9
+ #include "cmark-gfm_export.h"
10
+ #include <stdbool.h>
11
11
  #include <stdint.h>
12
12
 
13
- CMARK_GFM_EXTENSIONS_EXPORT
13
+ CMARK_GFM_EXPORT
14
14
  void cmark_gfm_core_extensions_ensure_registered(void);
15
15
 
16
- CMARK_GFM_EXTENSIONS_EXPORT
16
+ CMARK_GFM_EXPORT
17
17
  uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node);
18
18
 
19
19
  /** Sets the number of columns for the table, returning 1 on success and 0 on error.
20
20
  */
21
- CMARK_GFM_EXTENSIONS_EXPORT
21
+ CMARK_GFM_EXPORT
22
22
  int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns);
23
23
 
24
- CMARK_GFM_EXTENSIONS_EXPORT
24
+ CMARK_GFM_EXPORT
25
25
  uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node);
26
26
 
27
27
  /** Sets the alignments for the table, returning 1 on success and 0 on error.
28
28
  */
29
- CMARK_GFM_EXTENSIONS_EXPORT
29
+ CMARK_GFM_EXPORT
30
30
  int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments);
31
31
 
32
- CMARK_GFM_EXTENSIONS_EXPORT
32
+ CMARK_GFM_EXPORT
33
33
  int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node);
34
34
 
35
35
  /** Sets whether the node is a table header row, returning 1 on success and 0 on error.
36
36
  */
37
- CMARK_GFM_EXTENSIONS_EXPORT
37
+ CMARK_GFM_EXPORT
38
38
  int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header);
39
39
 
40
- CMARK_GFM_EXTENSIONS_EXPORT
40
+ CMARK_GFM_EXPORT
41
41
  bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node);
42
42
  /* For backwards compatibility */
43
43
  #define cmark_gfm_extensions_tasklist_is_checked cmark_gfm_extensions_get_tasklist_item_checked
44
44
 
45
45
  /** Sets whether a tasklist item is "checked" (completed), returning 1 on success and 0 on error.
46
46
  */
47
- CMARK_GFM_EXTENSIONS_EXPORT
47
+ CMARK_GFM_EXPORT
48
48
  int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked);
49
49
 
50
50
  #ifdef __cplusplus
@@ -114,6 +114,7 @@ typedef struct delimiter {
114
114
  struct delimiter *previous;
115
115
  struct delimiter *next;
116
116
  cmark_node *inl_text;
117
+ bufsize_t position;
117
118
  bufsize_t length;
118
119
  unsigned char delim_char;
119
120
  int can_open;
@@ -111,13 +111,13 @@ typedef struct cmark_mem {
111
111
  * realloc and free.
112
112
  */
113
113
  CMARK_GFM_EXPORT
114
- cmark_mem *cmark_get_default_mem_allocator();
114
+ cmark_mem *cmark_get_default_mem_allocator(void);
115
115
 
116
116
  /** An arena allocator; uses system calloc to allocate large
117
117
  * slabs of memory. Memory in these slabs is not reused at all.
118
118
  */
119
119
  CMARK_GFM_EXPORT
120
- cmark_mem *cmark_get_arena_mem_allocator();
120
+ cmark_mem *cmark_get_arena_mem_allocator(void);
121
121
 
122
122
  /** Resets the arena allocator, quickly returning all used memory
123
123
  * to the operating system.
@@ -225,6 +225,11 @@ CMARK_GFM_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);
225
225
  */
226
226
  CMARK_GFM_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);
227
227
 
228
+ /** Returns the footnote reference of 'node', or NULL if 'node' doesn't have a
229
+ * footnote reference.
230
+ */
231
+ CMARK_GFM_EXPORT cmark_node *cmark_node_parent_footnote_def(cmark_node *node);
232
+
228
233
  /**
229
234
  * ## Iterator
230
235
  *
@@ -408,6 +413,17 @@ CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node);
408
413
  */
409
414
  CMARK_GFM_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
410
415
 
416
+ /**
417
+ * Returns item index of 'node'. This is only used when rendering output
418
+ * formats such as commonmark, which need to output the index. It is not
419
+ * required for formats such as html or latex.
420
+ */
421
+ CMARK_GFM_EXPORT int cmark_node_get_item_index(cmark_node *node);
422
+
423
+ /** Sets item index of 'node'. Returns 1 on success, 0 on failure.
424
+ */
425
+ CMARK_GFM_EXPORT int cmark_node_set_item_index(cmark_node *node, int idx);
426
+
411
427
  /** Returns the info string from a fenced code block.
412
428
  */
413
429
  CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);