commonmarker 0.17.13 → 0.23.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of commonmarker might be problematic. Click here for more details.

Files changed (74) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +94 -18
  3. data/Rakefile +24 -5
  4. data/bin/commonmarker +107 -47
  5. data/commonmarker.gemspec +18 -15
  6. data/ext/commonmarker/autolink.c +10 -6
  7. data/ext/commonmarker/blocks.c +102 -31
  8. data/ext/commonmarker/buffer.c +0 -1
  9. data/ext/commonmarker/chunk.h +0 -1
  10. data/ext/commonmarker/cmark-gfm-core-extensions.h +29 -0
  11. data/ext/commonmarker/cmark-gfm-extension_api.h +19 -2
  12. data/ext/commonmarker/cmark-gfm.h +19 -5
  13. data/ext/commonmarker/cmark-gfm_version.h +2 -2
  14. data/ext/commonmarker/commonmark.c +33 -12
  15. data/ext/commonmarker/commonmarker.c +209 -100
  16. data/ext/commonmarker/core-extensions.c +2 -0
  17. data/ext/commonmarker/ext_scanners.c +622 -684
  18. data/ext/commonmarker/ext_scanners.h +2 -0
  19. data/ext/commonmarker/extconf.rb +3 -1
  20. data/ext/commonmarker/footnotes.c +23 -0
  21. data/ext/commonmarker/footnotes.h +2 -0
  22. data/ext/commonmarker/houdini_href_e.c +1 -1
  23. data/ext/commonmarker/html.c +46 -25
  24. data/ext/commonmarker/inlines.c +127 -30
  25. data/ext/commonmarker/iterator.h +0 -1
  26. data/ext/commonmarker/map.h +0 -1
  27. data/ext/commonmarker/node.c +17 -3
  28. data/ext/commonmarker/node.h +9 -0
  29. data/ext/commonmarker/parser.h +2 -1
  30. data/ext/commonmarker/plaintext.c +22 -0
  31. data/ext/commonmarker/render.c +18 -15
  32. data/ext/commonmarker/render.h +0 -1
  33. data/ext/commonmarker/scanners.c +779 -953
  34. data/ext/commonmarker/scanners.h +0 -2
  35. data/ext/commonmarker/strikethrough.c +4 -1
  36. data/ext/commonmarker/syntax_extension.c +10 -0
  37. data/ext/commonmarker/syntax_extension.h +2 -0
  38. data/ext/commonmarker/table.c +178 -31
  39. data/ext/commonmarker/tasklist.c +156 -0
  40. data/ext/commonmarker/tasklist.h +8 -0
  41. data/ext/commonmarker/xml.c +9 -2
  42. data/lib/commonmarker/config.rb +41 -38
  43. data/lib/commonmarker/errors.rb +12 -0
  44. data/lib/commonmarker/node/inspect.rb +15 -17
  45. data/lib/commonmarker/node.rb +14 -2
  46. data/lib/commonmarker/renderer/html_renderer.rb +45 -36
  47. data/lib/commonmarker/renderer.rb +16 -10
  48. data/lib/commonmarker/version.rb +3 -1
  49. data/lib/commonmarker.rb +8 -7
  50. data/test/benchmark.rb +26 -21
  51. data/test/fixtures/strong.md +1 -0
  52. data/test/fixtures/table.md +10 -0
  53. data/test/test_attributes.rb +5 -3
  54. data/test/test_basics.rb +19 -0
  55. data/test/test_commands.rb +72 -0
  56. data/test/test_commonmark.rb +15 -13
  57. data/test/test_doc.rb +31 -29
  58. data/test/test_encoding.rb +9 -5
  59. data/test/test_extensions.rb +66 -73
  60. data/test/test_footnotes.rb +47 -12
  61. data/test/test_gc.rb +6 -2
  62. data/test/test_helper.rb +25 -15
  63. data/test/test_linebreaks.rb +2 -0
  64. data/test/test_maliciousness.rb +189 -190
  65. data/test/test_node.rb +12 -12
  66. data/test/test_options.rb +17 -15
  67. data/test/test_pathological_inputs.rb +14 -12
  68. data/test/test_plaintext.rb +23 -21
  69. data/test/test_renderer.rb +29 -10
  70. data/test/test_smartpunct.rb +7 -2
  71. data/test/test_spec.rb +7 -4
  72. data/test/test_tasklists.rb +43 -0
  73. data/test/test_xml.rb +107 -0
  74. metadata +74 -30
@@ -11,11 +11,13 @@ bufsize_t _scan_table_start(const unsigned char *p);
11
11
  bufsize_t _scan_table_cell(const unsigned char *p);
12
12
  bufsize_t _scan_table_cell_end(const unsigned char *p);
13
13
  bufsize_t _scan_table_row_end(const unsigned char *p);
14
+ bufsize_t _scan_tasklist(const unsigned char *p);
14
15
 
15
16
  #define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n)
16
17
  #define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n)
17
18
  #define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n)
18
19
  #define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n)
20
+ #define scan_tasklist(c, l, n) _ext_scan_at(&_scan_tasklist, c, l, n)
19
21
 
20
22
  #ifdef __cplusplus
21
23
  }
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'mkmf'
2
4
 
3
- $CFLAGS << " -std=c99"
5
+ $CFLAGS << ' -std=c99'
4
6
 
5
7
  create_makefile('commonmarker/commonmarker')
@@ -38,3 +38,26 @@ void cmark_footnote_create(cmark_map *map, cmark_node *node) {
38
38
  cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
39
39
  return cmark_map_new(mem, footnote_free);
40
40
  }
41
+
42
+ // Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
43
+ // unlink all of the footnote nodes before freeing their memory.
44
+ //
45
+ // Sometimes, two (unused) footnote nodes can end up referencing each other,
46
+ // which, as they get freed by calling `cmark_map_free` -> `footnote_free` ->
47
+ // etc., can lead to a use-after-free error.
48
+ //
49
+ // Better to `unlink` every footnote node first, setting their next, prev, and
50
+ // parent pointers to NULL, and only then walk through the map and free them.
51
+ void cmark_unlink_footnotes_map(cmark_map *map) {
52
+ cmark_map_entry *ref;
53
+ cmark_map_entry *next;
54
+
55
+ ref = map->refs;
56
+ while(ref) {
57
+ next = ref->next;
58
+ if (((cmark_footnote *)ref)->node) {
59
+ cmark_node_unlink(((cmark_footnote *)ref)->node);
60
+ }
61
+ ref = next;
62
+ }
63
+ }
@@ -18,6 +18,8 @@ typedef struct cmark_footnote cmark_footnote;
18
18
  void cmark_footnote_create(cmark_map *map, cmark_node *node);
19
19
  cmark_map *cmark_footnote_map_new(cmark_mem *mem);
20
20
 
21
+ void cmark_unlink_footnotes_map(cmark_map *map);
22
+
21
23
  #ifdef __cplusplus
22
24
  }
23
25
  #endif
@@ -15,7 +15,7 @@
15
15
  * - The characters which are *not* safe to be in
16
16
  * an URL because they are RESERVED characters.
17
17
  *
18
- * We asume (lazily) that any RESERVED char that
18
+ * We assume (lazily) that any RESERVED char that
19
19
  * appears inside an URL is actually meant to
20
20
  * have its native function (i.e. as an URL
21
21
  * component/separator) and hence needs no escaping.
@@ -59,16 +59,30 @@ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size
59
59
  cmark_strbuf_put(html, data, (bufsize_t)len);
60
60
  }
61
61
 
62
- static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) {
62
+ static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html, cmark_node *node) {
63
63
  if (renderer->written_footnote_ix >= renderer->footnote_ix)
64
64
  return false;
65
65
  renderer->written_footnote_ix = renderer->footnote_ix;
66
66
 
67
- cmark_strbuf_puts(html, "<a href=\"#fnref");
68
- char n[32];
69
- snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
70
- cmark_strbuf_puts(html, n);
71
- cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩</a>");
67
+ cmark_strbuf_puts(html, "<a href=\"#fnref-");
68
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
69
+ cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩</a>");
70
+
71
+ if (node->footnote.def_count > 1)
72
+ {
73
+ for(int i = 2; i <= node->footnote.def_count; i++) {
74
+ char n[32];
75
+ snprintf(n, sizeof(n), "%d", i);
76
+
77
+ cmark_strbuf_puts(html, " <a href=\"#fnref-");
78
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
79
+ cmark_strbuf_puts(html, "-");
80
+ cmark_strbuf_puts(html, n);
81
+ cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩<sup class=\"footnote-ref\">");
82
+ cmark_strbuf_puts(html, n);
83
+ cmark_strbuf_puts(html, "</sup></a>");
84
+ }
85
+ }
72
86
 
73
87
  return true;
74
88
  }
@@ -227,7 +241,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
227
241
 
228
242
  case CMARK_NODE_HTML_BLOCK:
229
243
  cmark_html_render_cr(html);
230
- if (options & CMARK_OPT_SAFE) {
244
+ if (!(options & CMARK_OPT_UNSAFE)) {
231
245
  cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
232
246
  } else if (renderer->filter_extensions) {
233
247
  filter_html_block(renderer, node->as.literal.data, node->as.literal.len);
@@ -273,7 +287,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
273
287
  } else {
274
288
  if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) {
275
289
  cmark_strbuf_putc(html, ' ');
276
- S_put_footnote_backref(renderer, html);
290
+ S_put_footnote_backref(renderer, html, parent);
277
291
  }
278
292
  cmark_strbuf_puts(html, "</p>\n");
279
293
  }
@@ -305,7 +319,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
305
319
  break;
306
320
 
307
321
  case CMARK_NODE_HTML_INLINE:
308
- if (options & CMARK_OPT_SAFE) {
322
+ if (!(options & CMARK_OPT_UNSAFE)) {
309
323
  cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
310
324
  } else {
311
325
  filtered = false;
@@ -354,8 +368,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
354
368
  case CMARK_NODE_LINK:
355
369
  if (entering) {
356
370
  cmark_strbuf_puts(html, "<a href=\"");
357
- if (!((options & CMARK_OPT_SAFE) &&
358
- scan_dangerous_url(&node->as.link.url, 0))) {
371
+ if ((options & CMARK_OPT_UNSAFE) ||
372
+ !(scan_dangerous_url(&node->as.link.url, 0))) {
359
373
  houdini_escape_href(html, node->as.link.url.data,
360
374
  node->as.link.url.len);
361
375
  }
@@ -372,8 +386,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
372
386
  case CMARK_NODE_IMAGE:
373
387
  if (entering) {
374
388
  cmark_strbuf_puts(html, "<img src=\"");
375
- if (!((options & CMARK_OPT_SAFE) &&
376
- scan_dangerous_url(&node->as.link.url, 0))) {
389
+ if ((options & CMARK_OPT_UNSAFE) ||
390
+ !(scan_dangerous_url(&node->as.link.url, 0))) {
377
391
  houdini_escape_href(html, node->as.link.url.data,
378
392
  node->as.link.url.len);
379
393
  }
@@ -392,16 +406,15 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
392
406
  case CMARK_NODE_FOOTNOTE_DEFINITION:
393
407
  if (entering) {
394
408
  if (renderer->footnote_ix == 0) {
395
- cmark_strbuf_puts(html, "<section class=\"footnotes\">\n<ol>\n");
409
+ cmark_strbuf_puts(html, "<section class=\"footnotes\" data-footnotes>\n<ol>\n");
396
410
  }
397
411
  ++renderer->footnote_ix;
398
- cmark_strbuf_puts(html, "<li id=\"fn");
399
- char n[32];
400
- snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
401
- cmark_strbuf_puts(html, n);
412
+
413
+ cmark_strbuf_puts(html, "<li id=\"fn-");
414
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
402
415
  cmark_strbuf_puts(html, "\">\n");
403
416
  } else {
404
- if (S_put_footnote_backref(renderer, html)) {
417
+ if (S_put_footnote_backref(renderer, html, node)) {
405
418
  cmark_strbuf_putc(html, '\n');
406
419
  }
407
420
  cmark_strbuf_puts(html, "</li>\n");
@@ -410,12 +423,20 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
410
423
 
411
424
  case CMARK_NODE_FOOTNOTE_REFERENCE:
412
425
  if (entering) {
413
- cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn");
414
- cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
415
- cmark_strbuf_puts(html, "\" id=\"fnref");
416
- cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
417
- cmark_strbuf_puts(html, "\">");
418
- cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
426
+ cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn-");
427
+ houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);
428
+ cmark_strbuf_puts(html, "\" id=\"fnref-");
429
+ houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);
430
+
431
+ if (node->footnote.ref_ix > 1) {
432
+ char n[32];
433
+ snprintf(n, sizeof(n), "%d", node->footnote.ref_ix);
434
+ cmark_strbuf_puts(html, "-");
435
+ cmark_strbuf_puts(html, n);
436
+ }
437
+
438
+ cmark_strbuf_puts(html, "\" data-footnote-ref>");
439
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
419
440
  cmark_strbuf_puts(html, "</a></sup>");
420
441
  }
421
442
  break;
@@ -321,6 +321,43 @@ static bufsize_t scan_to_closing_backticks(subject *subj,
321
321
  return 0;
322
322
  }
323
323
 
324
+ // Destructively modify string, converting newlines to
325
+ // spaces, then removing a single leading + trailing space,
326
+ // unless the code span consists entirely of space characters.
327
+ static void S_normalize_code(cmark_strbuf *s) {
328
+ bufsize_t r, w;
329
+ bool contains_nonspace = false;
330
+
331
+ for (r = 0, w = 0; r < s->size; ++r) {
332
+ switch (s->ptr[r]) {
333
+ case '\r':
334
+ if (s->ptr[r + 1] != '\n') {
335
+ s->ptr[w++] = ' ';
336
+ }
337
+ break;
338
+ case '\n':
339
+ s->ptr[w++] = ' ';
340
+ break;
341
+ default:
342
+ s->ptr[w++] = s->ptr[r];
343
+ }
344
+ if (s->ptr[r] != ' ') {
345
+ contains_nonspace = true;
346
+ }
347
+ }
348
+
349
+ // begins and ends with space?
350
+ if (contains_nonspace &&
351
+ s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
352
+ cmark_strbuf_drop(s, 1);
353
+ cmark_strbuf_truncate(s, w - 2);
354
+ } else {
355
+ cmark_strbuf_truncate(s, w);
356
+ }
357
+
358
+ }
359
+
360
+
324
361
  // Parse backtick code section or raw backticks, return an inline.
325
362
  // Assumes that the subject has a backtick at the current position.
326
363
  static cmark_node *handle_backticks(subject *subj, int options) {
@@ -336,8 +373,7 @@ static cmark_node *handle_backticks(subject *subj, int options) {
336
373
 
337
374
  cmark_strbuf_set(&buf, subj->input.data + startpos,
338
375
  endpos - startpos - openticks.len);
339
- cmark_strbuf_trim(&buf);
340
- cmark_strbuf_normalize_whitespace(&buf);
376
+ S_normalize_code(&buf);
341
377
 
342
378
  cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
343
379
  adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
@@ -345,6 +381,7 @@ static cmark_node *handle_backticks(subject *subj, int options) {
345
381
  }
346
382
  }
347
383
 
384
+
348
385
  // Scan ***, **, or * and return number scanned, or 0.
349
386
  // Advances position.
350
387
  static int scan_delims(subject *subj, unsigned char c, bool *can_open,
@@ -599,7 +636,6 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
599
636
  delimiter *opener;
600
637
  delimiter *old_closer;
601
638
  bool opener_found;
602
- bool odd_match;
603
639
  delimiter *openers_bottom[3][128];
604
640
  int i;
605
641
 
@@ -624,15 +660,14 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
624
660
  // Now look backwards for first matching opener:
625
661
  opener = closer->previous;
626
662
  opener_found = false;
627
- odd_match = false;
628
663
  while (opener != NULL && opener != stack_bottom &&
629
664
  opener != openers_bottom[closer->length % 3][closer->delim_char]) {
630
665
  if (opener->can_open && opener->delim_char == closer->delim_char) {
631
666
  // interior closer of size 2 can't match opener of size 1
632
667
  // or of size 1 can't match 2
633
- odd_match = (closer->can_open || opener->can_close) &&
634
- ((opener->length + closer->length) % 3 == 0);
635
- if (!odd_match) {
668
+ if (!(closer->can_open || opener->can_close) ||
669
+ closer->length % 3 == 0 ||
670
+ (opener->length + closer->length) % 3 != 0) {
636
671
  opener_found = true;
637
672
  break;
638
673
  }
@@ -729,9 +764,10 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
729
764
  }
730
765
  cmark_node_insert_after(opener_inl, emph);
731
766
 
732
- emph->start_line = emph->end_line = subj->line;
733
- emph->start_column = opener_inl->start_column + subj->column_offset;
734
- emph->end_column = closer_inl->end_column + subj->column_offset;
767
+ emph->start_line = opener_inl->start_line;
768
+ emph->end_line = closer_inl->end_line;
769
+ emph->start_column = opener_inl->start_column;
770
+ emph->end_column = closer_inl->end_column;
735
771
 
736
772
  // if opener has 0 characters, remove it and its associated inline
737
773
  if (opener_num_chars == 0) {
@@ -937,17 +973,21 @@ static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
937
973
  else if (input->data[i] == '(') {
938
974
  ++nb_p;
939
975
  ++i;
940
- if (nb_p > 32)
941
- return -1;
976
+ if (nb_p > 32)
977
+ return -1;
942
978
  } else if (input->data[i] == ')') {
943
979
  if (nb_p == 0)
944
980
  break;
945
981
  --nb_p;
946
982
  ++i;
947
- } else if (cmark_isspace(input->data[i]))
983
+ } else if (cmark_isspace(input->data[i])) {
984
+ if (i == offset) {
985
+ return -1;
986
+ }
948
987
  break;
949
- else
988
+ } else {
950
989
  ++i;
990
+ }
951
991
  }
952
992
 
953
993
  if (i >= input->len)
@@ -973,7 +1013,7 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
973
1013
  } else if (input->data[i] == '\\')
974
1014
  i += 2;
975
1015
  else if (input->data[i] == '\n' || input->data[i] == '<')
976
- return manual_scan_link_url_2(input, offset, output);
1016
+ return -1;
977
1017
  else
978
1018
  ++i;
979
1019
  }
@@ -1097,19 +1137,77 @@ noMatch:
1097
1137
  // What if we're a footnote link?
1098
1138
  if (parser->options & CMARK_OPT_FOOTNOTES &&
1099
1139
  opener->inl_text->next &&
1100
- opener->inl_text->next->type == CMARK_NODE_TEXT &&
1101
- !opener->inl_text->next->next) {
1140
+ opener->inl_text->next->type == CMARK_NODE_TEXT) {
1141
+
1102
1142
  cmark_chunk *literal = &opener->inl_text->next->as.literal;
1103
- if (literal->len > 1 && literal->data[0] == '^') {
1104
- inl = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
1105
- inl->as.literal = cmark_chunk_dup(literal, 1, literal->len - 1);
1106
- inl->start_line = inl->end_line = subj->line;
1107
- inl->start_column = opener->inl_text->start_column;
1108
- inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
1109
- cmark_node_insert_before(opener->inl_text, inl);
1110
- cmark_node_free(opener->inl_text->next);
1111
- cmark_node_free(opener->inl_text);
1143
+
1144
+ // look back to the opening '[', and skip ahead to the next character
1145
+ // if we're looking at a '[^' sequence, and there is other text or nodes
1146
+ // after the ^, let's call it a footnote reference.
1147
+ if ((literal->len > 0 && literal->data[0] == '^') && (literal->len > 1 || opener->inl_text->next->next)) {
1148
+
1149
+ // Before we got this far, the `handle_close_bracket` function may have
1150
+ // advanced the current state beyond our footnote's actual closing
1151
+ // bracket, ie if it went looking for a `link_label`.
1152
+ // Let's just rewind the subject's position:
1153
+ subj->pos = initial_pos;
1154
+
1155
+ cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
1156
+
1157
+ // the start and end of the footnote ref are the opening and closing brackets,
1158
+ // i.e. the opener's start_column and the subject's current position
1159
+ int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset;
1160
+ int fnref_start_column = opener->inl_text->start_column;
1161
+
1162
+ // any given node delineates a substring of the line being processed,
1163
+ // with the remainder of the line being pointed to through its 'literal'
1164
+ // struct member.
1165
+ // here, we copy the literal's pointer, moving it past the '^' character
1166
+ // for a length equal to the size of footnote reference text.
1167
+ // i.e. end_col minus start_col, minus the [ and the ^ characters
1168
+ //
1169
+ // this copies the footnote reference string, even if between the
1170
+ // `opener` and the subject's current position there are other nodes
1171
+ //
1172
+ // (first, check for underflows)
1173
+ if ((fnref_start_column + 2) <= fnref_end_column) {
1174
+ fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2);
1175
+ } else {
1176
+ fnref->as.literal = cmark_chunk_dup(literal, 1, 0);
1177
+ }
1178
+
1179
+ fnref->start_line = fnref->end_line = subj->line;
1180
+ fnref->start_column = fnref_start_column;
1181
+ fnref->end_column = fnref_end_column;
1182
+
1183
+ // we then replace the opener with this new fnref node, the net effect
1184
+ // being replacing the opening '[' text node with a `^footnote-ref]` node.
1185
+ cmark_node_insert_before(opener->inl_text, fnref);
1186
+
1112
1187
  process_emphasis(parser, subj, opener->previous_delimiter);
1188
+ // sometimes, the footnote reference text gets parsed into multiple nodes
1189
+ // i.e. '[^example]' parsed into '[', '^exam', 'ple]'.
1190
+ // this happens, for example, with the autolink extension. when the autolinker
1191
+ // finds the 'w' character, it will split the text into multiple nodes
1192
+ // in hopes of being able to match a 'www.' substring.
1193
+ //
1194
+ // because this function is called one character at a time via the
1195
+ // `parse_inlines` function, and the current subj->pos is pointing at the
1196
+ // closing ] bracket, and because we copy all the text between the [ ]
1197
+ // brackets, we should be able to safely ignore and delete any nodes after
1198
+ // the opener->inl_text->next.
1199
+ //
1200
+ // therefore, here we walk thru the list and free them all up
1201
+ cmark_node *next_node;
1202
+ cmark_node *current_node = opener->inl_text->next;
1203
+ while(current_node) {
1204
+ next_node = current_node->next;
1205
+ cmark_node_free(current_node);
1206
+ current_node = next_node;
1207
+ }
1208
+
1209
+ cmark_node_free(opener->inl_text);
1210
+
1113
1211
  pop_bracket(subj);
1114
1212
  return NULL;
1115
1213
  }
@@ -1400,8 +1498,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
1400
1498
 
1401
1499
  // parse link url:
1402
1500
  spnl(&subj);
1403
- if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 &&
1404
- url.len > 0) {
1501
+ if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) {
1405
1502
  subj.pos += matchlen;
1406
1503
  } else {
1407
1504
  return 0;
@@ -1410,7 +1507,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
1410
1507
  // parse optional link_title
1411
1508
  beforetitle = subj.pos;
1412
1509
  spnl(&subj);
1413
- matchlen = scan_link_title(&subj.input, subj.pos);
1510
+ matchlen = subj.pos == beforetitle ? 0 : scan_link_title(&subj.input, subj.pos);
1414
1511
  if (matchlen) {
1415
1512
  title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1416
1513
  subj.pos += matchlen;
@@ -1520,7 +1617,7 @@ int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser,
1520
1617
  }
1521
1618
  }
1522
1619
 
1523
- while (peek_char(parser) == c && numdelims <= max_delims) {
1620
+ while (peek_char(parser) == c && numdelims < max_delims) {
1524
1621
  numdelims++;
1525
1622
  advance(parser);
1526
1623
  }
@@ -6,7 +6,6 @@ extern "C" {
6
6
  #endif
7
7
 
8
8
  #include "cmark-gfm.h"
9
- #include "memory.h"
10
9
 
11
10
  typedef struct {
12
11
  cmark_event_type ev_type;
@@ -1,7 +1,6 @@
1
1
  #ifndef CMARK_MAP_H
2
2
  #define CMARK_MAP_H
3
3
 
4
- #include "memory.h"
5
4
  #include "chunk.h"
6
5
 
7
6
  #ifdef __cplusplus
@@ -69,10 +69,11 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) {
69
69
  return cmark_node_can_contain_type(node, (cmark_node_type) child->type);
70
70
  }
71
71
 
72
- cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) {
72
+ cmark_node *cmark_node_new_with_mem_and_ext(cmark_node_type type, cmark_mem *mem, cmark_syntax_extension *extension) {
73
73
  cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node));
74
74
  cmark_strbuf_init(mem, &node->content, 0);
75
75
  node->type = (uint16_t)type;
76
+ node->extension = extension;
76
77
 
77
78
  switch (node->type) {
78
79
  case CMARK_NODE_HEADING:
@@ -91,12 +92,25 @@ cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) {
91
92
  break;
92
93
  }
93
94
 
95
+ if (node->extension && node->extension->opaque_alloc_func) {
96
+ node->extension->opaque_alloc_func(node->extension, mem, node);
97
+ }
98
+
94
99
  return node;
95
100
  }
96
101
 
97
- cmark_node *cmark_node_new(cmark_node_type type) {
102
+ cmark_node *cmark_node_new_with_ext(cmark_node_type type, cmark_syntax_extension *extension) {
98
103
  extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
99
- return cmark_node_new_with_mem(type, &CMARK_DEFAULT_MEM_ALLOCATOR);
104
+ return cmark_node_new_with_mem_and_ext(type, &CMARK_DEFAULT_MEM_ALLOCATOR, extension);
105
+ }
106
+
107
+ cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem)
108
+ {
109
+ return cmark_node_new_with_mem_and_ext(type, mem, NULL);
110
+ }
111
+
112
+ cmark_node *cmark_node_new(cmark_node_type type) {
113
+ return cmark_node_new_with_ext(type, NULL);
100
114
  }
101
115
 
102
116
  static void free_node_as(cmark_node *node) {
@@ -21,6 +21,7 @@ typedef struct {
21
21
  cmark_delim_type delimiter;
22
22
  unsigned char bullet_char;
23
23
  bool tight;
24
+ bool checked; // For task list extension
24
25
  } cmark_list;
25
26
 
26
27
  typedef struct {
@@ -50,6 +51,7 @@ typedef struct {
50
51
  enum cmark_node__internal_flags {
51
52
  CMARK_NODE__OPEN = (1 << 0),
52
53
  CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
54
+ CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),
53
55
  };
54
56
 
55
57
  struct cmark_node {
@@ -74,6 +76,13 @@ struct cmark_node {
74
76
 
75
77
  cmark_syntax_extension *extension;
76
78
 
79
+ union {
80
+ int ref_ix;
81
+ int def_count;
82
+ } footnote;
83
+
84
+ cmark_node *parent_footnote_def;
85
+
77
86
  union {
78
87
  cmark_chunk literal;
79
88
  cmark_list list;
@@ -2,9 +2,9 @@
2
2
  #define CMARK_PARSER_H
3
3
 
4
4
  #include <stdio.h>
5
+ #include "references.h"
5
6
  #include "node.h"
6
7
  #include "buffer.h"
7
- #include "memory.h"
8
8
 
9
9
  #ifdef __cplusplus
10
10
  extern "C" {
@@ -30,6 +30,7 @@ struct cmark_parser {
30
30
  bufsize_t first_nonspace;
31
31
  /* See the documentation for cmark_parser_get_first_nonspace_column() in cmark.h */
32
32
  bufsize_t first_nonspace_column;
33
+ bufsize_t thematic_break_kill_pos;
33
34
  /* See the documentation for cmark_parser_get_indent() in cmark.h */
34
35
  int indent;
35
36
  /* See the documentation for cmark_parser_is_blank() in cmark.h */
@@ -191,6 +191,28 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
191
191
  case CMARK_NODE_IMAGE:
192
192
  break;
193
193
 
194
+ case CMARK_NODE_FOOTNOTE_REFERENCE:
195
+ if (entering) {
196
+ LIT("[^");
197
+ OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL);
198
+ LIT("]");
199
+ }
200
+ break;
201
+
202
+ case CMARK_NODE_FOOTNOTE_DEFINITION:
203
+ if (entering) {
204
+ renderer->footnote_ix += 1;
205
+ LIT("[^");
206
+ char n[32];
207
+ snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
208
+ OUT(n, false, LITERAL);
209
+ LIT("]: ");
210
+
211
+ cmark_strbuf_puts(renderer->prefix, " ");
212
+ } else {
213
+ cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
214
+ }
215
+ break;
194
216
  default:
195
217
  assert(false);
196
218
  break;
@@ -57,6 +57,7 @@ static void S_out(cmark_renderer *renderer, cmark_node *node,
57
57
  }
58
58
  }
59
59
  renderer->column = 0;
60
+ renderer->last_breakable = 0;
60
61
  renderer->begin_line = true;
61
62
  renderer->begin_content = true;
62
63
  renderer->need_cr -= 1;
@@ -97,21 +98,23 @@ static void S_out(cmark_renderer *renderer, cmark_node *node,
97
98
  }
98
99
  }
99
100
 
100
- } else if (c == 10) {
101
- cmark_strbuf_putc(renderer->buffer, '\n');
102
- renderer->column = 0;
103
- renderer->begin_line = true;
104
- renderer->begin_content = true;
105
- renderer->last_breakable = 0;
106
101
  } else if (escape == LITERAL) {
107
- cmark_render_code_point(renderer, c);
108
- renderer->begin_line = false;
109
- // we don't set 'begin_content' to false til we've
110
- // finished parsing a digit. Reason: in commonmark
111
- // we need to escape a potential list marker after
112
- // a digit:
113
- renderer->begin_content =
114
- renderer->begin_content && cmark_isdigit((char)c) == 1;
102
+ if (c == 10) {
103
+ cmark_strbuf_putc(renderer->buffer, '\n');
104
+ renderer->column = 0;
105
+ renderer->begin_line = true;
106
+ renderer->begin_content = true;
107
+ renderer->last_breakable = 0;
108
+ } else {
109
+ cmark_render_code_point(renderer, c);
110
+ renderer->begin_line = false;
111
+ // we don't set 'begin_content' to false til we've
112
+ // finished parsing a digit. Reason: in commonmark
113
+ // we need to escape a potential list marker after
114
+ // a digit:
115
+ renderer->begin_content =
116
+ renderer->begin_content && cmark_isdigit((char)c) == 1;
117
+ }
115
118
  } else {
116
119
  (renderer->outc)(renderer, node, escape, c, nextc);
117
120
  renderer->begin_line = false;
@@ -188,7 +191,7 @@ char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width,
188
191
  }
189
192
 
190
193
  // ensure final newline
191
- if (renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
194
+ if (renderer.buffer->size == 0 || renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
192
195
  cmark_strbuf_putc(renderer.buffer, '\n');
193
196
  }
194
197
 
@@ -8,7 +8,6 @@ extern "C" {
8
8
  #include <stdlib.h>
9
9
  #include "buffer.h"
10
10
  #include "chunk.h"
11
- #include "memory.h"
12
11
 
13
12
  typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping;
14
13