commonmarker 0.17.13 → 0.23.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of commonmarker might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/README.md +94 -18
- data/Rakefile +24 -5
- data/bin/commonmarker +107 -47
- data/commonmarker.gemspec +18 -15
- data/ext/commonmarker/autolink.c +10 -6
- data/ext/commonmarker/blocks.c +102 -31
- data/ext/commonmarker/buffer.c +0 -1
- data/ext/commonmarker/chunk.h +0 -1
- data/ext/commonmarker/cmark-gfm-core-extensions.h +29 -0
- data/ext/commonmarker/cmark-gfm-extension_api.h +19 -2
- data/ext/commonmarker/cmark-gfm.h +19 -5
- data/ext/commonmarker/cmark-gfm_version.h +2 -2
- data/ext/commonmarker/commonmark.c +33 -12
- data/ext/commonmarker/commonmarker.c +209 -100
- data/ext/commonmarker/core-extensions.c +2 -0
- data/ext/commonmarker/ext_scanners.c +622 -684
- data/ext/commonmarker/ext_scanners.h +2 -0
- data/ext/commonmarker/extconf.rb +3 -1
- data/ext/commonmarker/footnotes.c +23 -0
- data/ext/commonmarker/footnotes.h +2 -0
- data/ext/commonmarker/houdini_href_e.c +1 -1
- data/ext/commonmarker/html.c +46 -25
- data/ext/commonmarker/inlines.c +127 -30
- data/ext/commonmarker/iterator.h +0 -1
- data/ext/commonmarker/map.h +0 -1
- data/ext/commonmarker/node.c +17 -3
- data/ext/commonmarker/node.h +9 -0
- data/ext/commonmarker/parser.h +2 -1
- data/ext/commonmarker/plaintext.c +22 -0
- data/ext/commonmarker/render.c +18 -15
- data/ext/commonmarker/render.h +0 -1
- data/ext/commonmarker/scanners.c +779 -953
- data/ext/commonmarker/scanners.h +0 -2
- data/ext/commonmarker/strikethrough.c +4 -1
- data/ext/commonmarker/syntax_extension.c +10 -0
- data/ext/commonmarker/syntax_extension.h +2 -0
- data/ext/commonmarker/table.c +178 -31
- data/ext/commonmarker/tasklist.c +156 -0
- data/ext/commonmarker/tasklist.h +8 -0
- data/ext/commonmarker/xml.c +9 -2
- data/lib/commonmarker/config.rb +41 -38
- data/lib/commonmarker/errors.rb +12 -0
- data/lib/commonmarker/node/inspect.rb +15 -17
- data/lib/commonmarker/node.rb +14 -2
- data/lib/commonmarker/renderer/html_renderer.rb +45 -36
- data/lib/commonmarker/renderer.rb +16 -10
- data/lib/commonmarker/version.rb +3 -1
- data/lib/commonmarker.rb +8 -7
- data/test/benchmark.rb +26 -21
- data/test/fixtures/strong.md +1 -0
- data/test/fixtures/table.md +10 -0
- data/test/test_attributes.rb +5 -3
- data/test/test_basics.rb +19 -0
- data/test/test_commands.rb +72 -0
- data/test/test_commonmark.rb +15 -13
- data/test/test_doc.rb +31 -29
- data/test/test_encoding.rb +9 -5
- data/test/test_extensions.rb +66 -73
- data/test/test_footnotes.rb +47 -12
- data/test/test_gc.rb +6 -2
- data/test/test_helper.rb +25 -15
- data/test/test_linebreaks.rb +2 -0
- data/test/test_maliciousness.rb +189 -190
- data/test/test_node.rb +12 -12
- data/test/test_options.rb +17 -15
- data/test/test_pathological_inputs.rb +14 -12
- data/test/test_plaintext.rb +23 -21
- data/test/test_renderer.rb +29 -10
- data/test/test_smartpunct.rb +7 -2
- data/test/test_spec.rb +7 -4
- data/test/test_tasklists.rb +43 -0
- data/test/test_xml.rb +107 -0
- metadata +74 -30
@@ -11,11 +11,13 @@ bufsize_t _scan_table_start(const unsigned char *p);
|
|
11
11
|
bufsize_t _scan_table_cell(const unsigned char *p);
|
12
12
|
bufsize_t _scan_table_cell_end(const unsigned char *p);
|
13
13
|
bufsize_t _scan_table_row_end(const unsigned char *p);
|
14
|
+
bufsize_t _scan_tasklist(const unsigned char *p);
|
14
15
|
|
15
16
|
#define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n)
|
16
17
|
#define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n)
|
17
18
|
#define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n)
|
18
19
|
#define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n)
|
20
|
+
#define scan_tasklist(c, l, n) _ext_scan_at(&_scan_tasklist, c, l, n)
|
19
21
|
|
20
22
|
#ifdef __cplusplus
|
21
23
|
}
|
data/ext/commonmarker/extconf.rb
CHANGED
@@ -38,3 +38,26 @@ void cmark_footnote_create(cmark_map *map, cmark_node *node) {
|
|
38
38
|
cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
|
39
39
|
return cmark_map_new(mem, footnote_free);
|
40
40
|
}
|
41
|
+
|
42
|
+
// Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
|
43
|
+
// unlink all of the footnote nodes before freeing their memory.
|
44
|
+
//
|
45
|
+
// Sometimes, two (unused) footnote nodes can end up referencing each other,
|
46
|
+
// which as they get freed up by calling `cmark_map_free` -> `footnote_free` ->
|
47
|
+
// etc, can lead to a use-after-free error.
|
48
|
+
//
|
49
|
+
// Better to `unlink` every footnote node first, setting their next, prev, and
|
50
|
+
// parent pointers to NULL, and only then walk thru & free them up.
|
51
|
+
void cmark_unlink_footnotes_map(cmark_map *map) {
|
52
|
+
cmark_map_entry *ref;
|
53
|
+
cmark_map_entry *next;
|
54
|
+
|
55
|
+
ref = map->refs;
|
56
|
+
while(ref) {
|
57
|
+
next = ref->next;
|
58
|
+
if (((cmark_footnote *)ref)->node) {
|
59
|
+
cmark_node_unlink(((cmark_footnote *)ref)->node);
|
60
|
+
}
|
61
|
+
ref = next;
|
62
|
+
}
|
63
|
+
}
|
@@ -15,7 +15,7 @@
|
|
15
15
|
* - The characters which are *not* safe to be in
|
16
16
|
* an URL because they are RESERVED characters.
|
17
17
|
*
|
18
|
-
* We
|
18
|
+
* We assume (lazily) that any RESERVED char that
|
19
19
|
* appears inside an URL is actually meant to
|
20
20
|
* have its native function (i.e. as an URL
|
21
21
|
* component/separator) and hence needs no escaping.
|
data/ext/commonmarker/html.c
CHANGED
@@ -59,16 +59,30 @@ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size
|
|
59
59
|
cmark_strbuf_put(html, data, (bufsize_t)len);
|
60
60
|
}
|
61
61
|
|
62
|
-
static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) {
|
62
|
+
static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html, cmark_node *node) {
|
63
63
|
if (renderer->written_footnote_ix >= renderer->footnote_ix)
|
64
64
|
return false;
|
65
65
|
renderer->written_footnote_ix = renderer->footnote_ix;
|
66
66
|
|
67
|
-
cmark_strbuf_puts(html, "<a href=\"#fnref");
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
67
|
+
cmark_strbuf_puts(html, "<a href=\"#fnref-");
|
68
|
+
houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
|
69
|
+
cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩</a>");
|
70
|
+
|
71
|
+
if (node->footnote.def_count > 1)
|
72
|
+
{
|
73
|
+
for(int i = 2; i <= node->footnote.def_count; i++) {
|
74
|
+
char n[32];
|
75
|
+
snprintf(n, sizeof(n), "%d", i);
|
76
|
+
|
77
|
+
cmark_strbuf_puts(html, " <a href=\"#fnref-");
|
78
|
+
houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
|
79
|
+
cmark_strbuf_puts(html, "-");
|
80
|
+
cmark_strbuf_puts(html, n);
|
81
|
+
cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩<sup class=\"footnote-ref\">");
|
82
|
+
cmark_strbuf_puts(html, n);
|
83
|
+
cmark_strbuf_puts(html, "</sup></a>");
|
84
|
+
}
|
85
|
+
}
|
72
86
|
|
73
87
|
return true;
|
74
88
|
}
|
@@ -227,7 +241,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
|
|
227
241
|
|
228
242
|
case CMARK_NODE_HTML_BLOCK:
|
229
243
|
cmark_html_render_cr(html);
|
230
|
-
if (options & CMARK_OPT_SAFE) {
|
244
|
+
if (!(options & CMARK_OPT_UNSAFE)) {
|
231
245
|
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
|
232
246
|
} else if (renderer->filter_extensions) {
|
233
247
|
filter_html_block(renderer, node->as.literal.data, node->as.literal.len);
|
@@ -273,7 +287,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
|
|
273
287
|
} else {
|
274
288
|
if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) {
|
275
289
|
cmark_strbuf_putc(html, ' ');
|
276
|
-
S_put_footnote_backref(renderer, html);
|
290
|
+
S_put_footnote_backref(renderer, html, parent);
|
277
291
|
}
|
278
292
|
cmark_strbuf_puts(html, "</p>\n");
|
279
293
|
}
|
@@ -305,7 +319,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
|
|
305
319
|
break;
|
306
320
|
|
307
321
|
case CMARK_NODE_HTML_INLINE:
|
308
|
-
if (options & CMARK_OPT_SAFE) {
|
322
|
+
if (!(options & CMARK_OPT_UNSAFE)) {
|
309
323
|
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
|
310
324
|
} else {
|
311
325
|
filtered = false;
|
@@ -354,8 +368,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
|
|
354
368
|
case CMARK_NODE_LINK:
|
355
369
|
if (entering) {
|
356
370
|
cmark_strbuf_puts(html, "<a href=\"");
|
357
|
-
if (!((options & CMARK_OPT_SAFE) &&
|
358
|
-
scan_dangerous_url(&node->as.link.url, 0))) {
|
371
|
+
if ((options & CMARK_OPT_UNSAFE) ||
|
372
|
+
!(scan_dangerous_url(&node->as.link.url, 0))) {
|
359
373
|
houdini_escape_href(html, node->as.link.url.data,
|
360
374
|
node->as.link.url.len);
|
361
375
|
}
|
@@ -372,8 +386,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
|
|
372
386
|
case CMARK_NODE_IMAGE:
|
373
387
|
if (entering) {
|
374
388
|
cmark_strbuf_puts(html, "<img src=\"");
|
375
|
-
if (!((options & CMARK_OPT_SAFE) &&
|
376
|
-
scan_dangerous_url(&node->as.link.url, 0))) {
|
389
|
+
if ((options & CMARK_OPT_UNSAFE) ||
|
390
|
+
!(scan_dangerous_url(&node->as.link.url, 0))) {
|
377
391
|
houdini_escape_href(html, node->as.link.url.data,
|
378
392
|
node->as.link.url.len);
|
379
393
|
}
|
@@ -392,16 +406,15 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
|
|
392
406
|
case CMARK_NODE_FOOTNOTE_DEFINITION:
|
393
407
|
if (entering) {
|
394
408
|
if (renderer->footnote_ix == 0) {
|
395
|
-
cmark_strbuf_puts(html, "<section class=\"footnotes\">\n<ol>\n");
|
409
|
+
cmark_strbuf_puts(html, "<section class=\"footnotes\" data-footnotes>\n<ol>\n");
|
396
410
|
}
|
397
411
|
++renderer->footnote_ix;
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
cmark_strbuf_puts(html, n);
|
412
|
+
|
413
|
+
cmark_strbuf_puts(html, "<li id=\"fn-");
|
414
|
+
houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
|
402
415
|
cmark_strbuf_puts(html, "\">\n");
|
403
416
|
} else {
|
404
|
-
if (S_put_footnote_backref(renderer, html)) {
|
417
|
+
if (S_put_footnote_backref(renderer, html, node)) {
|
405
418
|
cmark_strbuf_putc(html, '\n');
|
406
419
|
}
|
407
420
|
cmark_strbuf_puts(html, "</li>\n");
|
@@ -410,12 +423,20 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
|
|
410
423
|
|
411
424
|
case CMARK_NODE_FOOTNOTE_REFERENCE:
|
412
425
|
if (entering) {
|
413
|
-
cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn");
|
414
|
-
|
415
|
-
cmark_strbuf_puts(html, "\" id=\"fnref");
|
416
|
-
|
417
|
-
|
418
|
-
|
426
|
+
cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn-");
|
427
|
+
houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);
|
428
|
+
cmark_strbuf_puts(html, "\" id=\"fnref-");
|
429
|
+
houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);
|
430
|
+
|
431
|
+
if (node->footnote.ref_ix > 1) {
|
432
|
+
char n[32];
|
433
|
+
snprintf(n, sizeof(n), "%d", node->footnote.ref_ix);
|
434
|
+
cmark_strbuf_puts(html, "-");
|
435
|
+
cmark_strbuf_puts(html, n);
|
436
|
+
}
|
437
|
+
|
438
|
+
cmark_strbuf_puts(html, "\" data-footnote-ref>");
|
439
|
+
houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
|
419
440
|
cmark_strbuf_puts(html, "</a></sup>");
|
420
441
|
}
|
421
442
|
break;
|
data/ext/commonmarker/inlines.c
CHANGED
@@ -321,6 +321,43 @@ static bufsize_t scan_to_closing_backticks(subject *subj,
|
|
321
321
|
return 0;
|
322
322
|
}
|
323
323
|
|
324
|
+
// Destructively modify string, converting newlines to
|
325
|
+
// spaces, then removing a single leading + trailing space,
|
326
|
+
// unless the code span consists entirely of space characters.
|
327
|
+
static void S_normalize_code(cmark_strbuf *s) {
|
328
|
+
bufsize_t r, w;
|
329
|
+
bool contains_nonspace = false;
|
330
|
+
|
331
|
+
for (r = 0, w = 0; r < s->size; ++r) {
|
332
|
+
switch (s->ptr[r]) {
|
333
|
+
case '\r':
|
334
|
+
if (s->ptr[r + 1] != '\n') {
|
335
|
+
s->ptr[w++] = ' ';
|
336
|
+
}
|
337
|
+
break;
|
338
|
+
case '\n':
|
339
|
+
s->ptr[w++] = ' ';
|
340
|
+
break;
|
341
|
+
default:
|
342
|
+
s->ptr[w++] = s->ptr[r];
|
343
|
+
}
|
344
|
+
if (s->ptr[r] != ' ') {
|
345
|
+
contains_nonspace = true;
|
346
|
+
}
|
347
|
+
}
|
348
|
+
|
349
|
+
// begins and ends with space?
|
350
|
+
if (contains_nonspace &&
|
351
|
+
s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
|
352
|
+
cmark_strbuf_drop(s, 1);
|
353
|
+
cmark_strbuf_truncate(s, w - 2);
|
354
|
+
} else {
|
355
|
+
cmark_strbuf_truncate(s, w);
|
356
|
+
}
|
357
|
+
|
358
|
+
}
|
359
|
+
|
360
|
+
|
324
361
|
// Parse backtick code section or raw backticks, return an inline.
|
325
362
|
// Assumes that the subject has a backtick at the current position.
|
326
363
|
static cmark_node *handle_backticks(subject *subj, int options) {
|
@@ -336,8 +373,7 @@ static cmark_node *handle_backticks(subject *subj, int options) {
|
|
336
373
|
|
337
374
|
cmark_strbuf_set(&buf, subj->input.data + startpos,
|
338
375
|
endpos - startpos - openticks.len);
|
339
|
-
|
340
|
-
cmark_strbuf_normalize_whitespace(&buf);
|
376
|
+
S_normalize_code(&buf);
|
341
377
|
|
342
378
|
cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
|
343
379
|
adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
|
@@ -345,6 +381,7 @@ static cmark_node *handle_backticks(subject *subj, int options) {
|
|
345
381
|
}
|
346
382
|
}
|
347
383
|
|
384
|
+
|
348
385
|
// Scan ***, **, or * and return number scanned, or 0.
|
349
386
|
// Advances position.
|
350
387
|
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
|
@@ -599,7 +636,6 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
|
|
599
636
|
delimiter *opener;
|
600
637
|
delimiter *old_closer;
|
601
638
|
bool opener_found;
|
602
|
-
bool odd_match;
|
603
639
|
delimiter *openers_bottom[3][128];
|
604
640
|
int i;
|
605
641
|
|
@@ -624,15 +660,14 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
|
|
624
660
|
// Now look backwards for first matching opener:
|
625
661
|
opener = closer->previous;
|
626
662
|
opener_found = false;
|
627
|
-
odd_match = false;
|
628
663
|
while (opener != NULL && opener != stack_bottom &&
|
629
664
|
opener != openers_bottom[closer->length % 3][closer->delim_char]) {
|
630
665
|
if (opener->can_open && opener->delim_char == closer->delim_char) {
|
631
666
|
// interior closer of size 2 can't match opener of size 1
|
632
667
|
// or of size 1 can't match 2
|
633
|
-
|
634
|
-
|
635
|
-
|
668
|
+
if (!(closer->can_open || opener->can_close) ||
|
669
|
+
closer->length % 3 == 0 ||
|
670
|
+
(opener->length + closer->length) % 3 != 0) {
|
636
671
|
opener_found = true;
|
637
672
|
break;
|
638
673
|
}
|
@@ -729,9 +764,10 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
|
|
729
764
|
}
|
730
765
|
cmark_node_insert_after(opener_inl, emph);
|
731
766
|
|
732
|
-
emph->start_line =
|
733
|
-
emph->
|
734
|
-
emph->
|
767
|
+
emph->start_line = opener_inl->start_line;
|
768
|
+
emph->end_line = closer_inl->end_line;
|
769
|
+
emph->start_column = opener_inl->start_column;
|
770
|
+
emph->end_column = closer_inl->end_column;
|
735
771
|
|
736
772
|
// if opener has 0 characters, remove it and its associated inline
|
737
773
|
if (opener_num_chars == 0) {
|
@@ -937,17 +973,21 @@ static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
|
|
937
973
|
else if (input->data[i] == '(') {
|
938
974
|
++nb_p;
|
939
975
|
++i;
|
940
|
-
|
941
|
-
|
976
|
+
if (nb_p > 32)
|
977
|
+
return -1;
|
942
978
|
} else if (input->data[i] == ')') {
|
943
979
|
if (nb_p == 0)
|
944
980
|
break;
|
945
981
|
--nb_p;
|
946
982
|
++i;
|
947
|
-
} else if (cmark_isspace(input->data[i]))
|
983
|
+
} else if (cmark_isspace(input->data[i])) {
|
984
|
+
if (i == offset) {
|
985
|
+
return -1;
|
986
|
+
}
|
948
987
|
break;
|
949
|
-
else
|
988
|
+
} else {
|
950
989
|
++i;
|
990
|
+
}
|
951
991
|
}
|
952
992
|
|
953
993
|
if (i >= input->len)
|
@@ -973,7 +1013,7 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
|
|
973
1013
|
} else if (input->data[i] == '\\')
|
974
1014
|
i += 2;
|
975
1015
|
else if (input->data[i] == '\n' || input->data[i] == '<')
|
976
|
-
return
|
1016
|
+
return -1;
|
977
1017
|
else
|
978
1018
|
++i;
|
979
1019
|
}
|
@@ -1097,19 +1137,77 @@ noMatch:
|
|
1097
1137
|
// What if we're a footnote link?
|
1098
1138
|
if (parser->options & CMARK_OPT_FOOTNOTES &&
|
1099
1139
|
opener->inl_text->next &&
|
1100
|
-
opener->inl_text->next->type == CMARK_NODE_TEXT
|
1101
|
-
|
1140
|
+
opener->inl_text->next->type == CMARK_NODE_TEXT) {
|
1141
|
+
|
1102
1142
|
cmark_chunk *literal = &opener->inl_text->next->as.literal;
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1143
|
+
|
1144
|
+
// look back to the opening '[', and skip ahead to the next character
|
1145
|
+
// if we're looking at a '[^' sequence, and there is other text or nodes
|
1146
|
+
// after the ^, let's call it a footnote reference.
|
1147
|
+
if ((literal->len > 0 && literal->data[0] == '^') && (literal->len > 1 || opener->inl_text->next->next)) {
|
1148
|
+
|
1149
|
+
// Before we got this far, the `handle_close_bracket` function may have
|
1150
|
+
// advanced the current state beyond our footnote's actual closing
|
1151
|
+
// bracket, ie if it went looking for a `link_label`.
|
1152
|
+
// Let's just rewind the subject's position:
|
1153
|
+
subj->pos = initial_pos;
|
1154
|
+
|
1155
|
+
cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
|
1156
|
+
|
1157
|
+
// the start and end of the footnote ref is the opening and closing brace
|
1158
|
+
// i.e. the subject's current position, and the opener's start_column
|
1159
|
+
int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset;
|
1160
|
+
int fnref_start_column = opener->inl_text->start_column;
|
1161
|
+
|
1162
|
+
// any given node delineates a substring of the line being processed,
|
1163
|
+
// with the remainder of the line being pointed to thru its 'literal'
|
1164
|
+
// struct member.
|
1165
|
+
// here, we copy the literal's pointer, moving it past the '^' character
|
1166
|
+
// for a length equal to the size of footnote reference text.
|
1167
|
+
// i.e. end_col minus start_col, minus the [ and the ^ characters
|
1168
|
+
//
|
1169
|
+
// this copies the footnote reference string, even if between the
|
1170
|
+
// `opener` and the subject's current position there are other nodes
|
1171
|
+
//
|
1172
|
+
// (first, check for underflows)
|
1173
|
+
if ((fnref_start_column + 2) <= fnref_end_column) {
|
1174
|
+
fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2);
|
1175
|
+
} else {
|
1176
|
+
fnref->as.literal = cmark_chunk_dup(literal, 1, 0);
|
1177
|
+
}
|
1178
|
+
|
1179
|
+
fnref->start_line = fnref->end_line = subj->line;
|
1180
|
+
fnref->start_column = fnref_start_column;
|
1181
|
+
fnref->end_column = fnref_end_column;
|
1182
|
+
|
1183
|
+
// we then replace the opener with this new fnref node, the net effect
|
1184
|
+
// being replacing the opening '[' text node with a `^footnote-ref]` node.
|
1185
|
+
cmark_node_insert_before(opener->inl_text, fnref);
|
1186
|
+
|
1112
1187
|
process_emphasis(parser, subj, opener->previous_delimiter);
|
1188
|
+
// sometimes, the footnote reference text gets parsed into multiple nodes
|
1189
|
+
// i.e. '[^example]' parsed into '[', '^exam', 'ple]'.
|
1190
|
+
// this happens for ex with the autolink extension. when the autolinker
|
1191
|
+
// finds the 'w' character, it will split the text into multiple nodes
|
1192
|
+
// in hopes of being able to match a 'www.' substring.
|
1193
|
+
//
|
1194
|
+
// because this function is called one character at a time via the
|
1195
|
+
// `parse_inlines` function, and the current subj->pos is pointing at the
|
1196
|
+
// closing ] brace, and because we copy all the text between the [ ]
|
1197
|
+
// braces, we should be able to safely ignore and delete any nodes after
|
1198
|
+
// the opener->inl_text->next.
|
1199
|
+
//
|
1200
|
+
// therefore, here we walk thru the list and free them all up
|
1201
|
+
cmark_node *next_node;
|
1202
|
+
cmark_node *current_node = opener->inl_text->next;
|
1203
|
+
while(current_node) {
|
1204
|
+
next_node = current_node->next;
|
1205
|
+
cmark_node_free(current_node);
|
1206
|
+
current_node = next_node;
|
1207
|
+
}
|
1208
|
+
|
1209
|
+
cmark_node_free(opener->inl_text);
|
1210
|
+
|
1113
1211
|
pop_bracket(subj);
|
1114
1212
|
return NULL;
|
1115
1213
|
}
|
@@ -1400,8 +1498,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
|
|
1400
1498
|
|
1401
1499
|
// parse link url:
|
1402
1500
|
spnl(&subj);
|
1403
|
-
if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1
|
1404
|
-
url.len > 0) {
|
1501
|
+
if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) {
|
1405
1502
|
subj.pos += matchlen;
|
1406
1503
|
} else {
|
1407
1504
|
return 0;
|
@@ -1410,7 +1507,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
|
|
1410
1507
|
// parse optional link_title
|
1411
1508
|
beforetitle = subj.pos;
|
1412
1509
|
spnl(&subj);
|
1413
|
-
matchlen = scan_link_title(&subj.input, subj.pos);
|
1510
|
+
matchlen = subj.pos == beforetitle ? 0 : scan_link_title(&subj.input, subj.pos);
|
1414
1511
|
if (matchlen) {
|
1415
1512
|
title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
|
1416
1513
|
subj.pos += matchlen;
|
@@ -1520,7 +1617,7 @@ int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser,
|
|
1520
1617
|
}
|
1521
1618
|
}
|
1522
1619
|
|
1523
|
-
while (peek_char(parser) == c && numdelims <= max_delims) {
|
1620
|
+
while (peek_char(parser) == c && numdelims < max_delims) {
|
1524
1621
|
numdelims++;
|
1525
1622
|
advance(parser);
|
1526
1623
|
}
|
data/ext/commonmarker/iterator.h
CHANGED
data/ext/commonmarker/map.h
CHANGED
data/ext/commonmarker/node.c
CHANGED
@@ -69,10 +69,11 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) {
|
|
69
69
|
return cmark_node_can_contain_type(node, (cmark_node_type) child->type);
|
70
70
|
}
|
71
71
|
|
72
|
-
cmark_node *
|
72
|
+
cmark_node *cmark_node_new_with_mem_and_ext(cmark_node_type type, cmark_mem *mem, cmark_syntax_extension *extension) {
|
73
73
|
cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node));
|
74
74
|
cmark_strbuf_init(mem, &node->content, 0);
|
75
75
|
node->type = (uint16_t)type;
|
76
|
+
node->extension = extension;
|
76
77
|
|
77
78
|
switch (node->type) {
|
78
79
|
case CMARK_NODE_HEADING:
|
@@ -91,12 +92,25 @@ cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) {
|
|
91
92
|
break;
|
92
93
|
}
|
93
94
|
|
95
|
+
if (node->extension && node->extension->opaque_alloc_func) {
|
96
|
+
node->extension->opaque_alloc_func(node->extension, mem, node);
|
97
|
+
}
|
98
|
+
|
94
99
|
return node;
|
95
100
|
}
|
96
101
|
|
97
|
-
cmark_node *
|
102
|
+
cmark_node *cmark_node_new_with_ext(cmark_node_type type, cmark_syntax_extension *extension) {
|
98
103
|
extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
|
99
|
-
return
|
104
|
+
return cmark_node_new_with_mem_and_ext(type, &CMARK_DEFAULT_MEM_ALLOCATOR, extension);
|
105
|
+
}
|
106
|
+
|
107
|
+
cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem)
|
108
|
+
{
|
109
|
+
return cmark_node_new_with_mem_and_ext(type, mem, NULL);
|
110
|
+
}
|
111
|
+
|
112
|
+
cmark_node *cmark_node_new(cmark_node_type type) {
|
113
|
+
return cmark_node_new_with_ext(type, NULL);
|
100
114
|
}
|
101
115
|
|
102
116
|
static void free_node_as(cmark_node *node) {
|
data/ext/commonmarker/node.h
CHANGED
@@ -21,6 +21,7 @@ typedef struct {
|
|
21
21
|
cmark_delim_type delimiter;
|
22
22
|
unsigned char bullet_char;
|
23
23
|
bool tight;
|
24
|
+
bool checked; // For task list extension
|
24
25
|
} cmark_list;
|
25
26
|
|
26
27
|
typedef struct {
|
@@ -50,6 +51,7 @@ typedef struct {
|
|
50
51
|
enum cmark_node__internal_flags {
|
51
52
|
CMARK_NODE__OPEN = (1 << 0),
|
52
53
|
CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
|
54
|
+
CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),
|
53
55
|
};
|
54
56
|
|
55
57
|
struct cmark_node {
|
@@ -74,6 +76,13 @@ struct cmark_node {
|
|
74
76
|
|
75
77
|
cmark_syntax_extension *extension;
|
76
78
|
|
79
|
+
union {
|
80
|
+
int ref_ix;
|
81
|
+
int def_count;
|
82
|
+
} footnote;
|
83
|
+
|
84
|
+
cmark_node *parent_footnote_def;
|
85
|
+
|
77
86
|
union {
|
78
87
|
cmark_chunk literal;
|
79
88
|
cmark_list list;
|
data/ext/commonmarker/parser.h
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
#define CMARK_PARSER_H
|
3
3
|
|
4
4
|
#include <stdio.h>
|
5
|
+
#include "references.h"
|
5
6
|
#include "node.h"
|
6
7
|
#include "buffer.h"
|
7
|
-
#include "memory.h"
|
8
8
|
|
9
9
|
#ifdef __cplusplus
|
10
10
|
extern "C" {
|
@@ -30,6 +30,7 @@ struct cmark_parser {
|
|
30
30
|
bufsize_t first_nonspace;
|
31
31
|
/* See the documentation for cmark_parser_get_first_nonspace_column() in cmark.h */
|
32
32
|
bufsize_t first_nonspace_column;
|
33
|
+
bufsize_t thematic_break_kill_pos;
|
33
34
|
/* See the documentation for cmark_parser_get_indent() in cmark.h */
|
34
35
|
int indent;
|
35
36
|
/* See the documentation for cmark_parser_is_blank() in cmark.h */
|
@@ -191,6 +191,28 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
|
|
191
191
|
case CMARK_NODE_IMAGE:
|
192
192
|
break;
|
193
193
|
|
194
|
+
case CMARK_NODE_FOOTNOTE_REFERENCE:
|
195
|
+
if (entering) {
|
196
|
+
LIT("[^");
|
197
|
+
OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL);
|
198
|
+
LIT("]");
|
199
|
+
}
|
200
|
+
break;
|
201
|
+
|
202
|
+
case CMARK_NODE_FOOTNOTE_DEFINITION:
|
203
|
+
if (entering) {
|
204
|
+
renderer->footnote_ix += 1;
|
205
|
+
LIT("[^");
|
206
|
+
char n[32];
|
207
|
+
snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
|
208
|
+
OUT(n, false, LITERAL);
|
209
|
+
LIT("]: ");
|
210
|
+
|
211
|
+
cmark_strbuf_puts(renderer->prefix, " ");
|
212
|
+
} else {
|
213
|
+
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
|
214
|
+
}
|
215
|
+
break;
|
194
216
|
default:
|
195
217
|
assert(false);
|
196
218
|
break;
|
data/ext/commonmarker/render.c
CHANGED
@@ -57,6 +57,7 @@ static void S_out(cmark_renderer *renderer, cmark_node *node,
|
|
57
57
|
}
|
58
58
|
}
|
59
59
|
renderer->column = 0;
|
60
|
+
renderer->last_breakable = 0;
|
60
61
|
renderer->begin_line = true;
|
61
62
|
renderer->begin_content = true;
|
62
63
|
renderer->need_cr -= 1;
|
@@ -97,21 +98,23 @@ static void S_out(cmark_renderer *renderer, cmark_node *node,
|
|
97
98
|
}
|
98
99
|
}
|
99
100
|
|
100
|
-
} else if (c == 10) {
|
101
|
-
cmark_strbuf_putc(renderer->buffer, '\n');
|
102
|
-
renderer->column = 0;
|
103
|
-
renderer->begin_line = true;
|
104
|
-
renderer->begin_content = true;
|
105
|
-
renderer->last_breakable = 0;
|
106
101
|
} else if (escape == LITERAL) {
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
102
|
+
if (c == 10) {
|
103
|
+
cmark_strbuf_putc(renderer->buffer, '\n');
|
104
|
+
renderer->column = 0;
|
105
|
+
renderer->begin_line = true;
|
106
|
+
renderer->begin_content = true;
|
107
|
+
renderer->last_breakable = 0;
|
108
|
+
} else {
|
109
|
+
cmark_render_code_point(renderer, c);
|
110
|
+
renderer->begin_line = false;
|
111
|
+
// we don't set 'begin_content' to false til we've
|
112
|
+
// finished parsing a digit. Reason: in commonmark
|
113
|
+
// we need to escape a potential list marker after
|
114
|
+
// a digit:
|
115
|
+
renderer->begin_content =
|
116
|
+
renderer->begin_content && cmark_isdigit((char)c) == 1;
|
117
|
+
}
|
115
118
|
} else {
|
116
119
|
(renderer->outc)(renderer, node, escape, c, nextc);
|
117
120
|
renderer->begin_line = false;
|
@@ -188,7 +191,7 @@ char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width,
|
|
188
191
|
}
|
189
192
|
|
190
193
|
// ensure final newline
|
191
|
-
if (renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
|
194
|
+
if (renderer.buffer->size == 0 || renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
|
192
195
|
cmark_strbuf_putc(renderer.buffer, '\n');
|
193
196
|
}
|
194
197
|
|