markly 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/conduct.md +133 -0
  4. data/ext/markly/arena.c +9 -8
  5. data/ext/markly/autolink.c +217 -134
  6. data/ext/markly/blocks.c +40 -4
  7. data/ext/markly/cmark-gfm-core-extensions.h +11 -11
  8. data/ext/markly/cmark-gfm-extension_api.h +1 -0
  9. data/ext/markly/cmark-gfm.h +18 -2
  10. data/ext/markly/cmark-gfm_version.h +2 -2
  11. data/ext/markly/cmark.c +3 -3
  12. data/ext/markly/commonmark.c +33 -38
  13. data/ext/markly/ext_scanners.c +360 -640
  14. data/ext/markly/extconf.rb +8 -1
  15. data/ext/markly/footnotes.c +23 -0
  16. data/ext/markly/footnotes.h +2 -0
  17. data/ext/markly/html.c +60 -23
  18. data/ext/markly/inlines.c +216 -61
  19. data/ext/markly/latex.c +6 -4
  20. data/ext/markly/man.c +7 -11
  21. data/ext/markly/map.c +11 -4
  22. data/ext/markly/map.h +5 -2
  23. data/ext/markly/markly.c +582 -586
  24. data/ext/markly/markly.h +1 -1
  25. data/ext/markly/node.c +76 -10
  26. data/ext/markly/node.h +49 -1
  27. data/ext/markly/parser.h +1 -0
  28. data/ext/markly/plaintext.c +12 -29
  29. data/ext/markly/references.c +1 -0
  30. data/ext/markly/render.c +15 -7
  31. data/ext/markly/scanners.c +13916 -20242
  32. data/ext/markly/scanners.h +8 -0
  33. data/ext/markly/scanners.re +47 -8
  34. data/ext/markly/strikethrough.c +1 -1
  35. data/ext/markly/table.c +143 -74
  36. data/ext/markly/xml.c +2 -1
  37. data/lib/markly/flags.rb +16 -0
  38. data/lib/markly/node/inspect.rb +59 -53
  39. data/lib/markly/node.rb +125 -58
  40. data/lib/markly/renderer/generic.rb +136 -0
  41. data/lib/markly/renderer/html.rb +301 -0
  42. data/lib/markly/version.rb +7 -1
  43. data/lib/markly.rb +38 -32
  44. data/license.md +39 -0
  45. data/readme.md +36 -0
  46. data.tar.gz.sig +0 -0
  47. metadata +63 -31
  48. metadata.gz.sig +0 -0
  49. data/bin/markly +0 -94
  50. data/lib/markly/markly.so +0 -0
  51. data/lib/markly/renderer/html_renderer.rb +0 -281
  52. data/lib/markly/renderer.rb +0 -133
@@ -1,6 +1,13 @@
1
+ #!/usr/bin/env ruby
1
2
  # frozen_string_literal: true
2
3
 
3
- # Loads mkmf which is used to make makefiles for Ruby extensions
4
+ # Released under the MIT License.
5
+ # Copyright, 2014, by John MacFarlane.
6
+ # Copyright, 2015-2019, by Garen Torikian.
7
+ # Copyright, 2016-2017, by Yuki Izumi.
8
+ # Copyright, 2017, by Ashe Connor.
9
+ # Copyright, 2020-2023, by Samuel Williams.
10
+
4
11
  require 'mkmf'
5
12
 
6
13
  $CFLAGS << " -O3 -std=c99"
@@ -38,3 +38,26 @@ void cmark_footnote_create(cmark_map *map, cmark_node *node) {
38
38
  cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
39
39
  return cmark_map_new(mem, footnote_free);
40
40
  }
41
+
42
+ // Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
43
+ // unlink all of the footnote nodes before freeing their memory.
44
+ //
45
+ // Sometimes, two (unused) footnote nodes can end up referencing each other,
46
+ // which as they get freed up by calling `cmark_map_free` -> `footnote_free` ->
47
+ // etc, can lead to a use-after-free error.
48
+ //
49
+ // Better to `unlink` every footnote node first, setting their next, prev, and
50
+ // parent pointers to NULL, and only then walk thru & free them up.
51
+ void cmark_unlink_footnotes_map(cmark_map *map) {
52
+ cmark_map_entry *ref;
53
+ cmark_map_entry *next;
54
+
55
+ ref = map->refs;
56
+ while(ref) {
57
+ next = ref->next;
58
+ if (((cmark_footnote *)ref)->node) {
59
+ cmark_node_unlink(((cmark_footnote *)ref)->node);
60
+ }
61
+ ref = next;
62
+ }
63
+ }
@@ -18,6 +18,8 @@ typedef struct cmark_footnote cmark_footnote;
18
18
  void cmark_footnote_create(cmark_map *map, cmark_node *node);
19
19
  cmark_map *cmark_footnote_map_new(cmark_mem *mem);
20
20
 
21
+ void cmark_unlink_footnotes_map(cmark_map *map);
22
+
21
23
  #ifdef __cplusplus
22
24
  }
23
25
  #endif
data/ext/markly/html.c CHANGED
@@ -59,16 +59,44 @@ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size
59
59
  cmark_strbuf_put(html, data, (bufsize_t)len);
60
60
  }
61
61
 
62
- static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) {
62
+ static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html, cmark_node *node) {
63
63
  if (renderer->written_footnote_ix >= renderer->footnote_ix)
64
64
  return false;
65
65
  renderer->written_footnote_ix = renderer->footnote_ix;
66
+ char m[32];
67
+ snprintf(m, sizeof(m), "%d", renderer->written_footnote_ix);
68
+
69
+ cmark_strbuf_puts(html, "<a href=\"#fnref-");
70
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
71
+ cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"");
72
+ cmark_strbuf_puts(html, m);
73
+ cmark_strbuf_puts(html, "\" aria-label=\"Back to reference ");
74
+ cmark_strbuf_puts(html, m);
75
+ cmark_strbuf_puts(html, "\">↩</a>");
76
+
77
+ if (node->footnote.def_count > 1)
78
+ {
79
+ for(int i = 2; i <= node->footnote.def_count; i++) {
80
+ char n[32];
81
+ snprintf(n, sizeof(n), "%d", i);
66
82
 
67
- cmark_strbuf_puts(html, "<a href=\"#fnref");
68
- char n[32];
69
- snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
70
- cmark_strbuf_puts(html, n);
71
- cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩</a>");
83
+ cmark_strbuf_puts(html, " <a href=\"#fnref-");
84
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
85
+ cmark_strbuf_puts(html, "-");
86
+ cmark_strbuf_puts(html, n);
87
+ cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"");
88
+ cmark_strbuf_puts(html, m);
89
+ cmark_strbuf_puts(html, "-");
90
+ cmark_strbuf_puts(html, n);
91
+ cmark_strbuf_puts(html, "\" aria-label=\"Back to reference ");
92
+ cmark_strbuf_puts(html, m);
93
+ cmark_strbuf_puts(html, "-");
94
+ cmark_strbuf_puts(html, n);
95
+ cmark_strbuf_puts(html, "\">↩<sup class=\"footnote-ref\">");
96
+ cmark_strbuf_puts(html, n);
97
+ cmark_strbuf_puts(html, "</sup></a>");
98
+ }
99
+ }
72
100
 
73
101
  return true;
74
102
  }
@@ -273,7 +301,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
273
301
  } else {
274
302
  if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) {
275
303
  cmark_strbuf_putc(html, ' ');
276
- S_put_footnote_backref(renderer, html);
304
+ S_put_footnote_backref(renderer, html, parent);
277
305
  }
278
306
  cmark_strbuf_puts(html, "</p>\n");
279
307
  }
@@ -336,10 +364,12 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
336
364
  break;
337
365
 
338
366
  case CMARK_NODE_STRONG:
339
- if (entering) {
340
- cmark_strbuf_puts(html, "<strong>");
341
- } else {
342
- cmark_strbuf_puts(html, "</strong>");
367
+ if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) {
368
+ if (entering) {
369
+ cmark_strbuf_puts(html, "<strong>");
370
+ } else {
371
+ cmark_strbuf_puts(html, "</strong>");
372
+ }
343
373
  }
344
374
  break;
345
375
 
@@ -392,16 +422,15 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
392
422
  case CMARK_NODE_FOOTNOTE_DEFINITION:
393
423
  if (entering) {
394
424
  if (renderer->footnote_ix == 0) {
395
- cmark_strbuf_puts(html, "<section class=\"footnotes\">\n<ol>\n");
425
+ cmark_strbuf_puts(html, "<section class=\"footnotes\" data-footnotes>\n<ol>\n");
396
426
  }
397
427
  ++renderer->footnote_ix;
398
- cmark_strbuf_puts(html, "<li id=\"fn");
399
- char n[32];
400
- snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
401
- cmark_strbuf_puts(html, n);
428
+
429
+ cmark_strbuf_puts(html, "<li id=\"fn-");
430
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
402
431
  cmark_strbuf_puts(html, "\">\n");
403
432
  } else {
404
- if (S_put_footnote_backref(renderer, html)) {
433
+ if (S_put_footnote_backref(renderer, html, node)) {
405
434
  cmark_strbuf_putc(html, '\n');
406
435
  }
407
436
  cmark_strbuf_puts(html, "</li>\n");
@@ -410,12 +439,20 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
410
439
 
411
440
  case CMARK_NODE_FOOTNOTE_REFERENCE:
412
441
  if (entering) {
413
- cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn");
414
- cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
415
- cmark_strbuf_puts(html, "\" id=\"fnref");
416
- cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
417
- cmark_strbuf_puts(html, "\">");
418
- cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
442
+ cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn-");
443
+ houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);
444
+ cmark_strbuf_puts(html, "\" id=\"fnref-");
445
+ houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);
446
+
447
+ if (node->footnote.ref_ix > 1) {
448
+ char n[32];
449
+ snprintf(n, sizeof(n), "%d", node->footnote.ref_ix);
450
+ cmark_strbuf_puts(html, "-");
451
+ cmark_strbuf_puts(html, n);
452
+ }
453
+
454
+ cmark_strbuf_puts(html, "\" data-footnote-ref>");
455
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
419
456
  cmark_strbuf_puts(html, "</a></sup>");
420
457
  }
421
458
  break;
data/ext/markly/inlines.c CHANGED
@@ -35,17 +35,24 @@ static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
35
35
 
36
36
  typedef struct bracket {
37
37
  struct bracket *previous;
38
- struct delimiter *previous_delimiter;
39
38
  cmark_node *inl_text;
40
39
  bufsize_t position;
41
40
  bool image;
42
41
  bool active;
43
42
  bool bracket_after;
43
+ bool in_bracket_image0;
44
+ bool in_bracket_image1;
44
45
  } bracket;
45
46
 
47
+ #define FLAG_SKIP_HTML_CDATA (1u << 0)
48
+ #define FLAG_SKIP_HTML_DECLARATION (1u << 1)
49
+ #define FLAG_SKIP_HTML_PI (1u << 2)
50
+ #define FLAG_SKIP_HTML_COMMENT (1u << 3)
51
+
46
52
  typedef struct subject{
47
53
  cmark_mem *mem;
48
54
  cmark_chunk input;
55
+ unsigned flags;
49
56
  int line;
50
57
  bufsize_t pos;
51
58
  int block_offset;
@@ -55,6 +62,7 @@ typedef struct subject{
55
62
  bracket *last_bracket;
56
63
  bufsize_t backticks[MAXBACKTICKS + 1];
57
64
  bool scanned_for_backticks;
65
+ bool no_link_openers;
58
66
  } subject;
59
67
 
60
68
  // Extensions may populate this.
@@ -109,6 +117,24 @@ static cmark_node *make_str_with_entities(subject *subj,
109
117
  }
110
118
  }
111
119
 
120
+ // Like cmark_node_append_child but without costly sanity checks.
121
+ // Assumes that child was newly created.
122
+ static void append_child(cmark_node *node, cmark_node *child) {
123
+ cmark_node *old_last_child = node->last_child;
124
+
125
+ child->next = NULL;
126
+ child->prev = old_last_child;
127
+ child->parent = node;
128
+ node->last_child = child;
129
+
130
+ if (old_last_child) {
131
+ old_last_child->next = child;
132
+ } else {
133
+ // Also set first_child if node previously had no children.
134
+ node->first_child = child;
135
+ }
136
+ }
137
+
112
138
  // Duplicate a chunk by creating a copy of the buffer not by reusing the
113
139
  // buffer like cmark_chunk_dup does.
114
140
  static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) {
@@ -152,7 +178,7 @@ static CMARK_INLINE cmark_node *make_autolink(subject *subj,
152
178
  link->start_line = link->end_line = subj->line;
153
179
  link->start_column = start_column + 1;
154
180
  link->end_column = end_column + 1;
155
- cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
181
+ append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
156
182
  return link;
157
183
  }
158
184
 
@@ -161,6 +187,7 @@ static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset,
161
187
  int i;
162
188
  e->mem = mem;
163
189
  e->input = *chunk;
190
+ e->flags = 0;
164
191
  e->line = line_number;
165
192
  e->pos = 0;
166
193
  e->block_offset = block_offset;
@@ -172,6 +199,7 @@ static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset,
172
199
  e->backticks[i] = 0;
173
200
  }
174
201
  e->scanned_for_backticks = false;
202
+ e->no_link_openers = true;
175
203
  }
176
204
 
177
205
  static CMARK_INLINE int isbacktick(int c) { return (c == '`'); }
@@ -503,6 +531,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
503
531
  delim->can_open = can_open;
504
532
  delim->can_close = can_close;
505
533
  delim->inl_text = inl_text;
534
+ delim->position = subj->pos;
506
535
  delim->length = inl_text->as.literal.len;
507
536
  delim->previous = subj->last_delim;
508
537
  delim->next = NULL;
@@ -516,15 +545,24 @@ static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
516
545
  bracket *b = (bracket *)subj->mem->calloc(1, sizeof(bracket));
517
546
  if (subj->last_bracket != NULL) {
518
547
  subj->last_bracket->bracket_after = true;
548
+ b->in_bracket_image0 = subj->last_bracket->in_bracket_image0;
549
+ b->in_bracket_image1 = subj->last_bracket->in_bracket_image1;
519
550
  }
520
551
  b->image = image;
521
552
  b->active = true;
522
553
  b->inl_text = inl_text;
523
554
  b->previous = subj->last_bracket;
524
- b->previous_delimiter = subj->last_delim;
525
555
  b->position = subj->pos;
526
556
  b->bracket_after = false;
557
+ if (image) {
558
+ b->in_bracket_image1 = true;
559
+ } else {
560
+ b->in_bracket_image0 = true;
561
+ }
527
562
  subj->last_bracket = b;
563
+ if (!image) {
564
+ subj->no_link_openers = false;
565
+ }
528
566
  }
529
567
 
530
568
  // Assumes the subject has a c at the current position.
@@ -631,12 +669,13 @@ static cmark_syntax_extension *get_extension_for_special_char(cmark_parser *pars
631
669
  return NULL;
632
670
  }
633
671
 
634
- static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *stack_bottom) {
635
- delimiter *closer = subj->last_delim;
672
+ static void process_emphasis(cmark_parser *parser, subject *subj, bufsize_t stack_bottom) {
673
+ delimiter *candidate;
674
+ delimiter *closer = NULL;
636
675
  delimiter *opener;
637
676
  delimiter *old_closer;
638
677
  bool opener_found;
639
- delimiter *openers_bottom[3][128];
678
+ bufsize_t openers_bottom[3][128];
640
679
  int i;
641
680
 
642
681
  // initialize openers_bottom:
@@ -649,8 +688,10 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
649
688
  }
650
689
 
651
690
  // move back to first relevant delim.
652
- while (closer != NULL && closer->previous != stack_bottom) {
653
- closer = closer->previous;
691
+ candidate = subj->last_delim;
692
+ while (candidate != NULL && candidate->position >= stack_bottom) {
693
+ closer = candidate;
694
+ candidate = candidate->previous;
654
695
  }
655
696
 
656
697
  // now move forward, looking for closers, and handling each
@@ -660,8 +701,8 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
660
701
  // Now look backwards for first matching opener:
661
702
  opener = closer->previous;
662
703
  opener_found = false;
663
- while (opener != NULL && opener != stack_bottom &&
664
- opener != openers_bottom[closer->length % 3][closer->delim_char]) {
704
+ while (opener != NULL && opener->position >= stack_bottom &&
705
+ opener->position >= openers_bottom[closer->length % 3][closer->delim_char]) {
665
706
  if (opener->can_open && opener->delim_char == closer->delim_char) {
666
707
  // interior closer of size 2 can't match opener of size 1
667
708
  // or of size 1 can't match 2
@@ -687,27 +728,29 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
687
728
  } else {
688
729
  closer = closer->next;
689
730
  }
690
- } else if (closer->delim_char == '\'') {
731
+ } else if (closer->delim_char == '\'' || closer->delim_char == '"') {
691
732
  cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
692
- closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
693
- if (opener_found) {
694
- cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
695
- opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
733
+ if (closer->delim_char == '\'') {
734
+ closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
735
+ } else {
736
+ closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
696
737
  }
697
738
  closer = closer->next;
698
- } else if (closer->delim_char == '"') {
699
- cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
700
- closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
701
739
  if (opener_found) {
702
740
  cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
703
- opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
741
+ if (old_closer->delim_char == '\'') {
742
+ opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
743
+ } else {
744
+ opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
745
+ }
746
+ remove_delimiter(subj, opener);
747
+ remove_delimiter(subj, old_closer);
704
748
  }
705
- closer = closer->next;
706
749
  }
707
750
  if (!opener_found) {
708
751
  // set lower bound for future searches for openers
709
752
  openers_bottom[old_closer->length % 3][old_closer->delim_char] =
710
- old_closer->previous;
753
+ old_closer->position;
711
754
  if (!old_closer->can_open) {
712
755
  // we can remove a closer that can't be an
713
756
  // opener, once we've seen there's no
@@ -720,7 +763,8 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
720
763
  }
721
764
  }
722
765
  // free all delimiters in list until stack_bottom:
723
- while (subj->last_delim != NULL && subj->last_delim != stack_bottom) {
766
+ while (subj->last_delim != NULL &&
767
+ subj->last_delim->position >= stack_bottom) {
724
768
  remove_delimiter(subj, subj->last_delim);
725
769
  }
726
770
  }
@@ -759,7 +803,8 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
759
803
  tmp = opener_inl->next;
760
804
  while (tmp && tmp != closer_inl) {
761
805
  tmpnext = tmp->next;
762
- cmark_node_append_child(emph, tmp);
806
+ cmark_node_unlink(tmp);
807
+ append_child(emph, tmp);
763
808
  tmp = tmpnext;
764
809
  }
765
810
  cmark_node_insert_after(opener_inl, emph);
@@ -890,7 +935,63 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) {
890
935
  }
891
936
 
892
937
  // finally, try to match an html tag
893
- matchlen = scan_html_tag(&subj->input, subj->pos);
938
+ if (subj->pos + 2 <= subj->input.len) {
939
+ int c = subj->input.data[subj->pos];
940
+ if (c == '!' && (subj->flags & FLAG_SKIP_HTML_COMMENT) == 0) {
941
+ c = subj->input.data[subj->pos+1];
942
+ if (c == '-' && subj->input.data[subj->pos+2] == '-') {
943
+ if (subj->input.data[subj->pos+3] == '>') {
944
+ matchlen = 4;
945
+ } else if (subj->input.data[subj->pos+3] == '-' &&
946
+ subj->input.data[subj->pos+4] == '>') {
947
+ matchlen = 5;
948
+ } else {
949
+ matchlen = scan_html_comment(&subj->input, subj->pos + 1);
950
+ if (matchlen > 0) {
951
+ matchlen += 1; // prefix "<"
952
+ } else { // no match through end of input: set a flag so
953
+ // we don't reparse looking for -->:
954
+ subj->flags |= FLAG_SKIP_HTML_COMMENT;
955
+ }
956
+ }
957
+ } else if (c == '[') {
958
+ if ((subj->flags & FLAG_SKIP_HTML_CDATA) == 0) {
959
+ matchlen = scan_html_cdata(&subj->input, subj->pos + 2);
960
+ if (matchlen > 0) {
961
+ // The regex doesn't require the final "]]>". But if we're not at
962
+ // the end of input, it must come after the match. Otherwise,
963
+ // disable subsequent scans to avoid quadratic behavior.
964
+ matchlen += 5; // prefix "![", suffix "]]>"
965
+ if (subj->pos + matchlen > subj->input.len) {
966
+ subj->flags |= FLAG_SKIP_HTML_CDATA;
967
+ matchlen = 0;
968
+ }
969
+ }
970
+ }
971
+ } else if ((subj->flags & FLAG_SKIP_HTML_DECLARATION) == 0) {
972
+ matchlen = scan_html_declaration(&subj->input, subj->pos + 1);
973
+ if (matchlen > 0) {
974
+ matchlen += 2; // prefix "!", suffix ">"
975
+ if (subj->pos + matchlen > subj->input.len) {
976
+ subj->flags |= FLAG_SKIP_HTML_DECLARATION;
977
+ matchlen = 0;
978
+ }
979
+ }
980
+ }
981
+ } else if (c == '?') {
982
+ if ((subj->flags & FLAG_SKIP_HTML_PI) == 0) {
983
+ // Note that we allow an empty match.
984
+ matchlen = scan_html_pi(&subj->input, subj->pos + 1);
985
+ matchlen += 3; // prefix "?", suffix "?>"
986
+ if (subj->pos + matchlen > subj->input.len) {
987
+ subj->flags |= FLAG_SKIP_HTML_PI;
988
+ matchlen = 0;
989
+ }
990
+ }
991
+ } else {
992
+ matchlen = scan_html_tag(&subj->input, subj->pos);
993
+ }
994
+ }
894
995
  if (matchlen > 0) {
895
996
  contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
896
997
  subj->pos += matchlen;
@@ -1056,16 +1157,16 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
1056
1157
  return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1057
1158
  }
1058
1159
 
1059
- if (!opener->active) {
1160
+ // If we got here, we matched a potential link/image text.
1161
+ // Now we check to see if it's a link/image.
1162
+ is_image = opener->image;
1163
+
1164
+ if (!is_image && subj->no_link_openers) {
1060
1165
  // take delimiter off stack
1061
1166
  pop_bracket(subj);
1062
1167
  return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1063
1168
  }
1064
1169
 
1065
- // If we got here, we matched a potential link/image text.
1066
- // Now we check to see if it's a link/image.
1067
- is_image = opener->image;
1068
-
1069
1170
  after_link_text_pos = subj->pos;
1070
1171
 
1071
1172
  // First, look for an inline link.
@@ -1137,19 +1238,77 @@ noMatch:
1137
1238
  // What if we're a footnote link?
1138
1239
  if (parser->options & CMARK_OPT_FOOTNOTES &&
1139
1240
  opener->inl_text->next &&
1140
- opener->inl_text->next->type == CMARK_NODE_TEXT &&
1141
- !opener->inl_text->next->next) {
1241
+ opener->inl_text->next->type == CMARK_NODE_TEXT) {
1242
+
1142
1243
  cmark_chunk *literal = &opener->inl_text->next->as.literal;
1143
- if (literal->len > 1 && literal->data[0] == '^') {
1144
- inl = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
1145
- inl->as.literal = cmark_chunk_dup(literal, 1, literal->len - 1);
1146
- inl->start_line = inl->end_line = subj->line;
1147
- inl->start_column = opener->inl_text->start_column;
1148
- inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
1149
- cmark_node_insert_before(opener->inl_text, inl);
1150
- cmark_node_free(opener->inl_text->next);
1244
+
1245
+ // look back to the opening '[', and skip ahead to the next character
1246
+ // if we're looking at a '[^' sequence, and there is other text or nodes
1247
+ // after the ^, let's call it a footnote reference.
1248
+ if ((literal->len > 0 && literal->data[0] == '^') && (literal->len > 1 || opener->inl_text->next->next)) {
1249
+
1250
+ // Before we got this far, the `handle_close_bracket` function may have
1251
+ // advanced the current state beyond our footnote's actual closing
1252
+ // bracket, ie if it went looking for a `link_label`.
1253
+ // Let's just rewind the subject's position:
1254
+ subj->pos = initial_pos;
1255
+
1256
+ cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
1257
+
1258
+ // the start and end of the footnote ref is the opening and closing brace
1259
+ // i.e. the subject's current position, and the opener's start_column
1260
+ int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset;
1261
+ int fnref_start_column = opener->inl_text->start_column;
1262
+
1263
+ // any given node delineates a substring of the line being processed,
1264
+ // with the remainder of the line being pointed to thru its 'literal'
1265
+ // struct member.
1266
+ // here, we copy the literal's pointer, moving it past the '^' character
1267
+ // for a length equal to the size of footnote reference text.
1268
+ // i.e. end_col minus start_col, minus the [ and the ^ characters
1269
+ //
1270
+ // this copies the footnote reference string, even if between the
1271
+ // `opener` and the subject's current position there are other nodes
1272
+ //
1273
+ // (first, check for underflows)
1274
+ if ((fnref_start_column + 2) <= fnref_end_column) {
1275
+ fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2);
1276
+ } else {
1277
+ fnref->as.literal = cmark_chunk_dup(literal, 1, 0);
1278
+ }
1279
+
1280
+ fnref->start_line = fnref->end_line = subj->line;
1281
+ fnref->start_column = fnref_start_column;
1282
+ fnref->end_column = fnref_end_column;
1283
+
1284
+ // we then replace the opener with this new fnref node, the net effect
1285
+ // being replacing the opening '[' text node with a `^footnote-ref]` node.
1286
+ cmark_node_insert_before(opener->inl_text, fnref);
1287
+
1288
+ process_emphasis(parser, subj, opener->position);
1289
+ // sometimes, the footnote reference text gets parsed into multiple nodes
1290
+ // i.e. '[^example]' parsed into '[', '^exam', 'ple]'.
1291
+ // this happens for ex with the autolink extension. when the autolinker
1292
+ // finds the 'w' character, it will split the text into multiple nodes
1293
+ // in hopes of being able to match a 'www.' substring.
1294
+ //
1295
+ // because this function is called one character at a time via the
1296
+ // `parse_inlines` function, and the current subj->pos is pointing at the
1297
+ // closing ] brace, and because we copy all the text between the [ ]
1298
+ // braces, we should be able to safely ignore and delete any nodes after
1299
+ // the opener->inl_text->next.
1300
+ //
1301
+ // therefore, here we walk thru the list and free them all up
1302
+ cmark_node *next_node;
1303
+ cmark_node *current_node = opener->inl_text->next;
1304
+ while(current_node) {
1305
+ next_node = current_node->next;
1306
+ cmark_node_free(current_node);
1307
+ current_node = next_node;
1308
+ }
1309
+
1151
1310
  cmark_node_free(opener->inl_text);
1152
- process_emphasis(parser, subj, opener->previous_delimiter);
1311
+
1153
1312
  pop_bracket(subj);
1154
1313
  return NULL;
1155
1314
  }
@@ -1171,31 +1330,22 @@ match:
1171
1330
  tmp = opener->inl_text->next;
1172
1331
  while (tmp) {
1173
1332
  tmpnext = tmp->next;
1174
- cmark_node_append_child(inl, tmp);
1333
+ cmark_node_unlink(tmp);
1334
+ append_child(inl, tmp);
1175
1335
  tmp = tmpnext;
1176
1336
  }
1177
1337
 
1178
1338
  // Free the bracket [:
1179
1339
  cmark_node_free(opener->inl_text);
1180
1340
 
1181
- process_emphasis(parser, subj, opener->previous_delimiter);
1341
+ process_emphasis(parser, subj, opener->position);
1182
1342
  pop_bracket(subj);
1183
1343
 
1184
- // Now, if we have a link, we also want to deactivate earlier link
1185
- // delimiters. (This code can be removed if we decide to allow links
1344
+ // Now, if we have a link, we also want to deactivate links until
1345
+ // we get a new opener. (This code can be removed if we decide to allow links
1186
1346
  // inside links.)
1187
1347
  if (!is_image) {
1188
- opener = subj->last_bracket;
1189
- while (opener != NULL) {
1190
- if (!opener->image) {
1191
- if (!opener->active) {
1192
- break;
1193
- } else {
1194
- opener->active = false;
1195
- }
1196
- }
1197
- opener = opener->previous;
1198
- }
1348
+ subj->no_link_openers = true;
1199
1349
  }
1200
1350
 
1201
1351
  return NULL;
@@ -1373,7 +1523,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
1373
1523
  new_inl = make_str(subj, startpos, endpos - 1, contents);
1374
1524
  }
1375
1525
  if (new_inl != NULL) {
1376
- cmark_node_append_child(parent, new_inl);
1526
+ append_child(parent, new_inl);
1377
1527
  }
1378
1528
 
1379
1529
  return 1;
@@ -1392,7 +1542,7 @@ void cmark_parse_inlines(cmark_parser *parser,
1392
1542
  while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options))
1393
1543
  ;
1394
1544
 
1395
- process_emphasis(parser, &subj, NULL);
1545
+ process_emphasis(parser, &subj, 0);
1396
1546
  // free bracket and delim stack
1397
1547
  while (subj.last_delim) {
1398
1548
  remove_delimiter(&subj, subj.last_delim);
@@ -1604,10 +1754,15 @@ cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) {
1604
1754
  }
1605
1755
 
1606
1756
  int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image) {
1607
- for (bracket *b = parser->last_bracket; b; b = b->previous)
1608
- if (b->active && b->image == (image != 0))
1609
- return 1;
1610
- return 0;
1757
+ bracket *b = parser->last_bracket;
1758
+ if (!b) {
1759
+ return 0;
1760
+ }
1761
+ if (image != 0) {
1762
+ return b->in_bracket_image1;
1763
+ } else {
1764
+ return b->in_bracket_image0;
1765
+ }
1611
1766
  }
1612
1767
 
1613
1768
  void cmark_node_unput(cmark_node *node, int n) {
data/ext/markly/latex.c CHANGED
@@ -385,10 +385,12 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
385
385
  break;
386
386
 
387
387
  case CMARK_NODE_STRONG:
388
- if (entering) {
389
- LIT("\\textbf{");
390
- } else {
391
- LIT("}");
388
+ if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) {
389
+ if (entering) {
390
+ LIT("\\textbf{");
391
+ } else {
392
+ LIT("}");
393
+ }
392
394
  }
393
395
  break;
394
396