commonmarker 0.19.0 → 0.20.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of commonmarker might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a9c068146510f80180b4524934cf90ba56044100
4
- data.tar.gz: 5d5ff7d6b1a68340d8e330d6b6a1e9b1d01df944
3
+ metadata.gz: ddd9dac2dc959a7d2c4297934501575ab3451594
4
+ data.tar.gz: f8d6defe22a3e3bcf16c20bd5d6403bdae7555e1
5
5
  SHA512:
6
- metadata.gz: 828329dac75b03c52a50674a53a994791bc6c26fb05fc7fed4377ff241e7b87560b3dd9b2aa8be0b0a4e2a7ac194500a4e584872c6a1bc44618a13e3cad741f8
7
- data.tar.gz: 5bf814489204bf76b497cd8dd570dd88b1f8af8f9a45847dad091b756b243ae618466e8e85441c4e39a0d22191e3a2001ee15c8309a80ec93f630952c92e0108
6
+ metadata.gz: 0ea4f88fe4b6c81f70ecde02d26897bfc12b84dde2275becadf85121cdf07be48796fc028537656345c64dea903371703aa233c00071c18e496aa204d3702788
7
+ data.tar.gz: '08413704c3d74d6f5148aa64dbebce1d66ab313494656d0ea0015cd99c9cbb5707398678d68cf70ad324a93820faf0dda0d1ae13efda5083e69a4a3ac7dba3bd'
@@ -36,6 +36,10 @@ static bool S_last_line_blank(const cmark_node *node) {
36
36
  return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
37
37
  }
38
38
 
39
+ static bool S_last_line_checked(const cmark_node *node) {
40
+ return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0;
41
+ }
42
+
39
43
  static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
40
44
  return (cmark_node_type)node->type;
41
45
  }
@@ -47,6 +51,10 @@ static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
47
51
  node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
48
52
  }
49
53
 
54
+ static void S_set_last_line_checked(cmark_node *node) {
55
+ node->flags |= CMARK_NODE__LAST_LINE_CHECKED;
56
+ }
57
+
50
58
  static CMARK_INLINE bool S_is_line_end_char(char c) {
51
59
  return (c == '\n' || c == '\r');
52
60
  }
@@ -121,8 +129,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
121
129
  parser->root = document;
122
130
  parser->current = document;
123
131
 
124
- parser->last_buffer_ended_with_cr = false;
125
-
126
132
  parser->syntax_extensions = saved_exts;
127
133
  parser->inline_syntax_extensions = saved_inline_exts;
128
134
  parser->options = saved_options;
@@ -234,19 +240,35 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
234
240
 
235
241
  // Check to see if a node ends with a blank line, descending
236
242
  // if needed into lists and sublists.
237
- static bool ends_with_blank_line(cmark_node *node) {
238
- cmark_node *cur = node;
239
- while (cur != NULL) {
240
- if (S_last_line_blank(cur)) {
241
- return true;
242
- }
243
- if (S_type(cur) == CMARK_NODE_LIST || S_type(cur) == CMARK_NODE_ITEM) {
244
- cur = cur->last_child;
245
- } else {
246
- cur = NULL;
247
- }
243
+ static bool S_ends_with_blank_line(cmark_node *node) {
244
+ if (S_last_line_checked(node)) {
245
+ return(S_last_line_blank(node));
246
+ } else if ((S_type(node) == CMARK_NODE_LIST ||
247
+ S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
248
+ S_set_last_line_checked(node);
249
+ return(S_ends_with_blank_line(node->last_child));
250
+ } else {
251
+ S_set_last_line_checked(node);
252
+ return (S_last_line_blank(node));
248
253
  }
249
- return false;
254
+ }
255
+
256
+ // returns true if content remains after link defs are resolved.
257
+ static bool resolve_reference_link_definitions(
258
+ cmark_parser *parser,
259
+ cmark_node *b) {
260
+ bufsize_t pos;
261
+ cmark_strbuf *node_content = &b->content;
262
+ cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
263
+ while (chunk.len && chunk.data[0] == '[' &&
264
+ (pos = cmark_parse_reference_inline(parser->mem, &chunk,
265
+ parser->refmap))) {
266
+
267
+ chunk.data += pos;
268
+ chunk.len -= pos;
269
+ }
270
+ cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
271
+ return !is_blank(&b->content, 0);
250
272
  }
251
273
 
252
274
  static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
@@ -254,6 +276,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
254
276
  cmark_node *item;
255
277
  cmark_node *subitem;
256
278
  cmark_node *parent;
279
+ bool has_content;
257
280
 
258
281
  parent = b->parent;
259
282
  assert(b->flags &
@@ -283,15 +306,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
283
306
  switch (S_type(b)) {
284
307
  case CMARK_NODE_PARAGRAPH:
285
308
  {
286
- cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
287
- while (chunk.len && chunk.data[0] == '[' &&
288
- (pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) {
289
-
290
- chunk.data += pos;
291
- chunk.len -= pos;
292
- }
293
- cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
294
- if (is_blank(node_content, 0)) {
309
+ has_content = resolve_reference_link_definitions(parser, b);
310
+ if (!has_content) {
295
311
  // remove blank node (former reference def)
296
312
  cmark_node_free(b);
297
313
  }
@@ -343,7 +359,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
343
359
  // spaces between them:
344
360
  subitem = item->first_child;
345
361
  while (subitem) {
346
- if (ends_with_blank_line(subitem) && (item->next || subitem->next)) {
362
+ if ((item->next || subitem->next) &&
363
+ S_ends_with_blank_line(subitem)) {
347
364
  b->as.list.tight = false;
348
365
  break;
349
366
  }
@@ -748,6 +765,40 @@ static void chop_trailing_hashtags(cmark_chunk *ch) {
748
765
  }
749
766
  }
750
767
 
768
+ // Check for thematic break. On failure, return 0 and update
769
+ // thematic_break_kill_pos with the index at which the
770
+ // parse fails. On success, return length of match.
771
+ // "...three or more hyphens, asterisks,
772
+ // or underscores on a line by themselves. If you wish, you may use
773
+ // spaces between the hyphens or asterisks."
774
+ static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
775
+ bufsize_t offset) {
776
+ bufsize_t i;
777
+ char c;
778
+ char nextc = '\0';
779
+ int count;
780
+ i = offset;
781
+ c = peek_at(input, i);
782
+ if (!(c == '*' || c == '_' || c == '-')) {
783
+ parser->thematic_break_kill_pos = i;
784
+ return 0;
785
+ }
786
+ count = 1;
787
+ while ((nextc = peek_at(input, ++i))) {
788
+ if (nextc == c) {
789
+ count++;
790
+ } else if (nextc != ' ' && nextc != '\t') {
791
+ break;
792
+ }
793
+ }
794
+ if (count >= 3 && (nextc == '\r' || nextc == '\n')) {
795
+ return (i - offset) + 1;
796
+ } else {
797
+ parser->thematic_break_kill_pos = i;
798
+ return 0;
799
+ }
800
+ }
801
+
751
802
  // Find first nonspace character from current offset, setting
752
803
  // parser->first_nonspace, parser->first_nonspace_column,
753
804
  // parser->indent, and parser->blank. Does not advance parser->offset.
@@ -1040,6 +1091,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1040
1091
  bufsize_t matched = 0;
1041
1092
  int lev = 0;
1042
1093
  bool save_partially_consumed_tab;
1094
+ bool has_content;
1043
1095
  int save_offset;
1044
1096
  int save_column;
1045
1097
 
@@ -1112,13 +1164,20 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1112
1164
  } else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
1113
1165
  (lev =
1114
1166
  scan_setext_heading_line(input, parser->first_nonspace))) {
1115
- (*container)->type = (uint16_t)CMARK_NODE_HEADING;
1116
- (*container)->as.heading.level = lev;
1117
- (*container)->as.heading.setext = true;
1118
- S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
1167
+ // finalize paragraph, resolving reference links
1168
+ has_content = resolve_reference_link_definitions(parser, *container);
1169
+
1170
+ if (has_content) {
1171
+
1172
+ (*container)->type = (uint16_t)CMARK_NODE_HEADING;
1173
+ (*container)->as.heading.level = lev;
1174
+ (*container)->as.heading.setext = true;
1175
+ S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
1176
+ }
1119
1177
  } else if (!indented &&
1120
1178
  !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
1121
- (matched = scan_thematic_break(input, parser->first_nonspace))) {
1179
+ (parser->thematic_break_kill_pos <= parser->first_nonspace) &&
1180
+ (matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
1122
1181
  // it's only now that we know the line is not part of a setext heading:
1123
1182
  *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
1124
1183
  parser->first_nonspace + 1);
@@ -1377,6 +1436,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1377
1436
  parser->column = 0;
1378
1437
  parser->first_nonspace = 0;
1379
1438
  parser->first_nonspace_column = 0;
1439
+ parser->thematic_break_kill_pos = 0;
1380
1440
  parser->indent = 0;
1381
1441
  parser->blank = false;
1382
1442
  parser->partially_consumed_tab = false;
@@ -10,7 +10,6 @@
10
10
  #include "config.h"
11
11
  #include "cmark_ctype.h"
12
12
  #include "buffer.h"
13
- #include "memory.h"
14
13
 
15
14
  /* Used as default value for cmark_strbuf->ptr so that people can always
16
15
  * assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
@@ -6,7 +6,6 @@
6
6
  #include <assert.h>
7
7
  #include "cmark-gfm.h"
8
8
  #include "buffer.h"
9
- #include "memory.h"
10
9
  #include "cmark_ctype.h"
11
10
 
12
11
  #define CMARK_CHUNK_EMPTY \
@@ -690,6 +690,20 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
690
690
  */
691
691
  #define CMARK_OPT_HARDBREAKS (1 << 2)
692
692
 
693
+ /** `CMARK_OPT_SAFE` is defined here for API compatibility,
694
+ but it no longer has any effect. "Safe" mode is now the default:
695
+ set `CMARK_OPT_UNSAFE` to disable it.
696
+ */
697
+ #define CMARK_OPT_SAFE (1 << 3)
698
+
699
+ /** Render raw HTML and unsafe links (`javascript:`, `vbscript:`,
700
+ * `file:`, and `data:`, except for `image/png`, `image/gif`,
701
+ * `image/jpeg`, or `image/webp` mime types). By default,
702
+ * raw HTML is replaced by a placeholder HTML comment. Unsafe
703
+ * links are replaced by empty strings.
704
+ */
705
+ #define CMARK_OPT_UNSAFE (1 << 17)
706
+
693
707
  /** Render `softbreak` elements as spaces.
694
708
  */
695
709
  #define CMARK_OPT_NOBREAKS (1 << 4)
@@ -738,14 +752,6 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
738
752
  */
739
753
  #define CMARK_OPT_FULL_INFO_STRING (1 << 16)
740
754
 
741
- /** Allow raw HTML and unsafe links, `javascript:`, `vbscript:`, `file:`, and
742
- * all `data:` URLs -- by default, only `image/png`, `image/gif`, `image/jpeg`,
743
- * or `image/webp` mime types are allowed. Without this option, raw HTML is
744
- * replaced by a placeholder HTML comment, and unsafe links are replaced by
745
- * empty strings.
746
- */
747
- #define CMARK_OPT_UNSAFE (1 << 17)
748
-
749
755
  /**
750
756
  * ## Version information
751
757
  */
@@ -1,7 +1,7 @@
1
1
  #ifndef CMARK_GFM_VERSION_H
2
2
  #define CMARK_GFM_VERSION_H
3
3
 
4
- #define CMARK_GFM_VERSION ((0 << 24) | (28 << 16) | (3 << 8) | 20)
5
- #define CMARK_GFM_VERSION_STRING "0.28.3.gfm.20"
4
+ #define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 0)
5
+ #define CMARK_GFM_VERSION_STRING "0.29.0.gfm.0"
6
6
 
7
7
  #endif
@@ -34,7 +34,8 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
34
34
  needs_escaping =
35
35
  c < 0x80 && escape != LITERAL &&
36
36
  ((escape == NORMAL &&
37
- (c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
37
+ (c < 0x20 ||
38
+ c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
38
39
  c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' ||
39
40
  (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
40
41
  (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
@@ -50,14 +51,18 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
50
51
  (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));
51
52
 
52
53
  if (needs_escaping) {
53
- if (cmark_isspace((char)c)) {
54
+ if (escape == URL && cmark_isspace((char)c)) {
54
55
  // use percent encoding for spaces
55
- snprintf(encoded, ENCODED_SIZE, "%%%2x", c);
56
+ snprintf(encoded, ENCODED_SIZE, "%%%2X", c);
56
57
  cmark_strbuf_puts(renderer->buffer, encoded);
57
58
  renderer->column += 3;
58
- } else {
59
+ } else if (cmark_ispunct((char)c)) {
59
60
  cmark_render_ascii(renderer, "\\");
60
61
  cmark_render_code_point(renderer, c);
62
+ } else { // render as entity
63
+ snprintf(encoded, ENCODED_SIZE, "&#%d;", c);
64
+ cmark_strbuf_puts(renderer->buffer, encoded);
65
+ renderer->column += (int)strlen(encoded);
61
66
  }
62
67
  } else {
63
68
  cmark_render_code_point(renderer, c);
@@ -354,8 +354,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
354
354
  case CMARK_NODE_LINK:
355
355
  if (entering) {
356
356
  cmark_strbuf_puts(html, "<a href=\"");
357
- if (!(!(options & CMARK_OPT_UNSAFE) &&
358
- scan_dangerous_url(&node->as.link.url, 0))) {
357
+ if ((options & CMARK_OPT_UNSAFE) ||
358
+ !(scan_dangerous_url(&node->as.link.url, 0))) {
359
359
  houdini_escape_href(html, node->as.link.url.data,
360
360
  node->as.link.url.len);
361
361
  }
@@ -372,8 +372,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
372
372
  case CMARK_NODE_IMAGE:
373
373
  if (entering) {
374
374
  cmark_strbuf_puts(html, "<img src=\"");
375
- if (!(!(options & CMARK_OPT_UNSAFE) &&
376
- scan_dangerous_url(&node->as.link.url, 0))) {
375
+ if ((options & CMARK_OPT_UNSAFE) ||
376
+ !(scan_dangerous_url(&node->as.link.url, 0))) {
377
377
  houdini_escape_href(html, node->as.link.url.data,
378
378
  node->as.link.url.len);
379
379
  }
@@ -322,9 +322,11 @@ static bufsize_t scan_to_closing_backticks(subject *subj,
322
322
  }
323
323
 
324
324
  // Destructively modify string, converting newlines to
325
- // spaces, then removing a single leading + trailing space.
325
+ // spaces, then removing a single leading + trailing space,
326
+ // unless the code span consists entirely of space characters.
326
327
  static void S_normalize_code(cmark_strbuf *s) {
327
328
  bufsize_t r, w;
329
+ bool contains_nonspace = false;
328
330
 
329
331
  for (r = 0, w = 0; r < s->size; ++r) {
330
332
  switch (s->ptr[r]) {
@@ -339,10 +341,14 @@ static void S_normalize_code(cmark_strbuf *s) {
339
341
  default:
340
342
  s->ptr[w++] = s->ptr[r];
341
343
  }
344
+ if (s->ptr[r] != ' ') {
345
+ contains_nonspace = true;
346
+ }
342
347
  }
343
348
 
344
349
  // begins and ends with space?
345
- if (s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
350
+ if (contains_nonspace &&
351
+ s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
346
352
  cmark_strbuf_drop(s, 1);
347
353
  cmark_strbuf_truncate(s, w - 2);
348
354
  } else {
@@ -630,7 +636,6 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
630
636
  delimiter *opener;
631
637
  delimiter *old_closer;
632
638
  bool opener_found;
633
- bool odd_match;
634
639
  delimiter *openers_bottom[3][128];
635
640
  int i;
636
641
 
@@ -655,15 +660,14 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
655
660
  // Now look backwards for first matching opener:
656
661
  opener = closer->previous;
657
662
  opener_found = false;
658
- odd_match = false;
659
663
  while (opener != NULL && opener != stack_bottom &&
660
664
  opener != openers_bottom[closer->length % 3][closer->delim_char]) {
661
665
  if (opener->can_open && opener->delim_char == closer->delim_char) {
662
666
  // interior closer of size 2 can't match opener of size 1
663
667
  // or of size 1 can't match 2
664
- odd_match = (closer->can_open || opener->can_close) &&
665
- ((opener->length + closer->length) % 3 == 0);
666
- if (!odd_match) {
668
+ if (!(closer->can_open || opener->can_close) ||
669
+ closer->length % 3 == 0 ||
670
+ (opener->length + closer->length) % 3 != 0) {
667
671
  opener_found = true;
668
672
  break;
669
673
  }
@@ -969,17 +973,21 @@ static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
969
973
  else if (input->data[i] == '(') {
970
974
  ++nb_p;
971
975
  ++i;
972
- if (nb_p > 32)
973
- return -1;
976
+ if (nb_p > 32)
977
+ return -1;
974
978
  } else if (input->data[i] == ')') {
975
979
  if (nb_p == 0)
976
980
  break;
977
981
  --nb_p;
978
982
  ++i;
979
- } else if (cmark_isspace(input->data[i]))
983
+ } else if (cmark_isspace(input->data[i])) {
984
+ if (i == offset) {
985
+ return -1;
986
+ }
980
987
  break;
981
- else
988
+ } else {
982
989
  ++i;
990
+ }
983
991
  }
984
992
 
985
993
  if (i >= input->len)
@@ -1005,7 +1013,7 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
1005
1013
  } else if (input->data[i] == '\\')
1006
1014
  i += 2;
1007
1015
  else if (input->data[i] == '\n' || input->data[i] == '<')
1008
- return manual_scan_link_url_2(input, offset, output);
1016
+ return -1;
1009
1017
  else
1010
1018
  ++i;
1011
1019
  }
@@ -1432,8 +1440,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
1432
1440
 
1433
1441
  // parse link url:
1434
1442
  spnl(&subj);
1435
- if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 &&
1436
- url.len > 0) {
1443
+ if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) {
1437
1444
  subj.pos += matchlen;
1438
1445
  } else {
1439
1446
  return 0;
@@ -6,7 +6,6 @@ extern "C" {
6
6
  #endif
7
7
 
8
8
  #include "cmark-gfm.h"
9
- #include "memory.h"
10
9
 
11
10
  typedef struct {
12
11
  cmark_event_type ev_type;
@@ -1,7 +1,6 @@
1
1
  #ifndef CMARK_MAP_H
2
2
  #define CMARK_MAP_H
3
3
 
4
- #include "memory.h"
5
4
  #include "chunk.h"
6
5
 
7
6
  #ifdef __cplusplus
@@ -50,6 +50,7 @@ typedef struct {
50
50
  enum cmark_node__internal_flags {
51
51
  CMARK_NODE__OPEN = (1 << 0),
52
52
  CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
53
+ CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),
53
54
  };
54
55
 
55
56
  struct cmark_node {