commonmarker 0.19.0 → 0.20.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of commonmarker might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/ext/commonmarker/blocks.c +89 -29
- data/ext/commonmarker/buffer.c +0 -1
- data/ext/commonmarker/chunk.h +0 -1
- data/ext/commonmarker/cmark-gfm.h +14 -8
- data/ext/commonmarker/cmark-gfm_version.h +2 -2
- data/ext/commonmarker/commonmark.c +9 -4
- data/ext/commonmarker/html.c +4 -4
- data/ext/commonmarker/inlines.c +21 -14
- data/ext/commonmarker/iterator.h +0 -1
- data/ext/commonmarker/map.h +0 -1
- data/ext/commonmarker/node.h +1 -0
- data/ext/commonmarker/parser.h +2 -1
- data/ext/commonmarker/render.c +16 -14
- data/ext/commonmarker/render.h +0 -1
- data/ext/commonmarker/scanners.c +777 -951
- data/ext/commonmarker/scanners.h +0 -2
- data/lib/commonmarker/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ddd9dac2dc959a7d2c4297934501575ab3451594
|
4
|
+
data.tar.gz: f8d6defe22a3e3bcf16c20bd5d6403bdae7555e1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0ea4f88fe4b6c81f70ecde02d26897bfc12b84dde2275becadf85121cdf07be48796fc028537656345c64dea903371703aa233c00071c18e496aa204d3702788
|
7
|
+
data.tar.gz: '08413704c3d74d6f5148aa64dbebce1d66ab313494656d0ea0015cd99c9cbb5707398678d68cf70ad324a93820faf0dda0d1ae13efda5083e69a4a3ac7dba3bd'
|
data/ext/commonmarker/blocks.c
CHANGED
@@ -36,6 +36,10 @@ static bool S_last_line_blank(const cmark_node *node) {
|
|
36
36
|
return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
|
37
37
|
}
|
38
38
|
|
39
|
+
static bool S_last_line_checked(const cmark_node *node) {
|
40
|
+
return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0;
|
41
|
+
}
|
42
|
+
|
39
43
|
static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
|
40
44
|
return (cmark_node_type)node->type;
|
41
45
|
}
|
@@ -47,6 +51,10 @@ static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
|
|
47
51
|
node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
|
48
52
|
}
|
49
53
|
|
54
|
+
static void S_set_last_line_checked(cmark_node *node) {
|
55
|
+
node->flags |= CMARK_NODE__LAST_LINE_CHECKED;
|
56
|
+
}
|
57
|
+
|
50
58
|
static CMARK_INLINE bool S_is_line_end_char(char c) {
|
51
59
|
return (c == '\n' || c == '\r');
|
52
60
|
}
|
@@ -121,8 +129,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
|
|
121
129
|
parser->root = document;
|
122
130
|
parser->current = document;
|
123
131
|
|
124
|
-
parser->last_buffer_ended_with_cr = false;
|
125
|
-
|
126
132
|
parser->syntax_extensions = saved_exts;
|
127
133
|
parser->inline_syntax_extensions = saved_inline_exts;
|
128
134
|
parser->options = saved_options;
|
@@ -234,19 +240,35 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
|
|
234
240
|
|
235
241
|
// Check to see if a node ends with a blank line, descending
|
236
242
|
// if needed into lists and sublists.
|
237
|
-
static bool
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
}
|
243
|
+
static bool S_ends_with_blank_line(cmark_node *node) {
|
244
|
+
if (S_last_line_checked(node)) {
|
245
|
+
return(S_last_line_blank(node));
|
246
|
+
} else if ((S_type(node) == CMARK_NODE_LIST ||
|
247
|
+
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
|
248
|
+
S_set_last_line_checked(node);
|
249
|
+
return(S_ends_with_blank_line(node->last_child));
|
250
|
+
} else {
|
251
|
+
S_set_last_line_checked(node);
|
252
|
+
return (S_last_line_blank(node));
|
248
253
|
}
|
249
|
-
|
254
|
+
}
|
255
|
+
|
256
|
+
// returns true if content remains after link defs are resolved.
|
257
|
+
static bool resolve_reference_link_definitions(
|
258
|
+
cmark_parser *parser,
|
259
|
+
cmark_node *b) {
|
260
|
+
bufsize_t pos;
|
261
|
+
cmark_strbuf *node_content = &b->content;
|
262
|
+
cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
|
263
|
+
while (chunk.len && chunk.data[0] == '[' &&
|
264
|
+
(pos = cmark_parse_reference_inline(parser->mem, &chunk,
|
265
|
+
parser->refmap))) {
|
266
|
+
|
267
|
+
chunk.data += pos;
|
268
|
+
chunk.len -= pos;
|
269
|
+
}
|
270
|
+
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
|
271
|
+
return !is_blank(&b->content, 0);
|
250
272
|
}
|
251
273
|
|
252
274
|
static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
|
@@ -254,6 +276,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
|
|
254
276
|
cmark_node *item;
|
255
277
|
cmark_node *subitem;
|
256
278
|
cmark_node *parent;
|
279
|
+
bool has_content;
|
257
280
|
|
258
281
|
parent = b->parent;
|
259
282
|
assert(b->flags &
|
@@ -283,15 +306,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
|
|
283
306
|
switch (S_type(b)) {
|
284
307
|
case CMARK_NODE_PARAGRAPH:
|
285
308
|
{
|
286
|
-
|
287
|
-
|
288
|
-
(pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) {
|
289
|
-
|
290
|
-
chunk.data += pos;
|
291
|
-
chunk.len -= pos;
|
292
|
-
}
|
293
|
-
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
|
294
|
-
if (is_blank(node_content, 0)) {
|
309
|
+
has_content = resolve_reference_link_definitions(parser, b);
|
310
|
+
if (!has_content) {
|
295
311
|
// remove blank node (former reference def)
|
296
312
|
cmark_node_free(b);
|
297
313
|
}
|
@@ -343,7 +359,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
|
|
343
359
|
// spaces between them:
|
344
360
|
subitem = item->first_child;
|
345
361
|
while (subitem) {
|
346
|
-
if (
|
362
|
+
if ((item->next || subitem->next) &&
|
363
|
+
S_ends_with_blank_line(subitem)) {
|
347
364
|
b->as.list.tight = false;
|
348
365
|
break;
|
349
366
|
}
|
@@ -748,6 +765,40 @@ static void chop_trailing_hashtags(cmark_chunk *ch) {
|
|
748
765
|
}
|
749
766
|
}
|
750
767
|
|
768
|
+
// Check for thematic break. On failure, return 0 and update
|
769
|
+
// thematic_break_kill_pos with the index at which the
|
770
|
+
// parse fails. On success, return length of match.
|
771
|
+
// "...three or more hyphens, asterisks,
|
772
|
+
// or underscores on a line by themselves. If you wish, you may use
|
773
|
+
// spaces between the hyphens or asterisks."
|
774
|
+
static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
|
775
|
+
bufsize_t offset) {
|
776
|
+
bufsize_t i;
|
777
|
+
char c;
|
778
|
+
char nextc = '\0';
|
779
|
+
int count;
|
780
|
+
i = offset;
|
781
|
+
c = peek_at(input, i);
|
782
|
+
if (!(c == '*' || c == '_' || c == '-')) {
|
783
|
+
parser->thematic_break_kill_pos = i;
|
784
|
+
return 0;
|
785
|
+
}
|
786
|
+
count = 1;
|
787
|
+
while ((nextc = peek_at(input, ++i))) {
|
788
|
+
if (nextc == c) {
|
789
|
+
count++;
|
790
|
+
} else if (nextc != ' ' && nextc != '\t') {
|
791
|
+
break;
|
792
|
+
}
|
793
|
+
}
|
794
|
+
if (count >= 3 && (nextc == '\r' || nextc == '\n')) {
|
795
|
+
return (i - offset) + 1;
|
796
|
+
} else {
|
797
|
+
parser->thematic_break_kill_pos = i;
|
798
|
+
return 0;
|
799
|
+
}
|
800
|
+
}
|
801
|
+
|
751
802
|
// Find first nonspace character from current offset, setting
|
752
803
|
// parser->first_nonspace, parser->first_nonspace_column,
|
753
804
|
// parser->indent, and parser->blank. Does not advance parser->offset.
|
@@ -1040,6 +1091,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
|
|
1040
1091
|
bufsize_t matched = 0;
|
1041
1092
|
int lev = 0;
|
1042
1093
|
bool save_partially_consumed_tab;
|
1094
|
+
bool has_content;
|
1043
1095
|
int save_offset;
|
1044
1096
|
int save_column;
|
1045
1097
|
|
@@ -1112,13 +1164,20 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
|
|
1112
1164
|
} else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
|
1113
1165
|
(lev =
|
1114
1166
|
scan_setext_heading_line(input, parser->first_nonspace))) {
|
1115
|
-
|
1116
|
-
(*container)
|
1117
|
-
|
1118
|
-
|
1167
|
+
// finalize paragraph, resolving reference links
|
1168
|
+
has_content = resolve_reference_link_definitions(parser, *container);
|
1169
|
+
|
1170
|
+
if (has_content) {
|
1171
|
+
|
1172
|
+
(*container)->type = (uint16_t)CMARK_NODE_HEADING;
|
1173
|
+
(*container)->as.heading.level = lev;
|
1174
|
+
(*container)->as.heading.setext = true;
|
1175
|
+
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
|
1176
|
+
}
|
1119
1177
|
} else if (!indented &&
|
1120
1178
|
!(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
|
1121
|
-
|
1179
|
+
(parser->thematic_break_kill_pos <= parser->first_nonspace) &&
|
1180
|
+
(matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
|
1122
1181
|
// it's only now that we know the line is not part of a setext heading:
|
1123
1182
|
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
|
1124
1183
|
parser->first_nonspace + 1);
|
@@ -1377,6 +1436,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
|
|
1377
1436
|
parser->column = 0;
|
1378
1437
|
parser->first_nonspace = 0;
|
1379
1438
|
parser->first_nonspace_column = 0;
|
1439
|
+
parser->thematic_break_kill_pos = 0;
|
1380
1440
|
parser->indent = 0;
|
1381
1441
|
parser->blank = false;
|
1382
1442
|
parser->partially_consumed_tab = false;
|
data/ext/commonmarker/buffer.c
CHANGED
data/ext/commonmarker/chunk.h
CHANGED
data/ext/commonmarker/cmark-gfm.h
CHANGED
@@ -690,6 +690,20 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
|
|
690
690
|
*/
|
691
691
|
#define CMARK_OPT_HARDBREAKS (1 << 2)
|
692
692
|
|
693
|
+
/** `CMARK_OPT_SAFE` is defined here for API compatibility,
|
694
|
+
but it no longer has any effect. "Safe" mode is now the default:
|
695
|
+
set `CMARK_OPT_UNSAFE` to disable it.
|
696
|
+
*/
|
697
|
+
#define CMARK_OPT_SAFE (1 << 3)
|
698
|
+
|
699
|
+
/** Render raw HTML and unsafe links (`javascript:`, `vbscript:`,
|
700
|
+
* `file:`, and `data:`, except for `image/png`, `image/gif`,
|
701
|
+
* `image/jpeg`, or `image/webp` mime types). By default,
|
702
|
+
* raw HTML is replaced by a placeholder HTML comment. Unsafe
|
703
|
+
* links are replaced by empty strings.
|
704
|
+
*/
|
705
|
+
#define CMARK_OPT_UNSAFE (1 << 17)
|
706
|
+
|
693
707
|
/** Render `softbreak` elements as spaces.
|
694
708
|
*/
|
695
709
|
#define CMARK_OPT_NOBREAKS (1 << 4)
|
@@ -738,14 +752,6 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
|
|
738
752
|
*/
|
739
753
|
#define CMARK_OPT_FULL_INFO_STRING (1 << 16)
|
740
754
|
|
741
|
-
/** Allow raw HTML and unsafe links, `javascript:`, `vbscript:`, `file:`, and
|
742
|
-
* all `data:` URLs -- by default, only `image/png`, `image/gif`, `image/jpeg`,
|
743
|
-
* or `image/webp` mime types are allowed. Without this option, raw HTML is
|
744
|
-
* replaced by a placeholder HTML comment, and unsafe links are replaced by
|
745
|
-
* empty strings.
|
746
|
-
*/
|
747
|
-
#define CMARK_OPT_UNSAFE (1 << 17)
|
748
|
-
|
749
755
|
/**
|
750
756
|
* ## Version information
|
751
757
|
*/
|
data/ext/commonmarker/cmark-gfm_version.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#ifndef CMARK_GFM_VERSION_H
|
2
2
|
#define CMARK_GFM_VERSION_H
|
3
3
|
|
4
|
-
#define CMARK_GFM_VERSION ((0 << 24) | (
|
5
|
-
#define CMARK_GFM_VERSION_STRING "0.
|
4
|
+
#define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 0)
|
5
|
+
#define CMARK_GFM_VERSION_STRING "0.29.0.gfm.0"
|
6
6
|
|
7
7
|
#endif
|
data/ext/commonmarker/commonmark.c
CHANGED
@@ -34,7 +34,8 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
|
|
34
34
|
needs_escaping =
|
35
35
|
c < 0x80 && escape != LITERAL &&
|
36
36
|
((escape == NORMAL &&
|
37
|
-
(c
|
37
|
+
(c < 0x20 ||
|
38
|
+
c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
|
38
39
|
c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' ||
|
39
40
|
(c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
|
40
41
|
(renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
|
@@ -50,14 +51,18 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
|
|
50
51
|
(c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));
|
51
52
|
|
52
53
|
if (needs_escaping) {
|
53
|
-
if (cmark_isspace((char)c)) {
|
54
|
+
if (escape == URL && cmark_isspace((char)c)) {
|
54
55
|
// use percent encoding for spaces
|
55
|
-
snprintf(encoded, ENCODED_SIZE, "%%%
|
56
|
+
snprintf(encoded, ENCODED_SIZE, "%%%2X", c);
|
56
57
|
cmark_strbuf_puts(renderer->buffer, encoded);
|
57
58
|
renderer->column += 3;
|
58
|
-
} else {
|
59
|
+
} else if (cmark_ispunct((char)c)) {
|
59
60
|
cmark_render_ascii(renderer, "\\");
|
60
61
|
cmark_render_code_point(renderer, c);
|
62
|
+
} else { // render as entity
|
63
|
+
snprintf(encoded, ENCODED_SIZE, "&#%d;", c);
|
64
|
+
cmark_strbuf_puts(renderer->buffer, encoded);
|
65
|
+
renderer->column += (int)strlen(encoded);
|
61
66
|
}
|
62
67
|
} else {
|
63
68
|
cmark_render_code_point(renderer, c);
|
data/ext/commonmarker/html.c
CHANGED
@@ -354,8 +354,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
|
|
354
354
|
case CMARK_NODE_LINK:
|
355
355
|
if (entering) {
|
356
356
|
cmark_strbuf_puts(html, "<a href=\"");
|
357
|
-
if (
|
358
|
-
scan_dangerous_url(&node->as.link.url, 0))) {
|
357
|
+
if ((options & CMARK_OPT_UNSAFE) ||
|
358
|
+
!(scan_dangerous_url(&node->as.link.url, 0))) {
|
359
359
|
houdini_escape_href(html, node->as.link.url.data,
|
360
360
|
node->as.link.url.len);
|
361
361
|
}
|
@@ -372,8 +372,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
|
|
372
372
|
case CMARK_NODE_IMAGE:
|
373
373
|
if (entering) {
|
374
374
|
cmark_strbuf_puts(html, "<img src=\"");
|
375
|
-
if (
|
376
|
-
scan_dangerous_url(&node->as.link.url, 0))) {
|
375
|
+
if ((options & CMARK_OPT_UNSAFE) ||
|
376
|
+
!(scan_dangerous_url(&node->as.link.url, 0))) {
|
377
377
|
houdini_escape_href(html, node->as.link.url.data,
|
378
378
|
node->as.link.url.len);
|
379
379
|
}
|
data/ext/commonmarker/inlines.c
CHANGED
@@ -322,9 +322,11 @@ static bufsize_t scan_to_closing_backticks(subject *subj,
|
|
322
322
|
}
|
323
323
|
|
324
324
|
// Destructively modify string, converting newlines to
|
325
|
-
// spaces, then removing a single leading + trailing space
|
325
|
+
// spaces, then removing a single leading + trailing space,
|
326
|
+
// unless the code span consists entirely of space characters.
|
326
327
|
static void S_normalize_code(cmark_strbuf *s) {
|
327
328
|
bufsize_t r, w;
|
329
|
+
bool contains_nonspace = false;
|
328
330
|
|
329
331
|
for (r = 0, w = 0; r < s->size; ++r) {
|
330
332
|
switch (s->ptr[r]) {
|
@@ -339,10 +341,14 @@ static void S_normalize_code(cmark_strbuf *s) {
|
|
339
341
|
default:
|
340
342
|
s->ptr[w++] = s->ptr[r];
|
341
343
|
}
|
344
|
+
if (s->ptr[r] != ' ') {
|
345
|
+
contains_nonspace = true;
|
346
|
+
}
|
342
347
|
}
|
343
348
|
|
344
349
|
// begins and ends with space?
|
345
|
-
if (
|
350
|
+
if (contains_nonspace &&
|
351
|
+
s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
|
346
352
|
cmark_strbuf_drop(s, 1);
|
347
353
|
cmark_strbuf_truncate(s, w - 2);
|
348
354
|
} else {
|
@@ -630,7 +636,6 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
|
|
630
636
|
delimiter *opener;
|
631
637
|
delimiter *old_closer;
|
632
638
|
bool opener_found;
|
633
|
-
bool odd_match;
|
634
639
|
delimiter *openers_bottom[3][128];
|
635
640
|
int i;
|
636
641
|
|
@@ -655,15 +660,14 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
|
|
655
660
|
// Now look backwards for first matching opener:
|
656
661
|
opener = closer->previous;
|
657
662
|
opener_found = false;
|
658
|
-
odd_match = false;
|
659
663
|
while (opener != NULL && opener != stack_bottom &&
|
660
664
|
opener != openers_bottom[closer->length % 3][closer->delim_char]) {
|
661
665
|
if (opener->can_open && opener->delim_char == closer->delim_char) {
|
662
666
|
// interior closer of size 2 can't match opener of size 1
|
663
667
|
// or of size 1 can't match 2
|
664
|
-
|
665
|
-
|
666
|
-
|
668
|
+
if (!(closer->can_open || opener->can_close) ||
|
669
|
+
closer->length % 3 == 0 ||
|
670
|
+
(opener->length + closer->length) % 3 != 0) {
|
667
671
|
opener_found = true;
|
668
672
|
break;
|
669
673
|
}
|
@@ -969,17 +973,21 @@ static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
|
|
969
973
|
else if (input->data[i] == '(') {
|
970
974
|
++nb_p;
|
971
975
|
++i;
|
972
|
-
|
973
|
-
|
976
|
+
if (nb_p > 32)
|
977
|
+
return -1;
|
974
978
|
} else if (input->data[i] == ')') {
|
975
979
|
if (nb_p == 0)
|
976
980
|
break;
|
977
981
|
--nb_p;
|
978
982
|
++i;
|
979
|
-
} else if (cmark_isspace(input->data[i]))
|
983
|
+
} else if (cmark_isspace(input->data[i])) {
|
984
|
+
if (i == offset) {
|
985
|
+
return -1;
|
986
|
+
}
|
980
987
|
break;
|
981
|
-
else
|
988
|
+
} else {
|
982
989
|
++i;
|
990
|
+
}
|
983
991
|
}
|
984
992
|
|
985
993
|
if (i >= input->len)
|
@@ -1005,7 +1013,7 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
|
|
1005
1013
|
} else if (input->data[i] == '\\')
|
1006
1014
|
i += 2;
|
1007
1015
|
else if (input->data[i] == '\n' || input->data[i] == '<')
|
1008
|
-
return
|
1016
|
+
return -1;
|
1009
1017
|
else
|
1010
1018
|
++i;
|
1011
1019
|
}
|
@@ -1432,8 +1440,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
|
|
1432
1440
|
|
1433
1441
|
// parse link url:
|
1434
1442
|
spnl(&subj);
|
1435
|
-
if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1
|
1436
|
-
url.len > 0) {
|
1443
|
+
if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) {
|
1437
1444
|
subj.pos += matchlen;
|
1438
1445
|
} else {
|
1439
1446
|
return 0;
|
data/ext/commonmarker/iterator.h
CHANGED
data/ext/commonmarker/map.h
CHANGED