commonmarker 0.14.15 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of commonmarker might be problematic. Click here for more details.

Files changed (32):
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/commonmarker/cmark/Makefile +4 -3
  4. data/ext/commonmarker/cmark/README.md +18 -11
  5. data/ext/commonmarker/cmark/extensions/ext_scanners.c +562 -3
  6. data/ext/commonmarker/cmark/extensions/ext_scanners.h +6 -0
  7. data/ext/commonmarker/cmark/extensions/ext_scanners.re +32 -0
  8. data/ext/commonmarker/cmark/extensions/table.c +87 -215
  9. data/ext/commonmarker/cmark/man/CMakeLists.txt +2 -2
  10. data/ext/commonmarker/cmark/man/make_man_page.py +1 -1
  11. data/ext/commonmarker/cmark/man/man1/{cmark.1 → cmark-gfm.1} +10 -4
  12. data/ext/commonmarker/cmark/man/man3/cmark-gfm.3 +2 -2
  13. data/ext/commonmarker/cmark/src/buffer.c +5 -1
  14. data/ext/commonmarker/cmark/src/cmark.c +6 -2
  15. data/ext/commonmarker/cmark/src/cmark.h +1 -1
  16. data/ext/commonmarker/cmark/src/cmark_extension_api.h +0 -2
  17. data/ext/commonmarker/cmark/src/commonmark.c +1 -2
  18. data/ext/commonmarker/cmark/src/inlines.c +1 -1
  19. data/ext/commonmarker/cmark/src/libcmark-gfm.pc.in +1 -1
  20. data/ext/commonmarker/cmark/src/main.c +1 -1
  21. data/ext/commonmarker/cmark/src/render.c +15 -0
  22. data/ext/commonmarker/cmark/test/extensions.txt +24 -3
  23. data/ext/commonmarker/cmark/test/roundtrip_tests.py +1 -0
  24. data/ext/commonmarker/cmark/test/spec.txt +84 -15
  25. data/ext/commonmarker/cmark/test/spec_tests.py +9 -8
  26. data/ext/commonmarker/cmark/tools/Dockerfile +12 -0
  27. data/lib/commonmarker/node.rb +3 -0
  28. data/lib/commonmarker/node/inspect.rb +59 -0
  29. data/lib/commonmarker/version.rb +1 -1
  30. data/test/test_node.rb +8 -0
  31. data/test/test_spec.rb +1 -1
  32. metadata +4 -3
@@ -8,8 +8,14 @@ extern "C" {
8
8
  bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
9
9
  unsigned char *ptr, int len, bufsize_t offset);
10
10
  bufsize_t _scan_table_start(const unsigned char *p);
11
+ bufsize_t _scan_table_cell(const unsigned char *p);
12
+ bufsize_t _scan_table_cell_end(const unsigned char *p);
13
+ bufsize_t _scan_table_row_end(const unsigned char *p);
11
14
 
12
15
  #define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n)
16
+ #define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n)
17
+ #define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n)
18
+ #define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n)
13
19
 
14
20
  #ifdef __cplusplus
15
21
  }
@@ -27,8 +27,10 @@ bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned cha
27
27
 
28
28
  spacechar = [ \t\v\f];
29
29
  newline = [\r]?[\n];
30
+ escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-];
30
31
 
31
32
  table_marker = (spacechar*[:]?[-]+[:]?spacechar*);
33
+ table_cell = (escaped_char|[^|\r\n])*;
32
34
  */
33
35
 
34
36
  bufsize_t _scan_table_start(const unsigned char *p)
@@ -40,3 +42,33 @@ bufsize_t _scan_table_start(const unsigned char *p)
40
42
  .? { return 0; }
41
43
  */
42
44
  }
45
+
46
+ bufsize_t _scan_table_cell(const unsigned char *p)
47
+ {
48
+ const unsigned char *marker = NULL;
49
+ const unsigned char *start = p;
50
+ /*!re2c
51
+ table_cell { return (bufsize_t)(p - start); }
52
+ .? { return 0; }
53
+ */
54
+ }
55
+
56
+ bufsize_t _scan_table_cell_end(const unsigned char *p)
57
+ {
58
+ const unsigned char *marker = NULL;
59
+ const unsigned char *start = p;
60
+ /*!re2c
61
+ [|] spacechar* newline? { return (bufsize_t)(p - start); }
62
+ .? { return 0; }
63
+ */
64
+ }
65
+
66
+ bufsize_t _scan_table_row_end(const unsigned char *p)
67
+ {
68
+ const unsigned char *marker = NULL;
69
+ const unsigned char *start = p;
70
+ /*!re2c
71
+ spacechar* newline { return (bufsize_t)(p - start); }
72
+ .? { return 0; }
73
+ */
74
+ }
@@ -24,12 +24,11 @@ typedef struct {
24
24
 
25
25
  typedef struct {
26
26
  bool is_header;
27
- unsigned char *raw_content;
28
- size_t raw_content_len;
29
27
  } node_table_row;
30
28
 
31
29
  static void free_table_cell(cmark_mem *mem, void *data) {
32
- cmark_node_free((cmark_node *)data);
30
+ cmark_strbuf_free((cmark_strbuf *)data);
31
+ mem->free(data);
33
32
  }
34
33
 
35
34
  static void free_table_row(cmark_mem *mem, table_row *row) {
@@ -48,9 +47,7 @@ static void free_node_table(cmark_mem *mem, void *ptr) {
48
47
  }
49
48
 
50
49
  static void free_node_table_row(cmark_mem *mem, void *ptr) {
51
- node_table_row *ntr = (node_table_row *)ptr;
52
- mem->free(ntr->raw_content);
53
- mem->free(ntr);
50
+ mem->free(ptr);
54
51
  }
55
52
 
56
53
  static int get_n_table_columns(cmark_node *node) {
@@ -83,159 +80,64 @@ static int set_table_alignments(cmark_node *node, uint8_t *alignments) {
83
80
  return 1;
84
81
  }
85
82
 
86
- static void maybe_consume_pipe(cmark_node **n, int *offset) {
87
- if (*n && (*n)->type == CMARK_NODE_TEXT && *offset < (*n)->as.literal.len &&
88
- (*n)->as.literal.data[*offset] == '|')
89
- ++*offset;
90
- }
91
-
92
- static int find_unescaped_pipe(const cmark_chunk *chunk, int offset) {
93
- bool escaping = false;
94
- for (; offset < chunk->len; ++offset) {
95
- if (escaping)
96
- escaping = false;
97
- else if (chunk->data[offset] == '\\')
98
- escaping = true;
99
- else if (chunk->data[offset] == '|')
100
- return offset;
101
- }
102
- return -1;
103
- }
104
-
105
- static cmark_node *consume_until_pipe_or_eol(cmark_syntax_extension *self,
106
- cmark_parser *parser,
107
- cmark_node **n, int *offset) {
108
- cmark_node *result =
109
- cmark_node_new_with_mem(CMARK_NODE_TABLE_CELL, parser->mem);
110
- cmark_node_set_syntax_extension(result, self);
111
- bool was_escape = false;
112
-
113
- while (*n) {
114
- cmark_node *node = *n;
115
-
116
- if (node->type == CMARK_NODE_TEXT) {
117
- cmark_node *child = cmark_parser_add_child(
118
- parser, result, CMARK_NODE_TEXT, cmark_parser_get_offset(parser));
119
-
120
- if (was_escape) {
121
- child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, 1);
122
- cmark_node_own(child);
123
- if (child->as.literal.data[0] == '|')
124
- cmark_node_free(child->prev);
125
- ++*offset;
126
- if (*offset >= node->as.literal.len) {
127
- *offset = 0;
128
- *n = node->next;
129
- }
130
- was_escape = false;
131
- continue;
132
- }
133
-
134
- const char *lit = (char *)node->as.literal.data + *offset;
135
- const int lit_len = node->as.literal.len - *offset;
136
-
137
- if (lit_len == 1 && lit[0] == '\\' &&
138
- node->next &&
139
- node->next->type == CMARK_NODE_TEXT) {
140
- child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, 1);
141
- cmark_node_own(child);
142
- was_escape = true;
143
- *n = node->next;
144
- continue;
145
- }
146
-
147
- int pipe = find_unescaped_pipe(&node->as.literal, *offset);
148
- if (pipe == -1) {
149
- child->as.literal = cmark_chunk_dup(&node->as.literal, *offset,
150
- node->as.literal.len - *offset);
151
- cmark_node_own(child);
152
- } else {
153
- pipe -= *offset;
154
-
155
- child->as.literal = cmark_chunk_dup(&node->as.literal, *offset, pipe);
156
- cmark_node_own(child);
157
-
158
- *offset += pipe + 1;
159
- if (*offset >= node->as.literal.len) {
160
- *offset = 0;
161
- *n = node->next;
162
- }
163
- break;
164
- }
165
-
166
- *n = node->next;
167
- *offset = 0;
168
- } else {
169
- cmark_node *next = node->next;
170
- cmark_node_append_child(result, node);
171
- cmark_node_own(node);
172
- *n = next;
173
- *offset = 0;
174
- }
175
- }
83
+ static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsize_t len)
84
+ {
85
+ cmark_strbuf *res = (cmark_strbuf *)mem->calloc(1, sizeof(cmark_strbuf));
86
+ bufsize_t r, w;
176
87
 
177
- if (!result->first_child) {
178
- cmark_node_free(result);
179
- return NULL;
180
- }
88
+ cmark_strbuf_init(mem, res, len + 1);
89
+ cmark_strbuf_put(res, string, len);
90
+ cmark_strbuf_putc(res, '\0');
181
91
 
182
- cmark_consolidate_text_nodes(result);
92
+ for (r = 0, w = 0; r < len; ++r) {
93
+ if (res->ptr[r] == '\\' && res->ptr[r + 1] == '|')
94
+ r++;
183
95
 
184
- if (result->first_child->type == CMARK_NODE_TEXT) {
185
- cmark_chunk c = cmark_chunk_ltrim_new(parser->mem, &result->first_child->as.literal);
186
- cmark_chunk_free(parser->mem, &result->first_child->as.literal);
187
- result->first_child->as.literal = c;
96
+ res->ptr[w++] = res->ptr[r];
188
97
  }
189
98
 
190
- if (result->last_child->type == CMARK_NODE_TEXT) {
191
- cmark_chunk c = cmark_chunk_rtrim_new(parser->mem, &result->last_child->as.literal);
192
- cmark_chunk_free(parser->mem, &result->last_child->as.literal);
193
- result->last_child->as.literal = c;
194
- }
99
+ cmark_strbuf_truncate(res, w);
195
100
 
196
- return result;
197
- }
198
-
199
- static int table_ispunct(char c) {
200
- return cmark_ispunct(c) && c != '|';
101
+ return res;
201
102
  }
202
103
 
203
104
  static table_row *row_from_string(cmark_syntax_extension *self,
204
105
  cmark_parser *parser, unsigned char *string,
205
106
  int len) {
206
107
  table_row *row = NULL;
207
-
208
- cmark_node *temp_container =
209
- cmark_node_new_with_mem(CMARK_NODE_PARAGRAPH, parser->mem);
210
- cmark_strbuf_set(&temp_container->content, string, len);
211
-
212
- cmark_manage_extensions_special_characters(parser, true);
213
- cmark_parser_set_backslash_ispunct_func(parser, table_ispunct);
214
- cmark_parse_inlines(parser, temp_container, parser->refmap, parser->options);
215
- cmark_parser_set_backslash_ispunct_func(parser, NULL);
216
- cmark_manage_extensions_special_characters(parser, false);
217
-
218
- if (!temp_container->first_child) {
219
- cmark_node_free(temp_container);
220
- return NULL;
221
- }
108
+ bufsize_t cell_matched, pipe_matched, offset = 0;
222
109
 
223
110
  row = (table_row *)parser->mem->calloc(1, sizeof(table_row));
224
111
  row->n_columns = 0;
225
112
  row->cells = NULL;
226
113
 
227
- cmark_node *node = temp_container->first_child;
228
- int offset = 0;
114
+ if (len > 0 && string[0] == '|')
115
+ ++offset;
229
116
 
230
- maybe_consume_pipe(&node, &offset);
231
- cmark_node *child;
232
- while ((child = consume_until_pipe_or_eol(self, parser, &node, &offset)) !=
233
- NULL) {
234
- ++row->n_columns;
235
- row->cells = cmark_llist_append(parser->mem, row->cells, child);
236
- }
117
+ do {
118
+ cell_matched = scan_table_cell(string, len, offset);
119
+ pipe_matched = scan_table_cell_end(string, len, offset + cell_matched);
120
+
121
+ if (cell_matched || pipe_matched) {
122
+ cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset,
123
+ cell_matched);
124
+ cmark_strbuf_trim(cell_buf);
125
+ row->n_columns += 1;
126
+ row->cells = cmark_llist_append(parser->mem, row->cells, cell_buf);
127
+ }
237
128
 
238
- cmark_node_free(temp_container);
129
+ offset += cell_matched + pipe_matched;
130
+
131
+ if (!pipe_matched) {
132
+ pipe_matched = scan_table_row_end(string, len, offset);
133
+ offset += pipe_matched;
134
+ }
135
+ } while ((cell_matched || pipe_matched) && offset < len);
136
+
137
+ if (offset != len || !row->n_columns) {
138
+ free_table_row(parser->mem, row);
139
+ row = NULL;
140
+ }
239
141
 
240
142
  return row;
241
143
  }
@@ -300,25 +202,15 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
300
202
 
301
203
  parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table));
302
204
 
205
+
303
206
  set_n_table_columns(parent_container, header_row->n_columns);
304
207
 
305
208
  uint8_t *alignments =
306
209
  (uint8_t *)parser->mem->calloc(header_row->n_columns, sizeof(uint8_t));
307
210
  cmark_llist *it = marker_row->cells;
308
211
  for (i = 0; it; it = it->next, ++i) {
309
- cmark_node *node = (cmark_node *)it->data;
310
- assert(node->type == CMARK_NODE_TABLE_CELL);
311
-
312
- cmark_strbuf strbuf;
313
- cmark_strbuf_init(parser->mem, &strbuf, 0);
314
- assert(node->first_child->type == CMARK_NODE_TEXT);
315
- assert(node->first_child == node->last_child);
316
- cmark_strbuf_put(&strbuf, node->first_child->as.literal.data, node->first_child->as.literal.len);
317
- cmark_strbuf_trim(&strbuf);
318
- char const *text = cmark_strbuf_cstr(&strbuf);
319
-
320
- bool left = text[0] == ':', right = text[strbuf.size - 1] == ':';
321
- cmark_strbuf_free(&strbuf);
212
+ cmark_strbuf *node = (cmark_strbuf *)it->data;
213
+ bool left = node->ptr[0] == ':', right = node->ptr[node->size - 1] == ':';
322
214
 
323
215
  if (left && right)
324
216
  alignments[i] = 'c';
@@ -336,9 +228,18 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
336
228
 
337
229
  table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row));
338
230
  ntr->is_header = true;
339
- ntr->raw_content_len = strlen(parent_string);
340
- ntr->raw_content = (unsigned char *)malloc(ntr->raw_content_len);
341
- memcpy(ntr->raw_content, parent_string, ntr->raw_content_len);
231
+
232
+ {
233
+ cmark_llist *tmp;
234
+
235
+ for (tmp = header_row->cells; tmp; tmp = tmp->next) {
236
+ cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data;
237
+ cmark_node *header_cell = cmark_parser_add_child(parser, table_header,
238
+ CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser));
239
+ cmark_node_set_string_content(header_cell, (char *) cell_buf->ptr);
240
+ cmark_node_set_syntax_extension(header_cell, self);
241
+ }
242
+ }
342
243
 
343
244
  cmark_parser_advance_offset(
344
245
  parser, (char *)input,
@@ -354,7 +255,7 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self,
354
255
  cmark_node *parent_container,
355
256
  unsigned char *input, int len) {
356
257
  cmark_node *table_row_block;
357
- node_table_row *ntr;
258
+ table_row *row;
358
259
 
359
260
  if (cmark_parser_is_blank(parser))
360
261
  return NULL;
@@ -364,11 +265,31 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self,
364
265
  cmark_parser_get_offset(parser));
365
266
 
366
267
  cmark_node_set_syntax_extension(table_row_block, self);
367
- table_row_block->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row));
268
+ table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row));
269
+
270
+ row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser),
271
+ len - cmark_parser_get_first_nonspace(parser));
368
272
 
369
- ntr->raw_content_len = len - cmark_parser_get_first_nonspace(parser);
370
- ntr->raw_content = (unsigned char *)malloc(len);
371
- memcpy(ntr->raw_content, input + cmark_parser_get_first_nonspace(parser), ntr->raw_content_len);
273
+ {
274
+ cmark_llist *tmp;
275
+ int i, table_columns = get_n_table_columns(parent_container);
276
+
277
+ for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = tmp->next, ++i) {
278
+ cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data;
279
+ cmark_node *cell = cmark_parser_add_child(parser, table_row_block,
280
+ CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser));
281
+ cmark_node_set_string_content(cell, (char *) cell_buf->ptr);
282
+ cmark_node_set_syntax_extension(cell, self);
283
+ }
284
+
285
+ for (; i < table_columns; ++i) {
286
+ cmark_node *cell = cmark_parser_add_child(
287
+ parser, table_row_block, CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser));
288
+ cmark_node_set_syntax_extension(cell, self);
289
+ }
290
+ }
291
+
292
+ free_table_row(parser->mem, row);
372
293
 
373
294
  cmark_parser_advance_offset(parser, (char *)input,
374
295
  len - 1 - cmark_parser_get_offset(parser), false);
@@ -695,59 +616,11 @@ static void opaque_free(cmark_syntax_extension *self, cmark_mem *mem, cmark_node
695
616
  }
696
617
 
697
618
  static int escape(cmark_syntax_extension *self, cmark_node *node, int c) {
698
- return c == '|';
699
- }
700
-
701
- static cmark_node *postprocess(cmark_syntax_extension *self, cmark_parser *parser, cmark_node *root) {
702
- cmark_iter *iter;
703
- cmark_event_type ev;
704
- cmark_node *node;
705
- node_table_row *ntr;
706
- table_row *row;
707
-
708
- iter = cmark_iter_new(root);
709
-
710
- while ((ev = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
711
- node = cmark_iter_get_node(iter);
712
- if (ev == CMARK_EVENT_EXIT && node->type == CMARK_NODE_TABLE_ROW) {
713
- ntr = (node_table_row *)node->as.opaque;
714
- if (!ntr->raw_content)
715
- continue;
716
- row = row_from_string(self, parser,
717
- ntr->raw_content,
718
- (int)ntr->raw_content_len);
719
- free(ntr->raw_content);
720
- ntr->raw_content = NULL;
721
- ntr->raw_content_len = 0;
722
-
723
- {
724
- cmark_llist *tmp, *next;
725
- int i;
726
- int table_columns = get_n_table_columns(node->parent);
727
-
728
- for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = next, ++i) {
729
- cmark_node *cell = (cmark_node *)tmp->data;
730
- assert(cell->type == CMARK_NODE_TABLE_CELL);
731
- cmark_node_append_child(node, cell);
732
- row->cells = next = tmp->next;
733
- parser->mem->free(tmp);
734
- }
735
-
736
- for (; i < table_columns; ++i) {
737
- cmark_node *cell =
738
- cmark_parser_add_child(parser, node, CMARK_NODE_TABLE_CELL,
739
- cmark_parser_get_offset(parser));
740
- cmark_node_set_syntax_extension(cell, self);
741
- }
742
- }
743
-
744
- free_table_row(parser->mem, row);
745
- }
746
- }
747
-
748
- cmark_iter_free(iter);
749
-
750
- return root;
619
+ return
620
+ node->type != CMARK_NODE_TABLE &&
621
+ node->type != CMARK_NODE_TABLE_ROW &&
622
+ node->type != CMARK_NODE_TABLE_CELL &&
623
+ c == '|';
751
624
  }
752
625
 
753
626
  cmark_syntax_extension *create_table_extension(void) {
@@ -764,7 +637,6 @@ cmark_syntax_extension *create_table_extension(void) {
764
637
  cmark_syntax_extension_set_html_render_func(self, html_render);
765
638
  cmark_syntax_extension_set_opaque_free_func(self, opaque_free);
766
639
  cmark_syntax_extension_set_commonmark_escape_func(self, escape);
767
- cmark_syntax_extension_set_postprocess_func(self, postprocess);
768
640
  CMARK_NODE_TABLE = cmark_syntax_extension_add_node(0);
769
641
  CMARK_NODE_TABLE_ROW = cmark_syntax_extension_add_node(0);
770
642
  CMARK_NODE_TABLE_CELL = cmark_syntax_extension_add_node(0);
@@ -2,9 +2,9 @@ if (NOT MSVC)
2
2
 
3
3
  include(GNUInstallDirs)
4
4
 
5
- install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man1/cmark.1
5
+ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man1/cmark-gfm.1
6
6
  DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
7
7
 
8
- install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man3/cmark.3
8
+ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man3/cmark-gfm.3
9
9
  DESTINATION ${CMAKE_INSTALL_MANDIR}/man3)
10
10
  endif(NOT MSVC)