commonmarker 0.23.10 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +1156 -0
  3. data/Cargo.toml +7 -0
  4. data/README.md +237 -172
  5. data/ext/commonmarker/Cargo.toml +20 -0
  6. data/ext/commonmarker/extconf.rb +3 -6
  7. data/ext/commonmarker/src/lib.rs +103 -0
  8. data/ext/commonmarker/src/node.rs +1221 -0
  9. data/ext/commonmarker/src/options.rs +220 -0
  10. data/ext/commonmarker/src/plugins/syntax_highlighting.rs +166 -0
  11. data/ext/commonmarker/src/plugins.rs +6 -0
  12. data/ext/commonmarker/src/utils.rs +8 -0
  13. data/lib/commonmarker/config.rb +92 -40
  14. data/lib/commonmarker/constants.rb +7 -0
  15. data/lib/commonmarker/extension.rb +14 -0
  16. data/lib/commonmarker/node/ast.rb +8 -0
  17. data/lib/commonmarker/node/inspect.rb +14 -4
  18. data/lib/commonmarker/node.rb +29 -47
  19. data/lib/commonmarker/renderer.rb +1 -127
  20. data/lib/commonmarker/utils.rb +22 -0
  21. data/lib/commonmarker/version.rb +2 -2
  22. data/lib/commonmarker.rb +27 -25
  23. metadata +38 -191
  24. data/Rakefile +0 -109
  25. data/bin/commonmarker +0 -118
  26. data/commonmarker.gemspec +0 -38
  27. data/ext/commonmarker/arena.c +0 -104
  28. data/ext/commonmarker/autolink.c +0 -508
  29. data/ext/commonmarker/autolink.h +0 -8
  30. data/ext/commonmarker/blocks.c +0 -1622
  31. data/ext/commonmarker/buffer.c +0 -278
  32. data/ext/commonmarker/buffer.h +0 -116
  33. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  34. data/ext/commonmarker/chunk.h +0 -135
  35. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  36. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
  37. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  38. data/ext/commonmarker/cmark-gfm.h +0 -833
  39. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  40. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  41. data/ext/commonmarker/cmark.c +0 -55
  42. data/ext/commonmarker/cmark_ctype.c +0 -44
  43. data/ext/commonmarker/cmark_ctype.h +0 -33
  44. data/ext/commonmarker/commonmark.c +0 -514
  45. data/ext/commonmarker/commonmarker.c +0 -1308
  46. data/ext/commonmarker/commonmarker.h +0 -16
  47. data/ext/commonmarker/config.h +0 -76
  48. data/ext/commonmarker/core-extensions.c +0 -27
  49. data/ext/commonmarker/entities.inc +0 -2138
  50. data/ext/commonmarker/ext_scanners.c +0 -879
  51. data/ext/commonmarker/ext_scanners.h +0 -24
  52. data/ext/commonmarker/footnotes.c +0 -63
  53. data/ext/commonmarker/footnotes.h +0 -27
  54. data/ext/commonmarker/houdini.h +0 -57
  55. data/ext/commonmarker/houdini_href_e.c +0 -100
  56. data/ext/commonmarker/houdini_html_e.c +0 -66
  57. data/ext/commonmarker/houdini_html_u.c +0 -149
  58. data/ext/commonmarker/html.c +0 -502
  59. data/ext/commonmarker/html.h +0 -27
  60. data/ext/commonmarker/inlines.c +0 -1788
  61. data/ext/commonmarker/inlines.h +0 -29
  62. data/ext/commonmarker/iterator.c +0 -159
  63. data/ext/commonmarker/iterator.h +0 -26
  64. data/ext/commonmarker/latex.c +0 -468
  65. data/ext/commonmarker/linked_list.c +0 -37
  66. data/ext/commonmarker/man.c +0 -274
  67. data/ext/commonmarker/map.c +0 -129
  68. data/ext/commonmarker/map.h +0 -44
  69. data/ext/commonmarker/node.c +0 -1045
  70. data/ext/commonmarker/node.h +0 -167
  71. data/ext/commonmarker/parser.h +0 -59
  72. data/ext/commonmarker/plaintext.c +0 -218
  73. data/ext/commonmarker/plugin.c +0 -36
  74. data/ext/commonmarker/plugin.h +0 -34
  75. data/ext/commonmarker/references.c +0 -43
  76. data/ext/commonmarker/references.h +0 -26
  77. data/ext/commonmarker/registry.c +0 -63
  78. data/ext/commonmarker/registry.h +0 -24
  79. data/ext/commonmarker/render.c +0 -213
  80. data/ext/commonmarker/render.h +0 -62
  81. data/ext/commonmarker/scanners.c +0 -14056
  82. data/ext/commonmarker/scanners.h +0 -70
  83. data/ext/commonmarker/scanners.re +0 -341
  84. data/ext/commonmarker/strikethrough.c +0 -167
  85. data/ext/commonmarker/strikethrough.h +0 -9
  86. data/ext/commonmarker/syntax_extension.c +0 -149
  87. data/ext/commonmarker/syntax_extension.h +0 -34
  88. data/ext/commonmarker/table.c +0 -917
  89. data/ext/commonmarker/table.h +0 -12
  90. data/ext/commonmarker/tagfilter.c +0 -60
  91. data/ext/commonmarker/tagfilter.h +0 -8
  92. data/ext/commonmarker/tasklist.c +0 -156
  93. data/ext/commonmarker/tasklist.h +0 -8
  94. data/ext/commonmarker/utf8.c +0 -317
  95. data/ext/commonmarker/utf8.h +0 -35
  96. data/ext/commonmarker/xml.c +0 -182
  97. data/lib/commonmarker/renderer/html_renderer.rb +0 -256
@@ -1,1622 +0,0 @@
1
- /**
2
- * Block parsing implementation.
3
- *
4
- * For a high-level overview of the block parsing process,
5
- * see http://spec.commonmark.org/0.24/#phase-1-block-structure
6
- */
7
-
8
- #include <stdlib.h>
9
- #include <assert.h>
10
- #include <stdio.h>
11
- #include <limits.h>
12
-
13
- #include "cmark_ctype.h"
14
- #include "syntax_extension.h"
15
- #include "config.h"
16
- #include "parser.h"
17
- #include "cmark-gfm.h"
18
- #include "node.h"
19
- #include "references.h"
20
- #include "utf8.h"
21
- #include "scanners.h"
22
- #include "inlines.h"
23
- #include "houdini.h"
24
- #include "buffer.h"
25
- #include "footnotes.h"
26
-
27
- #define CODE_INDENT 4
28
- #define TAB_STOP 4
29
-
30
- /**
31
- * Very deeply nested lists can cause quadratic performance issues.
32
- * This constant is used in open_new_blocks() to limit the nesting
33
- * depth. It is unlikely that a non-contrived markdown document will
34
- * be nested this deeply.
35
- */
36
- #define MAX_LIST_DEPTH 100
37
-
38
#ifndef MIN
// Smaller of two values. Every argument use and the whole expansion are
// parenthesized so that arguments containing low-precedence operators
// (|, &, ?:, ...) expand correctly. Each argument may still be evaluated
// twice, so do not pass expressions with side effects.
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
41
-
42
// Element n of the `data` array of the object that i points to.
#define peek_at(i, n) ((i)->data[(n)])
43
-
44
- static bool S_last_line_blank(const cmark_node *node) {
45
- return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
46
- }
47
-
48
- static bool S_last_line_checked(const cmark_node *node) {
49
- return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0;
50
- }
51
-
52
- static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
53
- return (cmark_node_type)node->type;
54
- }
55
-
56
- static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
57
- if (is_blank)
58
- node->flags |= CMARK_NODE__LAST_LINE_BLANK;
59
- else
60
- node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
61
- }
62
-
63
- static void S_set_last_line_checked(cmark_node *node) {
64
- node->flags |= CMARK_NODE__LAST_LINE_CHECKED;
65
- }
66
-
67
- static CMARK_INLINE bool S_is_line_end_char(char c) {
68
- return (c == '\n' || c == '\r');
69
- }
70
-
71
- static CMARK_INLINE bool S_is_space_or_tab(char c) {
72
- return (c == ' ' || c == '\t');
73
- }
74
-
75
- static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
76
- size_t len, bool eof);
77
-
78
- static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
79
- bufsize_t bytes);
80
-
81
- static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag,
82
- int start_line, int start_column) {
83
- cmark_node *e;
84
-
85
- e = (cmark_node *)mem->calloc(1, sizeof(*e));
86
- cmark_strbuf_init(mem, &e->content, 32);
87
- e->type = (uint16_t)tag;
88
- e->flags = CMARK_NODE__OPEN;
89
- e->start_line = start_line;
90
- e->start_column = start_column;
91
- e->end_line = start_line;
92
-
93
- return e;
94
- }
95
-
96
- // Create a root document node.
97
- static cmark_node *make_document(cmark_mem *mem) {
98
- cmark_node *e = make_block(mem, CMARK_NODE_DOCUMENT, 1, 1);
99
- return e;
100
- }
101
-
102
- int cmark_parser_attach_syntax_extension(cmark_parser *parser,
103
- cmark_syntax_extension *extension) {
104
- parser->syntax_extensions = cmark_llist_append(parser->mem, parser->syntax_extensions, extension);
105
- if (extension->match_inline || extension->insert_inline_from_delim) {
106
- parser->inline_syntax_extensions = cmark_llist_append(
107
- parser->mem, parser->inline_syntax_extensions, extension);
108
- }
109
-
110
- return 1;
111
- }
112
-
113
- static void cmark_parser_dispose(cmark_parser *parser) {
114
- if (parser->root)
115
- cmark_node_free(parser->root);
116
-
117
- if (parser->refmap)
118
- cmark_map_free(parser->refmap);
119
- }
120
-
121
- static void cmark_parser_reset(cmark_parser *parser) {
122
- cmark_llist *saved_exts = parser->syntax_extensions;
123
- cmark_llist *saved_inline_exts = parser->inline_syntax_extensions;
124
- int saved_options = parser->options;
125
- cmark_mem *saved_mem = parser->mem;
126
-
127
- cmark_parser_dispose(parser);
128
-
129
- memset(parser, 0, sizeof(cmark_parser));
130
- parser->mem = saved_mem;
131
-
132
- cmark_strbuf_init(parser->mem, &parser->curline, 256);
133
- cmark_strbuf_init(parser->mem, &parser->linebuf, 0);
134
-
135
- cmark_node *document = make_document(parser->mem);
136
-
137
- parser->refmap = cmark_reference_map_new(parser->mem);
138
- parser->root = document;
139
- parser->current = document;
140
-
141
- parser->syntax_extensions = saved_exts;
142
- parser->inline_syntax_extensions = saved_inline_exts;
143
- parser->options = saved_options;
144
- }
145
-
146
- cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
147
- cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser));
148
- parser->mem = mem;
149
- parser->options = options;
150
- cmark_parser_reset(parser);
151
- return parser;
152
- }
153
-
154
- cmark_parser *cmark_parser_new(int options) {
155
- extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
156
- return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR);
157
- }
158
-
159
- void cmark_parser_free(cmark_parser *parser) {
160
- cmark_mem *mem = parser->mem;
161
- cmark_parser_dispose(parser);
162
- cmark_strbuf_free(&parser->curline);
163
- cmark_strbuf_free(&parser->linebuf);
164
- cmark_llist_free(parser->mem, parser->syntax_extensions);
165
- cmark_llist_free(parser->mem, parser->inline_syntax_extensions);
166
- mem->free(parser);
167
- }
168
-
169
- static cmark_node *finalize(cmark_parser *parser, cmark_node *b);
170
-
171
- // Returns true if line has only space characters, else false.
172
- static bool is_blank(cmark_strbuf *s, bufsize_t offset) {
173
- while (offset < s->size) {
174
- switch (s->ptr[offset]) {
175
- case '\r':
176
- case '\n':
177
- return true;
178
- case ' ':
179
- offset++;
180
- break;
181
- case '\t':
182
- offset++;
183
- break;
184
- default:
185
- return false;
186
- }
187
- }
188
-
189
- return true;
190
- }
191
-
192
- static CMARK_INLINE bool accepts_lines(cmark_node_type block_type) {
193
- return (block_type == CMARK_NODE_PARAGRAPH ||
194
- block_type == CMARK_NODE_HEADING ||
195
- block_type == CMARK_NODE_CODE_BLOCK);
196
- }
197
-
198
- static CMARK_INLINE bool contains_inlines(cmark_node *node) {
199
- if (node->extension && node->extension->contains_inlines_func) {
200
- return node->extension->contains_inlines_func(node->extension, node) != 0;
201
- }
202
-
203
- return (node->type == CMARK_NODE_PARAGRAPH ||
204
- node->type == CMARK_NODE_HEADING);
205
- }
206
-
207
- static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) {
208
- int chars_to_tab;
209
- int i;
210
- assert(node->flags & CMARK_NODE__OPEN);
211
- if (parser->partially_consumed_tab) {
212
- parser->offset += 1; // skip over tab
213
- // add space characters:
214
- chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
215
- for (i = 0; i < chars_to_tab; i++) {
216
- cmark_strbuf_putc(&node->content, ' ');
217
- }
218
- }
219
- cmark_strbuf_put(&node->content, ch->data + parser->offset,
220
- ch->len - parser->offset);
221
- }
222
-
223
- static void remove_trailing_blank_lines(cmark_strbuf *ln) {
224
- bufsize_t i;
225
- unsigned char c;
226
-
227
- for (i = ln->size - 1; i >= 0; --i) {
228
- c = ln->ptr[i];
229
-
230
- if (c != ' ' && c != '\t' && !S_is_line_end_char(c))
231
- break;
232
- }
233
-
234
- if (i < 0) {
235
- cmark_strbuf_clear(ln);
236
- return;
237
- }
238
-
239
- for (; i < ln->size; ++i) {
240
- c = ln->ptr[i];
241
-
242
- if (!S_is_line_end_char(c))
243
- continue;
244
-
245
- cmark_strbuf_truncate(ln, i);
246
- break;
247
- }
248
- }
249
-
250
- // Check to see if a node ends with a blank line, descending
251
- // if needed into lists and sublists.
252
- static bool S_ends_with_blank_line(cmark_node *node) {
253
- if (S_last_line_checked(node)) {
254
- return(S_last_line_blank(node));
255
- } else if ((S_type(node) == CMARK_NODE_LIST ||
256
- S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
257
- S_set_last_line_checked(node);
258
- return(S_ends_with_blank_line(node->last_child));
259
- } else {
260
- S_set_last_line_checked(node);
261
- return (S_last_line_blank(node));
262
- }
263
- }
264
-
265
- // returns true if content remains after link defs are resolved.
266
- static bool resolve_reference_link_definitions(
267
- cmark_parser *parser,
268
- cmark_node *b) {
269
- bufsize_t pos;
270
- cmark_strbuf *node_content = &b->content;
271
- cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
272
- while (chunk.len && chunk.data[0] == '[' &&
273
- (pos = cmark_parse_reference_inline(parser->mem, &chunk,
274
- parser->refmap))) {
275
-
276
- chunk.data += pos;
277
- chunk.len -= pos;
278
- }
279
- cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
280
- return !is_blank(&b->content, 0);
281
- }
282
-
283
- static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
284
- bufsize_t pos;
285
- cmark_node *item;
286
- cmark_node *subitem;
287
- cmark_node *parent;
288
- bool has_content;
289
-
290
- parent = b->parent;
291
- assert(b->flags &
292
- CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks
293
- b->flags &= ~CMARK_NODE__OPEN;
294
-
295
- if (parser->curline.size == 0) {
296
- // end of input - line number has not been incremented
297
- b->end_line = parser->line_number;
298
- b->end_column = parser->last_line_length;
299
- } else if (S_type(b) == CMARK_NODE_DOCUMENT ||
300
- (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) ||
301
- (S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) {
302
- b->end_line = parser->line_number;
303
- b->end_column = parser->curline.size;
304
- if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n')
305
- b->end_column -= 1;
306
- if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r')
307
- b->end_column -= 1;
308
- } else {
309
- b->end_line = parser->line_number - 1;
310
- b->end_column = parser->last_line_length;
311
- }
312
-
313
- cmark_strbuf *node_content = &b->content;
314
-
315
- switch (S_type(b)) {
316
- case CMARK_NODE_PARAGRAPH:
317
- {
318
- has_content = resolve_reference_link_definitions(parser, b);
319
- if (!has_content) {
320
- // remove blank node (former reference def)
321
- cmark_node_free(b);
322
- }
323
- break;
324
- }
325
-
326
- case CMARK_NODE_CODE_BLOCK:
327
- if (!b->as.code.fenced) { // indented code
328
- remove_trailing_blank_lines(node_content);
329
- cmark_strbuf_putc(node_content, '\n');
330
- } else {
331
- // first line of contents becomes info
332
- for (pos = 0; pos < node_content->size; ++pos) {
333
- if (S_is_line_end_char(node_content->ptr[pos]))
334
- break;
335
- }
336
- assert(pos < node_content->size);
337
-
338
- cmark_strbuf tmp = CMARK_BUF_INIT(parser->mem);
339
- houdini_unescape_html_f(&tmp, node_content->ptr, pos);
340
- cmark_strbuf_trim(&tmp);
341
- cmark_strbuf_unescape(&tmp);
342
- b->as.code.info = cmark_chunk_buf_detach(&tmp);
343
-
344
- if (node_content->ptr[pos] == '\r')
345
- pos += 1;
346
- if (node_content->ptr[pos] == '\n')
347
- pos += 1;
348
- cmark_strbuf_drop(node_content, pos);
349
- }
350
- b->as.code.literal = cmark_chunk_buf_detach(node_content);
351
- break;
352
-
353
- case CMARK_NODE_HTML_BLOCK:
354
- b->as.literal = cmark_chunk_buf_detach(node_content);
355
- break;
356
-
357
- case CMARK_NODE_LIST: // determine tight/loose status
358
- b->as.list.tight = true; // tight by default
359
- item = b->first_child;
360
-
361
- while (item) {
362
- // check for non-final non-empty list item ending with blank line:
363
- if (S_last_line_blank(item) && item->next) {
364
- b->as.list.tight = false;
365
- break;
366
- }
367
- // recurse into children of list item, to see if there are
368
- // spaces between them:
369
- subitem = item->first_child;
370
- while (subitem) {
371
- if ((item->next || subitem->next) &&
372
- S_ends_with_blank_line(subitem)) {
373
- b->as.list.tight = false;
374
- break;
375
- }
376
- subitem = subitem->next;
377
- }
378
- if (!(b->as.list.tight)) {
379
- break;
380
- }
381
- item = item->next;
382
- }
383
-
384
- break;
385
-
386
- default:
387
- break;
388
- }
389
-
390
- return parent;
391
- }
392
-
393
- // Add a node as child of another. Return pointer to child.
394
- static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
395
- cmark_node_type block_type, int start_column) {
396
- assert(parent);
397
-
398
- // if 'parent' isn't the kind of node that can accept this child,
399
- // then back up til we hit a node that can.
400
- while (!cmark_node_can_contain_type(parent, block_type)) {
401
- parent = finalize(parser, parent);
402
- }
403
-
404
- cmark_node *child =
405
- make_block(parser->mem, block_type, parser->line_number, start_column);
406
- child->parent = parent;
407
-
408
- if (parent->last_child) {
409
- parent->last_child->next = child;
410
- child->prev = parent->last_child;
411
- } else {
412
- parent->first_child = child;
413
- child->prev = NULL;
414
- }
415
- parent->last_child = child;
416
- return child;
417
- }
418
-
419
- void cmark_manage_extensions_special_characters(cmark_parser *parser, int add) {
420
- cmark_llist *tmp_ext;
421
-
422
- for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) {
423
- cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data;
424
- cmark_llist *tmp_char;
425
- for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
426
- unsigned char c = (unsigned char)(size_t)tmp_char->data;
427
- if (add)
428
- cmark_inlines_add_special_character(c, ext->emphasis);
429
- else
430
- cmark_inlines_remove_special_character(c, ext->emphasis);
431
- }
432
- }
433
- }
434
-
435
- // Walk through node and all children, recursively, parsing
436
- // string content into inline content where appropriate.
437
- static void process_inlines(cmark_parser *parser,
438
- cmark_map *refmap, int options) {
439
- cmark_iter *iter = cmark_iter_new(parser->root);
440
- cmark_node *cur;
441
- cmark_event_type ev_type;
442
-
443
- cmark_manage_extensions_special_characters(parser, true);
444
-
445
- while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
446
- cur = cmark_iter_get_node(iter);
447
- if (ev_type == CMARK_EVENT_ENTER) {
448
- if (contains_inlines(cur)) {
449
- cmark_parse_inlines(parser, cur, refmap, options);
450
- }
451
- }
452
- }
453
-
454
- cmark_manage_extensions_special_characters(parser, false);
455
-
456
- cmark_iter_free(iter);
457
- }
458
-
459
- static int sort_footnote_by_ix(const void *_a, const void *_b) {
460
- cmark_footnote *a = *(cmark_footnote **)_a;
461
- cmark_footnote *b = *(cmark_footnote **)_b;
462
- return (int)a->ix - (int)b->ix;
463
- }
464
-
465
- static void process_footnotes(cmark_parser *parser) {
466
- // * Collect definitions in a map.
467
- // * Iterate the references in the document in order, assigning indices to
468
- // definitions in the order they're seen.
469
- // * Write out the footnotes at the bottom of the document in index order.
470
-
471
- cmark_map *map = cmark_footnote_map_new(parser->mem);
472
-
473
- cmark_iter *iter = cmark_iter_new(parser->root);
474
- cmark_node *cur;
475
- cmark_event_type ev_type;
476
-
477
- while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
478
- cur = cmark_iter_get_node(iter);
479
- if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) {
480
- cmark_footnote_create(map, cur);
481
- }
482
- }
483
-
484
- cmark_iter_free(iter);
485
- iter = cmark_iter_new(parser->root);
486
- unsigned int ix = 0;
487
-
488
- while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
489
- cur = cmark_iter_get_node(iter);
490
- if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_REFERENCE) {
491
- cmark_footnote *footnote = (cmark_footnote *)cmark_map_lookup(map, &cur->as.literal);
492
- if (footnote) {
493
- if (!footnote->ix)
494
- footnote->ix = ++ix;
495
-
496
- // store a reference to this footnote reference's footnote definition
497
- // this is used by renderers when generating label ids
498
- cur->parent_footnote_def = footnote->node;
499
-
500
- // keep track of a) count of how many times this footnote def has been
501
- // referenced, and b) which reference index this footnote ref is at.
502
- // this is used by renderers when generating links and backreferences.
503
- cur->footnote.ref_ix = ++footnote->node->footnote.def_count;
504
-
505
- char n[32];
506
- snprintf(n, sizeof(n), "%d", footnote->ix);
507
- cmark_chunk_free(parser->mem, &cur->as.literal);
508
- cmark_strbuf buf = CMARK_BUF_INIT(parser->mem);
509
- cmark_strbuf_puts(&buf, n);
510
-
511
- cur->as.literal = cmark_chunk_buf_detach(&buf);
512
- } else {
513
- cmark_node *text = (cmark_node *)parser->mem->calloc(1, sizeof(*text));
514
- cmark_strbuf_init(parser->mem, &text->content, 0);
515
- text->type = (uint16_t) CMARK_NODE_TEXT;
516
-
517
- cmark_strbuf buf = CMARK_BUF_INIT(parser->mem);
518
- cmark_strbuf_puts(&buf, "[^");
519
- cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
520
- cmark_strbuf_putc(&buf, ']');
521
-
522
- text->as.literal = cmark_chunk_buf_detach(&buf);
523
- cmark_node_insert_after(cur, text);
524
- cmark_node_free(cur);
525
- }
526
- }
527
- }
528
-
529
- cmark_iter_free(iter);
530
-
531
- if (map->sorted) {
532
- qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix);
533
- for (unsigned int i = 0; i < map->size; ++i) {
534
- cmark_footnote *footnote = (cmark_footnote *)map->sorted[i];
535
- if (!footnote->ix) {
536
- cmark_node_unlink(footnote->node);
537
- continue;
538
- }
539
- cmark_node_append_child(parser->root, footnote->node);
540
- footnote->node = NULL;
541
- }
542
- }
543
-
544
- cmark_unlink_footnotes_map(map);
545
- cmark_map_free(map);
546
- }
547
-
548
- // Attempts to parse a list item marker (bullet or enumerated).
549
- // On success, returns length of the marker, and populates
550
- // data with the details. On failure, returns 0.
551
- static bufsize_t parse_list_marker(cmark_mem *mem, cmark_chunk *input,
552
- bufsize_t pos, bool interrupts_paragraph,
553
- cmark_list **dataptr) {
554
- unsigned char c;
555
- bufsize_t startpos;
556
- cmark_list *data;
557
- bufsize_t i;
558
-
559
- startpos = pos;
560
- c = peek_at(input, pos);
561
-
562
- if (c == '*' || c == '-' || c == '+') {
563
- pos++;
564
- if (!cmark_isspace(peek_at(input, pos))) {
565
- return 0;
566
- }
567
-
568
- if (interrupts_paragraph) {
569
- i = pos;
570
- // require non-blank content after list marker:
571
- while (S_is_space_or_tab(peek_at(input, i))) {
572
- i++;
573
- }
574
- if (peek_at(input, i) == '\n') {
575
- return 0;
576
- }
577
- }
578
-
579
- data = (cmark_list *)mem->calloc(1, sizeof(*data));
580
- data->marker_offset = 0; // will be adjusted later
581
- data->list_type = CMARK_BULLET_LIST;
582
- data->bullet_char = c;
583
- data->start = 0;
584
- data->delimiter = CMARK_NO_DELIM;
585
- data->tight = false;
586
- } else if (cmark_isdigit(c)) {
587
- int start = 0;
588
- int digits = 0;
589
-
590
- do {
591
- start = (10 * start) + (peek_at(input, pos) - '0');
592
- pos++;
593
- digits++;
594
- // We limit to 9 digits to avoid overflow,
595
- // assuming max int is 2^31 - 1
596
- // This also seems to be the limit for 'start' in some browsers.
597
- } while (digits < 9 && cmark_isdigit(peek_at(input, pos)));
598
-
599
- if (interrupts_paragraph && start != 1) {
600
- return 0;
601
- }
602
- c = peek_at(input, pos);
603
- if (c == '.' || c == ')') {
604
- pos++;
605
- if (!cmark_isspace(peek_at(input, pos))) {
606
- return 0;
607
- }
608
- if (interrupts_paragraph) {
609
- // require non-blank content after list marker:
610
- i = pos;
611
- while (S_is_space_or_tab(peek_at(input, i))) {
612
- i++;
613
- }
614
- if (S_is_line_end_char(peek_at(input, i))) {
615
- return 0;
616
- }
617
- }
618
-
619
- data = (cmark_list *)mem->calloc(1, sizeof(*data));
620
- data->marker_offset = 0; // will be adjusted later
621
- data->list_type = CMARK_ORDERED_LIST;
622
- data->bullet_char = 0;
623
- data->start = start;
624
- data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM);
625
- data->tight = false;
626
- } else {
627
- return 0;
628
- }
629
- } else {
630
- return 0;
631
- }
632
-
633
- *dataptr = data;
634
- return (pos - startpos);
635
- }
636
-
637
- // Return 1 if list item belongs in list, else 0.
638
- static int lists_match(cmark_list *list_data, cmark_list *item_data) {
639
- return (list_data->list_type == item_data->list_type &&
640
- list_data->delimiter == item_data->delimiter &&
641
- // list_data->marker_offset == item_data.marker_offset &&
642
- list_data->bullet_char == item_data->bullet_char);
643
- }
644
-
645
- static cmark_node *finalize_document(cmark_parser *parser) {
646
- while (parser->current != parser->root) {
647
- parser->current = finalize(parser, parser->current);
648
- }
649
-
650
- finalize(parser, parser->root);
651
-
652
- // Limit total size of extra content created from reference links to
653
- // document size to avoid superlinear growth. Always allow 100KB.
654
- if (parser->total_size > 100000)
655
- parser->refmap->max_ref_size = parser->total_size;
656
- else
657
- parser->refmap->max_ref_size = 100000;
658
-
659
- process_inlines(parser, parser->refmap, parser->options);
660
- if (parser->options & CMARK_OPT_FOOTNOTES)
661
- process_footnotes(parser);
662
-
663
- return parser->root;
664
- }
665
-
666
- cmark_node *cmark_parse_file(FILE *f, int options) {
667
- unsigned char buffer[4096];
668
- cmark_parser *parser = cmark_parser_new(options);
669
- size_t bytes;
670
- cmark_node *document;
671
-
672
- while ((bytes = fread(buffer, 1, sizeof(buffer), f)) > 0) {
673
- bool eof = bytes < sizeof(buffer);
674
- S_parser_feed(parser, buffer, bytes, eof);
675
- if (eof) {
676
- break;
677
- }
678
- }
679
-
680
- document = cmark_parser_finish(parser);
681
- cmark_parser_free(parser);
682
- return document;
683
- }
684
-
685
- cmark_node *cmark_parse_document(const char *buffer, size_t len, int options) {
686
- cmark_parser *parser = cmark_parser_new(options);
687
- cmark_node *document;
688
-
689
- S_parser_feed(parser, (const unsigned char *)buffer, len, true);
690
-
691
- document = cmark_parser_finish(parser);
692
- cmark_parser_free(parser);
693
- return document;
694
- }
695
-
696
- void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) {
697
- S_parser_feed(parser, (const unsigned char *)buffer, len, false);
698
- }
699
-
700
- void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len) {
701
- cmark_strbuf saved_linebuf;
702
-
703
- cmark_strbuf_init(parser->mem, &saved_linebuf, 0);
704
- cmark_strbuf_puts(&saved_linebuf, cmark_strbuf_cstr(&parser->linebuf));
705
- cmark_strbuf_clear(&parser->linebuf);
706
-
707
- S_parser_feed(parser, (const unsigned char *)buffer, len, true);
708
-
709
- cmark_strbuf_sets(&parser->linebuf, cmark_strbuf_cstr(&saved_linebuf));
710
- cmark_strbuf_free(&saved_linebuf);
711
- }
712
-
713
- static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
714
- size_t len, bool eof) {
715
- const unsigned char *end = buffer + len;
716
- static const uint8_t repl[] = {239, 191, 189};
717
-
718
- if (len > UINT_MAX - parser->total_size)
719
- parser->total_size = UINT_MAX;
720
- else
721
- parser->total_size += len;
722
-
723
- if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
724
- // skip NL if last buffer ended with CR ; see #117
725
- buffer++;
726
- }
727
- parser->last_buffer_ended_with_cr = false;
728
- while (buffer < end) {
729
- const unsigned char *eol;
730
- bufsize_t chunk_len;
731
- bool process = false;
732
- for (eol = buffer; eol < end; ++eol) {
733
- if (S_is_line_end_char(*eol)) {
734
- process = true;
735
- break;
736
- }
737
- if (*eol == '\0' && eol < end) {
738
- break;
739
- }
740
- }
741
- if (eol >= end && eof) {
742
- process = true;
743
- }
744
-
745
- chunk_len = (bufsize_t)(eol - buffer);
746
- if (process) {
747
- if (parser->linebuf.size > 0) {
748
- cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
749
- S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
750
- cmark_strbuf_clear(&parser->linebuf);
751
- } else {
752
- S_process_line(parser, buffer, chunk_len);
753
- }
754
- } else {
755
- if (eol < end && *eol == '\0') {
756
- // omit NULL byte
757
- cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
758
- // add replacement character
759
- cmark_strbuf_put(&parser->linebuf, repl, 3);
760
- } else {
761
- cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
762
- }
763
- }
764
-
765
- buffer += chunk_len;
766
- if (buffer < end) {
767
- if (*buffer == '\0') {
768
- // skip over NULL
769
- buffer++;
770
- } else {
771
- // skip over line ending characters
772
- if (*buffer == '\r') {
773
- buffer++;
774
- if (buffer == end)
775
- parser->last_buffer_ended_with_cr = true;
776
- }
777
- if (buffer < end && *buffer == '\n')
778
- buffer++;
779
- }
780
- }
781
- }
782
- }
783
-
784
- static void chop_trailing_hashtags(cmark_chunk *ch) {
785
- bufsize_t n, orig_n;
786
-
787
- cmark_chunk_rtrim(ch);
788
- orig_n = n = ch->len - 1;
789
-
790
- // if string ends in space followed by #s, remove these:
791
- while (n >= 0 && peek_at(ch, n) == '#')
792
- n--;
793
-
794
- // Check for a space before the final #s:
795
- if (n != orig_n && n >= 0 && S_is_space_or_tab(peek_at(ch, n))) {
796
- ch->len = n;
797
- cmark_chunk_rtrim(ch);
798
- }
799
- }
800
-
801
- // Check for thematic break. On failure, return 0 and update
802
- // thematic_break_kill_pos with the index at which the
803
- // parse fails. On success, return length of match.
804
- // "...three or more hyphens, asterisks,
805
- // or underscores on a line by themselves. If you wish, you may use
806
- // spaces between the hyphens or asterisks."
807
- static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
808
- bufsize_t offset) {
809
- bufsize_t i;
810
- char c;
811
- char nextc = '\0';
812
- int count;
813
- i = offset;
814
- c = peek_at(input, i);
815
- if (!(c == '*' || c == '_' || c == '-')) {
816
- parser->thematic_break_kill_pos = i;
817
- return 0;
818
- }
819
- count = 1;
820
- while ((nextc = peek_at(input, ++i))) {
821
- if (nextc == c) {
822
- count++;
823
- } else if (nextc != ' ' && nextc != '\t') {
824
- break;
825
- }
826
- }
827
- if (count >= 3 && (nextc == '\r' || nextc == '\n')) {
828
- return (i - offset) + 1;
829
- } else {
830
- parser->thematic_break_kill_pos = i;
831
- return 0;
832
- }
833
- }
834
-
835
- // Find first nonspace character from current offset, setting
836
- // parser->first_nonspace, parser->first_nonspace_column,
837
- // parser->indent, and parser->blank. Does not advance parser->offset.
838
- static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) {
839
- char c;
840
- int chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
841
-
842
- if (parser->first_nonspace <= parser->offset) {
843
- parser->first_nonspace = parser->offset;
844
- parser->first_nonspace_column = parser->column;
845
- while ((c = peek_at(input, parser->first_nonspace))) {
846
- if (c == ' ') {
847
- parser->first_nonspace += 1;
848
- parser->first_nonspace_column += 1;
849
- chars_to_tab = chars_to_tab - 1;
850
- if (chars_to_tab == 0) {
851
- chars_to_tab = TAB_STOP;
852
- }
853
- } else if (c == '\t') {
854
- parser->first_nonspace += 1;
855
- parser->first_nonspace_column += chars_to_tab;
856
- chars_to_tab = TAB_STOP;
857
- } else {
858
- break;
859
- }
860
- }
861
- }
862
-
863
- parser->indent = parser->first_nonspace_column - parser->column;
864
- parser->blank = S_is_line_end_char(peek_at(input, parser->first_nonspace));
865
- }
866
-
867
- // Advance parser->offset and parser->column. parser->offset is the
868
- // byte position in input; parser->column is a virtual column number
869
- // that takes into account tabs. (Multibyte characters are not taken
870
- // into account, because the Markdown line prefixes we are interested in
871
- // analyzing are entirely ASCII.) The count parameter indicates
872
- // how far to advance the offset. If columns is true, then count
873
- // indicates a number of columns; otherwise, a number of bytes.
874
- // If advancing a certain number of columns partially consumes
875
- // a tab character, parser->partially_consumed_tab is set to true.
876
- static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
877
- bufsize_t count, bool columns) {
878
- char c;
879
- int chars_to_tab;
880
- int chars_to_advance;
881
- while (count > 0 && (c = peek_at(input, parser->offset))) {
882
- if (c == '\t') {
883
- chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
884
- if (columns) {
885
- parser->partially_consumed_tab = chars_to_tab > count;
886
- chars_to_advance = MIN(count, chars_to_tab);
887
- parser->column += chars_to_advance;
888
- parser->offset += (parser->partially_consumed_tab ? 0 : 1);
889
- count -= chars_to_advance;
890
- } else {
891
- parser->partially_consumed_tab = false;
892
- parser->column += chars_to_tab;
893
- parser->offset += 1;
894
- count -= 1;
895
- }
896
- } else {
897
- parser->partially_consumed_tab = false;
898
- parser->offset += 1;
899
- parser->column += 1; // assume ascii; block starts are ascii
900
- count -= 1;
901
- }
902
- }
903
- }
904
-
905
- static bool S_last_child_is_open(cmark_node *container) {
906
- return container->last_child &&
907
- (container->last_child->flags & CMARK_NODE__OPEN);
908
- }
909
-
910
- static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) {
911
- bool res = false;
912
- bufsize_t matched = 0;
913
-
914
- matched =
915
- parser->indent <= 3 && peek_at(input, parser->first_nonspace) == '>';
916
- if (matched) {
917
-
918
- S_advance_offset(parser, input, parser->indent + 1, true);
919
-
920
- if (S_is_space_or_tab(peek_at(input, parser->offset))) {
921
- S_advance_offset(parser, input, 1, true);
922
- }
923
-
924
- res = true;
925
- }
926
- return res;
927
- }
928
-
929
- static bool parse_footnote_definition_block_prefix(cmark_parser *parser, cmark_chunk *input,
930
- cmark_node *container) {
931
- if (parser->indent >= 4) {
932
- S_advance_offset(parser, input, 4, true);
933
- return true;
934
- } else if (input->len > 0 && (input->data[0] == '\n' || (input->data[0] == '\r' && input->data[1] == '\n'))) {
935
- return true;
936
- }
937
-
938
- return false;
939
- }
940
-
941
- static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input,
942
- cmark_node *container) {
943
- bool res = false;
944
-
945
- if (parser->indent >=
946
- container->as.list.marker_offset + container->as.list.padding) {
947
- S_advance_offset(parser, input, container->as.list.marker_offset +
948
- container->as.list.padding,
949
- true);
950
- res = true;
951
- } else if (parser->blank && container->first_child != NULL) {
952
- // if container->first_child is NULL, then the opening line
953
- // of the list item was blank after the list marker; in this
954
- // case, we are done with the list item.
955
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
956
- false);
957
- res = true;
958
- }
959
- return res;
960
- }
961
-
962
- static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
963
- cmark_node *container,
964
- bool *should_continue) {
965
- bool res = false;
966
-
967
- if (!container->as.code.fenced) { // indented
968
- if (parser->indent >= CODE_INDENT) {
969
- S_advance_offset(parser, input, CODE_INDENT, true);
970
- res = true;
971
- } else if (parser->blank) {
972
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
973
- false);
974
- res = true;
975
- }
976
- } else { // fenced
977
- bufsize_t matched = 0;
978
-
979
- if (parser->indent <= 3 && (peek_at(input, parser->first_nonspace) ==
980
- container->as.code.fence_char)) {
981
- matched = scan_close_code_fence(input, parser->first_nonspace);
982
- }
983
-
984
- if (matched >= container->as.code.fence_length) {
985
- // closing fence - and since we're at
986
- // the end of a line, we can stop processing it:
987
- *should_continue = false;
988
- S_advance_offset(parser, input, matched, false);
989
- parser->current = finalize(parser, container);
990
- } else {
991
- // skip opt. spaces of fence parser->offset
992
- int i = container->as.code.fence_offset;
993
-
994
- while (i > 0 && S_is_space_or_tab(peek_at(input, parser->offset))) {
995
- S_advance_offset(parser, input, 1, true);
996
- i--;
997
- }
998
- res = true;
999
- }
1000
- }
1001
-
1002
- return res;
1003
- }
1004
-
1005
- static bool parse_html_block_prefix(cmark_parser *parser,
1006
- cmark_node *container) {
1007
- bool res = false;
1008
- int html_block_type = container->as.html_block_type;
1009
-
1010
- assert(html_block_type >= 1 && html_block_type <= 7);
1011
- switch (html_block_type) {
1012
- case 1:
1013
- case 2:
1014
- case 3:
1015
- case 4:
1016
- case 5:
1017
- // these types of blocks can accept blanks
1018
- res = true;
1019
- break;
1020
- case 6:
1021
- case 7:
1022
- res = !parser->blank;
1023
- break;
1024
- }
1025
-
1026
- return res;
1027
- }
1028
-
1029
- static bool parse_extension_block(cmark_parser *parser,
1030
- cmark_node *container,
1031
- cmark_chunk *input)
1032
- {
1033
- bool res = false;
1034
-
1035
- if (container->extension->last_block_matches) {
1036
- if (container->extension->last_block_matches(
1037
- container->extension, parser, input->data, input->len, container))
1038
- res = true;
1039
- }
1040
-
1041
- return res;
1042
- }
1043
-
1044
- /**
1045
- * For each containing node, try to parse the associated line start.
1046
- *
1047
- * Will not close unmatched blocks, as we may have a lazy continuation
1048
- * line -> http://spec.commonmark.org/0.24/#lazy-continuation-line
1049
- *
1050
- * Returns: The last matching node, or NULL
1051
- */
1052
- static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
1053
- bool *all_matched) {
1054
- bool should_continue = true;
1055
- *all_matched = false;
1056
- cmark_node *container = parser->root;
1057
- cmark_node_type cont_type;
1058
-
1059
- while (S_last_child_is_open(container)) {
1060
- container = container->last_child;
1061
- cont_type = S_type(container);
1062
-
1063
- S_find_first_nonspace(parser, input);
1064
-
1065
- if (container->extension) {
1066
- if (!parse_extension_block(parser, container, input))
1067
- goto done;
1068
- continue;
1069
- }
1070
-
1071
- switch (cont_type) {
1072
- case CMARK_NODE_BLOCK_QUOTE:
1073
- if (!parse_block_quote_prefix(parser, input))
1074
- goto done;
1075
- break;
1076
- case CMARK_NODE_ITEM:
1077
- if (!parse_node_item_prefix(parser, input, container))
1078
- goto done;
1079
- break;
1080
- case CMARK_NODE_CODE_BLOCK:
1081
- if (!parse_code_block_prefix(parser, input, container, &should_continue))
1082
- goto done;
1083
- break;
1084
- case CMARK_NODE_HEADING:
1085
- // a heading can never contain more than one line
1086
- goto done;
1087
- case CMARK_NODE_HTML_BLOCK:
1088
- if (!parse_html_block_prefix(parser, container))
1089
- goto done;
1090
- break;
1091
- case CMARK_NODE_PARAGRAPH:
1092
- if (parser->blank)
1093
- goto done;
1094
- break;
1095
- case CMARK_NODE_FOOTNOTE_DEFINITION:
1096
- if (!parse_footnote_definition_block_prefix(parser, input, container))
1097
- goto done;
1098
- break;
1099
- default:
1100
- break;
1101
- }
1102
- }
1103
-
1104
- *all_matched = true;
1105
-
1106
- done:
1107
- if (!*all_matched) {
1108
- container = container->parent; // back up to last matching node
1109
- }
1110
-
1111
- if (!should_continue) {
1112
- container = NULL;
1113
- }
1114
-
1115
- return container;
1116
- }
1117
-
1118
- static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1119
- cmark_chunk *input, bool all_matched) {
1120
- bool indented;
1121
- cmark_list *data = NULL;
1122
- bool maybe_lazy = S_type(parser->current) == CMARK_NODE_PARAGRAPH;
1123
- cmark_node_type cont_type = S_type(*container);
1124
- bufsize_t matched = 0;
1125
- int lev = 0;
1126
- bool save_partially_consumed_tab;
1127
- bool has_content;
1128
- int save_offset;
1129
- int save_column;
1130
- size_t depth = 0;
1131
-
1132
- while (cont_type != CMARK_NODE_CODE_BLOCK &&
1133
- cont_type != CMARK_NODE_HTML_BLOCK) {
1134
- depth++;
1135
- S_find_first_nonspace(parser, input);
1136
- indented = parser->indent >= CODE_INDENT;
1137
-
1138
- if (!indented && peek_at(input, parser->first_nonspace) == '>') {
1139
-
1140
- bufsize_t blockquote_startpos = parser->first_nonspace;
1141
-
1142
- S_advance_offset(parser, input,
1143
- parser->first_nonspace + 1 - parser->offset, false);
1144
- // optional following character
1145
- if (S_is_space_or_tab(peek_at(input, parser->offset))) {
1146
- S_advance_offset(parser, input, 1, true);
1147
- }
1148
- *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE,
1149
- blockquote_startpos + 1);
1150
-
1151
- } else if (!indented && (matched = scan_atx_heading_start(
1152
- input, parser->first_nonspace))) {
1153
- bufsize_t hashpos;
1154
- int level = 0;
1155
- bufsize_t heading_startpos = parser->first_nonspace;
1156
-
1157
- S_advance_offset(parser, input,
1158
- parser->first_nonspace + matched - parser->offset,
1159
- false);
1160
- *container = add_child(parser, *container, CMARK_NODE_HEADING,
1161
- heading_startpos + 1);
1162
-
1163
- hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace);
1164
-
1165
- while (peek_at(input, hashpos) == '#') {
1166
- level++;
1167
- hashpos++;
1168
- }
1169
-
1170
- (*container)->as.heading.level = level;
1171
- (*container)->as.heading.setext = false;
1172
- (*container)->internal_offset = matched;
1173
-
1174
- } else if (!indented && (matched = scan_open_code_fence(
1175
- input, parser->first_nonspace))) {
1176
- *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
1177
- parser->first_nonspace + 1);
1178
- (*container)->as.code.fenced = true;
1179
- (*container)->as.code.fence_char = peek_at(input, parser->first_nonspace);
1180
- (*container)->as.code.fence_length = (matched > 255) ? 255 : (uint8_t)matched;
1181
- (*container)->as.code.fence_offset =
1182
- (int8_t)(parser->first_nonspace - parser->offset);
1183
- (*container)->as.code.info = cmark_chunk_literal("");
1184
- S_advance_offset(parser, input,
1185
- parser->first_nonspace + matched - parser->offset,
1186
- false);
1187
-
1188
- } else if (!indented && ((matched = scan_html_block_start(
1189
- input, parser->first_nonspace)) ||
1190
- (cont_type != CMARK_NODE_PARAGRAPH &&
1191
- (matched = scan_html_block_start_7(
1192
- input, parser->first_nonspace))))) {
1193
- *container = add_child(parser, *container, CMARK_NODE_HTML_BLOCK,
1194
- parser->first_nonspace + 1);
1195
- (*container)->as.html_block_type = matched;
1196
- // note, we don't adjust parser->offset because the tag is part of the
1197
- // text
1198
- } else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
1199
- (lev =
1200
- scan_setext_heading_line(input, parser->first_nonspace))) {
1201
- // finalize paragraph, resolving reference links
1202
- has_content = resolve_reference_link_definitions(parser, *container);
1203
-
1204
- if (has_content) {
1205
-
1206
- (*container)->type = (uint16_t)CMARK_NODE_HEADING;
1207
- (*container)->as.heading.level = lev;
1208
- (*container)->as.heading.setext = true;
1209
- S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
1210
- }
1211
- } else if (!indented &&
1212
- !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
1213
- (parser->thematic_break_kill_pos <= parser->first_nonspace) &&
1214
- (matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
1215
- // it's only now that we know the line is not part of a setext heading:
1216
- *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
1217
- parser->first_nonspace + 1);
1218
- S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
1219
- } else if (!indented &&
1220
- (parser->options & CMARK_OPT_FOOTNOTES) &&
1221
- depth < MAX_LIST_DEPTH &&
1222
- (matched = scan_footnote_definition(input, parser->first_nonspace))) {
1223
- cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2);
1224
-
1225
- while (c.data[c.len - 1] != ']')
1226
- --c.len;
1227
- --c.len;
1228
-
1229
- cmark_chunk_to_cstr(parser->mem, &c);
1230
-
1231
- S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false);
1232
- *container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1);
1233
- (*container)->as.literal = c;
1234
-
1235
- (*container)->internal_offset = matched;
1236
- } else if ((!indented || cont_type == CMARK_NODE_LIST) &&
1237
- parser->indent < 4 &&
1238
- depth < MAX_LIST_DEPTH &&
1239
- (matched = parse_list_marker(
1240
- parser->mem, input, parser->first_nonspace,
1241
- (*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
1242
-
1243
- // Note that we can have new list items starting with >= 4
1244
- // spaces indent, as long as the list container is still open.
1245
- int i = 0;
1246
-
1247
- // compute padding:
1248
- S_advance_offset(parser, input,
1249
- parser->first_nonspace + matched - parser->offset,
1250
- false);
1251
-
1252
- save_partially_consumed_tab = parser->partially_consumed_tab;
1253
- save_offset = parser->offset;
1254
- save_column = parser->column;
1255
-
1256
- while (parser->column - save_column <= 5 &&
1257
- S_is_space_or_tab(peek_at(input, parser->offset))) {
1258
- S_advance_offset(parser, input, 1, true);
1259
- }
1260
-
1261
- i = parser->column - save_column;
1262
- if (i >= 5 || i < 1 ||
1263
- // only spaces after list marker:
1264
- S_is_line_end_char(peek_at(input, parser->offset))) {
1265
- data->padding = matched + 1;
1266
- parser->offset = save_offset;
1267
- parser->column = save_column;
1268
- parser->partially_consumed_tab = save_partially_consumed_tab;
1269
- if (i > 0) {
1270
- S_advance_offset(parser, input, 1, true);
1271
- }
1272
- } else {
1273
- data->padding = matched + i;
1274
- }
1275
-
1276
- // check container; if it's a list, see if this list item
1277
- // can continue the list; otherwise, create a list container.
1278
-
1279
- data->marker_offset = parser->indent;
1280
-
1281
- if (cont_type != CMARK_NODE_LIST ||
1282
- !lists_match(&((*container)->as.list), data)) {
1283
- *container = add_child(parser, *container, CMARK_NODE_LIST,
1284
- parser->first_nonspace + 1);
1285
-
1286
- memcpy(&((*container)->as.list), data, sizeof(*data));
1287
- }
1288
-
1289
- // add the list item
1290
- *container = add_child(parser, *container, CMARK_NODE_ITEM,
1291
- parser->first_nonspace + 1);
1292
- /* TODO: static */
1293
- memcpy(&((*container)->as.list), data, sizeof(*data));
1294
- parser->mem->free(data);
1295
- } else if (indented && !maybe_lazy && !parser->blank) {
1296
- S_advance_offset(parser, input, CODE_INDENT, true);
1297
- *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
1298
- parser->offset + 1);
1299
- (*container)->as.code.fenced = false;
1300
- (*container)->as.code.fence_char = 0;
1301
- (*container)->as.code.fence_length = 0;
1302
- (*container)->as.code.fence_offset = 0;
1303
- (*container)->as.code.info = cmark_chunk_literal("");
1304
- } else {
1305
- cmark_llist *tmp;
1306
- cmark_node *new_container = NULL;
1307
-
1308
- for (tmp = parser->syntax_extensions; tmp; tmp=tmp->next) {
1309
- cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data;
1310
-
1311
- if (ext->try_opening_block) {
1312
- new_container = ext->try_opening_block(
1313
- ext, indented, parser, *container, input->data, input->len);
1314
-
1315
- if (new_container) {
1316
- *container = new_container;
1317
- break;
1318
- }
1319
- }
1320
- }
1321
-
1322
- if (!new_container) {
1323
- break;
1324
- }
1325
- }
1326
-
1327
- if (accepts_lines(S_type(*container))) {
1328
- // if it's a line container, it can't contain other containers
1329
- break;
1330
- }
1331
-
1332
- cont_type = S_type(*container);
1333
- maybe_lazy = false;
1334
- }
1335
- }
1336
-
1337
- static void add_text_to_container(cmark_parser *parser, cmark_node *container,
1338
- cmark_node *last_matched_container,
1339
- cmark_chunk *input) {
1340
- cmark_node *tmp;
1341
- // what remains at parser->offset is a text line. add the text to the
1342
- // appropriate container.
1343
-
1344
- S_find_first_nonspace(parser, input);
1345
-
1346
- if (parser->blank && container->last_child)
1347
- S_set_last_line_blank(container->last_child, true);
1348
-
1349
- // block quote lines are never blank as they start with >
1350
- // and we don't count blanks in fenced code for purposes of tight/loose
1351
- // lists or breaking out of lists. we also don't set last_line_blank
1352
- // on an empty list item.
1353
- const cmark_node_type ctype = S_type(container);
1354
- const bool last_line_blank =
1355
- (parser->blank && ctype != CMARK_NODE_BLOCK_QUOTE &&
1356
- ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK &&
1357
- !(ctype == CMARK_NODE_CODE_BLOCK && container->as.code.fenced) &&
1358
- !(ctype == CMARK_NODE_ITEM && container->first_child == NULL &&
1359
- container->start_line == parser->line_number));
1360
-
1361
- S_set_last_line_blank(container, last_line_blank);
1362
-
1363
- tmp = container;
1364
- while (tmp->parent) {
1365
- S_set_last_line_blank(tmp->parent, false);
1366
- tmp = tmp->parent;
1367
- }
1368
-
1369
- // If the last line processed belonged to a paragraph node,
1370
- // and we didn't match all of the line prefixes for the open containers,
1371
- // and we didn't start any new containers,
1372
- // and the line isn't blank,
1373
- // then treat this as a "lazy continuation line" and add it to
1374
- // the open paragraph.
1375
- if (parser->current != last_matched_container &&
1376
- container == last_matched_container && !parser->blank &&
1377
- S_type(parser->current) == CMARK_NODE_PARAGRAPH) {
1378
- add_line(parser->current, input, parser);
1379
- } else { // not a lazy continuation
1380
- // Finalize any blocks that were not matched and set cur to container:
1381
- while (parser->current != last_matched_container) {
1382
- parser->current = finalize(parser, parser->current);
1383
- assert(parser->current != NULL);
1384
- }
1385
-
1386
- if (S_type(container) == CMARK_NODE_CODE_BLOCK) {
1387
- add_line(container, input, parser);
1388
- } else if (S_type(container) == CMARK_NODE_HTML_BLOCK) {
1389
- add_line(container, input, parser);
1390
-
1391
- int matches_end_condition;
1392
- switch (container->as.html_block_type) {
1393
- case 1:
1394
- // </script>, </style>, </pre>
1395
- matches_end_condition =
1396
- scan_html_block_end_1(input, parser->first_nonspace);
1397
- break;
1398
- case 2:
1399
- // -->
1400
- matches_end_condition =
1401
- scan_html_block_end_2(input, parser->first_nonspace);
1402
- break;
1403
- case 3:
1404
- // ?>
1405
- matches_end_condition =
1406
- scan_html_block_end_3(input, parser->first_nonspace);
1407
- break;
1408
- case 4:
1409
- // >
1410
- matches_end_condition =
1411
- scan_html_block_end_4(input, parser->first_nonspace);
1412
- break;
1413
- case 5:
1414
- // ]]>
1415
- matches_end_condition =
1416
- scan_html_block_end_5(input, parser->first_nonspace);
1417
- break;
1418
- default:
1419
- matches_end_condition = 0;
1420
- break;
1421
- }
1422
-
1423
- if (matches_end_condition) {
1424
- container = finalize(parser, container);
1425
- assert(parser->current != NULL);
1426
- }
1427
- } else if (parser->blank) {
1428
- // ??? do nothing
1429
- } else if (accepts_lines(S_type(container))) {
1430
- if (S_type(container) == CMARK_NODE_HEADING &&
1431
- container->as.heading.setext == false) {
1432
- chop_trailing_hashtags(input);
1433
- }
1434
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
1435
- false);
1436
- add_line(container, input, parser);
1437
- } else {
1438
- // create paragraph container for line
1439
- container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
1440
- parser->first_nonspace + 1);
1441
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
1442
- false);
1443
- add_line(container, input, parser);
1444
- }
1445
-
1446
- parser->current = container;
1447
- }
1448
- }
1449
-
1450
- /* See http://spec.commonmark.org/0.24/#phase-1-block-structure */
1451
- static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1452
- bufsize_t bytes) {
1453
- cmark_node *last_matched_container;
1454
- bool all_matched = true;
1455
- cmark_node *container;
1456
- cmark_chunk input;
1457
- cmark_node *current;
1458
-
1459
- cmark_strbuf_clear(&parser->curline);
1460
-
1461
- if (parser->options & CMARK_OPT_VALIDATE_UTF8)
1462
- cmark_utf8proc_check(&parser->curline, buffer, bytes);
1463
- else
1464
- cmark_strbuf_put(&parser->curline, buffer, bytes);
1465
-
1466
- bytes = parser->curline.size;
1467
-
1468
- // ensure line ends with a newline:
1469
- if (bytes == 0 || !S_is_line_end_char(parser->curline.ptr[bytes - 1]))
1470
- cmark_strbuf_putc(&parser->curline, '\n');
1471
-
1472
- parser->offset = 0;
1473
- parser->column = 0;
1474
- parser->first_nonspace = 0;
1475
- parser->first_nonspace_column = 0;
1476
- parser->thematic_break_kill_pos = 0;
1477
- parser->indent = 0;
1478
- parser->blank = false;
1479
- parser->partially_consumed_tab = false;
1480
-
1481
- input.data = parser->curline.ptr;
1482
- input.len = parser->curline.size;
1483
- input.alloc = 0;
1484
-
1485
- // Skip UTF-8 BOM.
1486
- if (parser->line_number == 0 &&
1487
- input.len >= 3 &&
1488
- memcmp(input.data, "\xef\xbb\xbf", 3) == 0)
1489
- parser->offset += 3;
1490
-
1491
- parser->line_number++;
1492
-
1493
- last_matched_container = check_open_blocks(parser, &input, &all_matched);
1494
-
1495
- if (!last_matched_container)
1496
- goto finished;
1497
-
1498
- container = last_matched_container;
1499
-
1500
- current = parser->current;
1501
-
1502
- open_new_blocks(parser, &container, &input, all_matched);
1503
-
1504
- /* parser->current might have changed if feed_reentrant was called */
1505
- if (current == parser->current)
1506
- add_text_to_container(parser, container, last_matched_container, &input);
1507
-
1508
- finished:
1509
- parser->last_line_length = input.len;
1510
- if (parser->last_line_length &&
1511
- input.data[parser->last_line_length - 1] == '\n')
1512
- parser->last_line_length -= 1;
1513
- if (parser->last_line_length &&
1514
- input.data[parser->last_line_length - 1] == '\r')
1515
- parser->last_line_length -= 1;
1516
-
1517
- cmark_strbuf_clear(&parser->curline);
1518
- }
1519
-
1520
- cmark_node *cmark_parser_finish(cmark_parser *parser) {
1521
- cmark_node *res;
1522
- cmark_llist *extensions;
1523
-
1524
- /* Parser was already finished once */
1525
- if (parser->root == NULL)
1526
- return NULL;
1527
-
1528
- if (parser->linebuf.size) {
1529
- S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
1530
- cmark_strbuf_clear(&parser->linebuf);
1531
- }
1532
-
1533
- finalize_document(parser);
1534
-
1535
- cmark_consolidate_text_nodes(parser->root);
1536
-
1537
- cmark_strbuf_free(&parser->curline);
1538
- cmark_strbuf_free(&parser->linebuf);
1539
-
1540
- #if CMARK_DEBUG_NODES
1541
- if (cmark_node_check(parser->root, stderr)) {
1542
- abort();
1543
- }
1544
- #endif
1545
-
1546
- for (extensions = parser->syntax_extensions; extensions; extensions = extensions->next) {
1547
- cmark_syntax_extension *ext = (cmark_syntax_extension *) extensions->data;
1548
- if (ext->postprocess_func) {
1549
- cmark_node *processed = ext->postprocess_func(ext, parser, parser->root);
1550
- if (processed)
1551
- parser->root = processed;
1552
- }
1553
- }
1554
-
1555
- res = parser->root;
1556
- parser->root = NULL;
1557
-
1558
- cmark_parser_reset(parser);
1559
-
1560
- return res;
1561
- }
1562
-
1563
- int cmark_parser_get_line_number(cmark_parser *parser) {
1564
- return parser->line_number;
1565
- }
1566
-
1567
- bufsize_t cmark_parser_get_offset(cmark_parser *parser) {
1568
- return parser->offset;
1569
- }
1570
-
1571
- bufsize_t cmark_parser_get_column(cmark_parser *parser) {
1572
- return parser->column;
1573
- }
1574
-
1575
- int cmark_parser_get_first_nonspace(cmark_parser *parser) {
1576
- return parser->first_nonspace;
1577
- }
1578
-
1579
- int cmark_parser_get_first_nonspace_column(cmark_parser *parser) {
1580
- return parser->first_nonspace_column;
1581
- }
1582
-
1583
- int cmark_parser_get_indent(cmark_parser *parser) {
1584
- return parser->indent;
1585
- }
1586
-
1587
- int cmark_parser_is_blank(cmark_parser *parser) {
1588
- return parser->blank;
1589
- }
1590
-
1591
- int cmark_parser_has_partially_consumed_tab(cmark_parser *parser) {
1592
- return parser->partially_consumed_tab;
1593
- }
1594
-
1595
- int cmark_parser_get_last_line_length(cmark_parser *parser) {
1596
- return parser->last_line_length;
1597
- }
1598
-
1599
- cmark_node *cmark_parser_add_child(cmark_parser *parser,
1600
- cmark_node *parent,
1601
- cmark_node_type block_type,
1602
- int start_column) {
1603
- return add_child(parser, parent, block_type, start_column);
1604
- }
1605
-
1606
- void cmark_parser_advance_offset(cmark_parser *parser,
1607
- const char *input,
1608
- int count,
1609
- int columns) {
1610
- cmark_chunk input_chunk = cmark_chunk_literal(input);
1611
-
1612
- S_advance_offset(parser, &input_chunk, count, columns != 0);
1613
- }
1614
-
1615
- void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser,
1616
- cmark_ispunct_func func) {
1617
- parser->backslash_ispunct = func;
1618
- }
1619
-
1620
- cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser) {
1621
- return parser->syntax_extensions;
1622
- }