commonmarker 0.23.8 → 1.0.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +70 -212
  3. data/commonmarker.gemspec +34 -31
  4. data/ext/commonmarker/Cargo.toml +12 -0
  5. data/ext/commonmarker/_util.rb +102 -0
  6. data/ext/commonmarker/extconf.rb +4 -5
  7. data/ext/commonmarker/src/comrak_options.rs +107 -0
  8. data/ext/commonmarker/src/lib.rs +27 -0
  9. data/lib/commonmarker/config.rb +57 -38
  10. data/lib/commonmarker/extension.rb +14 -0
  11. data/lib/commonmarker/renderer.rb +1 -127
  12. data/lib/commonmarker/version.rb +2 -2
  13. data/lib/commonmarker.rb +14 -29
  14. metadata +37 -181
  15. data/Rakefile +0 -109
  16. data/bin/commonmarker +0 -118
  17. data/ext/commonmarker/arena.c +0 -104
  18. data/ext/commonmarker/autolink.c +0 -508
  19. data/ext/commonmarker/autolink.h +0 -8
  20. data/ext/commonmarker/blocks.c +0 -1610
  21. data/ext/commonmarker/buffer.c +0 -278
  22. data/ext/commonmarker/buffer.h +0 -116
  23. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  24. data/ext/commonmarker/chunk.h +0 -135
  25. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  26. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
  27. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  28. data/ext/commonmarker/cmark-gfm.h +0 -822
  29. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  30. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  31. data/ext/commonmarker/cmark.c +0 -55
  32. data/ext/commonmarker/cmark_ctype.c +0 -44
  33. data/ext/commonmarker/cmark_ctype.h +0 -33
  34. data/ext/commonmarker/commonmark.c +0 -529
  35. data/ext/commonmarker/commonmarker.c +0 -1308
  36. data/ext/commonmarker/commonmarker.h +0 -16
  37. data/ext/commonmarker/config.h +0 -76
  38. data/ext/commonmarker/core-extensions.c +0 -27
  39. data/ext/commonmarker/entities.inc +0 -2138
  40. data/ext/commonmarker/ext_scanners.c +0 -879
  41. data/ext/commonmarker/ext_scanners.h +0 -24
  42. data/ext/commonmarker/footnotes.c +0 -63
  43. data/ext/commonmarker/footnotes.h +0 -27
  44. data/ext/commonmarker/houdini.h +0 -57
  45. data/ext/commonmarker/houdini_href_e.c +0 -100
  46. data/ext/commonmarker/houdini_html_e.c +0 -66
  47. data/ext/commonmarker/houdini_html_u.c +0 -149
  48. data/ext/commonmarker/html.c +0 -500
  49. data/ext/commonmarker/html.h +0 -27
  50. data/ext/commonmarker/inlines.c +0 -1788
  51. data/ext/commonmarker/inlines.h +0 -29
  52. data/ext/commonmarker/iterator.c +0 -159
  53. data/ext/commonmarker/iterator.h +0 -26
  54. data/ext/commonmarker/latex.c +0 -466
  55. data/ext/commonmarker/linked_list.c +0 -37
  56. data/ext/commonmarker/man.c +0 -278
  57. data/ext/commonmarker/map.c +0 -129
  58. data/ext/commonmarker/map.h +0 -44
  59. data/ext/commonmarker/node.c +0 -1009
  60. data/ext/commonmarker/node.h +0 -151
  61. data/ext/commonmarker/parser.h +0 -59
  62. data/ext/commonmarker/plaintext.c +0 -235
  63. data/ext/commonmarker/plugin.c +0 -36
  64. data/ext/commonmarker/plugin.h +0 -34
  65. data/ext/commonmarker/references.c +0 -43
  66. data/ext/commonmarker/references.h +0 -26
  67. data/ext/commonmarker/registry.c +0 -63
  68. data/ext/commonmarker/registry.h +0 -24
  69. data/ext/commonmarker/render.c +0 -205
  70. data/ext/commonmarker/render.h +0 -62
  71. data/ext/commonmarker/scanners.c +0 -14056
  72. data/ext/commonmarker/scanners.h +0 -70
  73. data/ext/commonmarker/scanners.re +0 -341
  74. data/ext/commonmarker/strikethrough.c +0 -167
  75. data/ext/commonmarker/strikethrough.h +0 -9
  76. data/ext/commonmarker/syntax_extension.c +0 -149
  77. data/ext/commonmarker/syntax_extension.h +0 -34
  78. data/ext/commonmarker/table.c +0 -872
  79. data/ext/commonmarker/table.h +0 -12
  80. data/ext/commonmarker/tagfilter.c +0 -60
  81. data/ext/commonmarker/tagfilter.h +0 -8
  82. data/ext/commonmarker/tasklist.c +0 -156
  83. data/ext/commonmarker/tasklist.h +0 -8
  84. data/ext/commonmarker/utf8.c +0 -317
  85. data/ext/commonmarker/utf8.h +0 -35
  86. data/ext/commonmarker/xml.c +0 -181
  87. data/lib/commonmarker/node/inspect.rb +0 -47
  88. data/lib/commonmarker/node.rb +0 -83
  89. data/lib/commonmarker/renderer/html_renderer.rb +0 -252
@@ -1,1610 +0,0 @@
1
- /**
2
- * Block parsing implementation.
3
- *
4
- * For a high-level overview of the block parsing process,
5
- * see http://spec.commonmark.org/0.24/#phase-1-block-structure
6
- */
7
-
8
- #include <stdlib.h>
9
- #include <assert.h>
10
- #include <stdio.h>
11
- #include <limits.h>
12
-
13
- #include "cmark_ctype.h"
14
- #include "syntax_extension.h"
15
- #include "config.h"
16
- #include "parser.h"
17
- #include "cmark-gfm.h"
18
- #include "node.h"
19
- #include "references.h"
20
- #include "utf8.h"
21
- #include "scanners.h"
22
- #include "inlines.h"
23
- #include "houdini.h"
24
- #include "buffer.h"
25
- #include "footnotes.h"
26
-
27
// Column/tab constants used by the block parser in this file.
#define CODE_INDENT 4
#define TAB_STOP 4

#ifndef MIN
// Smaller of x and y. Every argument use and the whole expansion are
// parenthesized so that operator precedence inside an argument cannot change
// the result. Each argument may still be evaluated twice, so do not pass
// expressions with side effects.
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif

// Byte at index n of the chunk pointed to by i.
#define peek_at(i, n) ((i)->data[(n)])
35
-
36
- static bool S_last_line_blank(const cmark_node *node) {
37
- return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
38
- }
39
-
40
- static bool S_last_line_checked(const cmark_node *node) {
41
- return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0;
42
- }
43
-
44
- static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
45
- return (cmark_node_type)node->type;
46
- }
47
-
48
- static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
49
- if (is_blank)
50
- node->flags |= CMARK_NODE__LAST_LINE_BLANK;
51
- else
52
- node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
53
- }
54
-
55
- static void S_set_last_line_checked(cmark_node *node) {
56
- node->flags |= CMARK_NODE__LAST_LINE_CHECKED;
57
- }
58
-
59
- static CMARK_INLINE bool S_is_line_end_char(char c) {
60
- return (c == '\n' || c == '\r');
61
- }
62
-
63
- static CMARK_INLINE bool S_is_space_or_tab(char c) {
64
- return (c == ' ' || c == '\t');
65
- }
66
-
67
- static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
68
- size_t len, bool eof);
69
-
70
- static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
71
- bufsize_t bytes);
72
-
73
- static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag,
74
- int start_line, int start_column) {
75
- cmark_node *e;
76
-
77
- e = (cmark_node *)mem->calloc(1, sizeof(*e));
78
- cmark_strbuf_init(mem, &e->content, 32);
79
- e->type = (uint16_t)tag;
80
- e->flags = CMARK_NODE__OPEN;
81
- e->start_line = start_line;
82
- e->start_column = start_column;
83
- e->end_line = start_line;
84
-
85
- return e;
86
- }
87
-
88
- // Create a root document node.
89
- static cmark_node *make_document(cmark_mem *mem) {
90
- cmark_node *e = make_block(mem, CMARK_NODE_DOCUMENT, 1, 1);
91
- return e;
92
- }
93
-
94
- int cmark_parser_attach_syntax_extension(cmark_parser *parser,
95
- cmark_syntax_extension *extension) {
96
- parser->syntax_extensions = cmark_llist_append(parser->mem, parser->syntax_extensions, extension);
97
- if (extension->match_inline || extension->insert_inline_from_delim) {
98
- parser->inline_syntax_extensions = cmark_llist_append(
99
- parser->mem, parser->inline_syntax_extensions, extension);
100
- }
101
-
102
- return 1;
103
- }
104
-
105
- static void cmark_parser_dispose(cmark_parser *parser) {
106
- if (parser->root)
107
- cmark_node_free(parser->root);
108
-
109
- if (parser->refmap)
110
- cmark_map_free(parser->refmap);
111
- }
112
-
113
- static void cmark_parser_reset(cmark_parser *parser) {
114
- cmark_llist *saved_exts = parser->syntax_extensions;
115
- cmark_llist *saved_inline_exts = parser->inline_syntax_extensions;
116
- int saved_options = parser->options;
117
- cmark_mem *saved_mem = parser->mem;
118
-
119
- cmark_parser_dispose(parser);
120
-
121
- memset(parser, 0, sizeof(cmark_parser));
122
- parser->mem = saved_mem;
123
-
124
- cmark_strbuf_init(parser->mem, &parser->curline, 256);
125
- cmark_strbuf_init(parser->mem, &parser->linebuf, 0);
126
-
127
- cmark_node *document = make_document(parser->mem);
128
-
129
- parser->refmap = cmark_reference_map_new(parser->mem);
130
- parser->root = document;
131
- parser->current = document;
132
-
133
- parser->syntax_extensions = saved_exts;
134
- parser->inline_syntax_extensions = saved_inline_exts;
135
- parser->options = saved_options;
136
- }
137
-
138
- cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
139
- cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser));
140
- parser->mem = mem;
141
- parser->options = options;
142
- cmark_parser_reset(parser);
143
- return parser;
144
- }
145
-
146
- cmark_parser *cmark_parser_new(int options) {
147
- extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
148
- return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR);
149
- }
150
-
151
- void cmark_parser_free(cmark_parser *parser) {
152
- cmark_mem *mem = parser->mem;
153
- cmark_parser_dispose(parser);
154
- cmark_strbuf_free(&parser->curline);
155
- cmark_strbuf_free(&parser->linebuf);
156
- cmark_llist_free(parser->mem, parser->syntax_extensions);
157
- cmark_llist_free(parser->mem, parser->inline_syntax_extensions);
158
- mem->free(parser);
159
- }
160
-
161
- static cmark_node *finalize(cmark_parser *parser, cmark_node *b);
162
-
163
- // Returns true if line has only space characters, else false.
164
- static bool is_blank(cmark_strbuf *s, bufsize_t offset) {
165
- while (offset < s->size) {
166
- switch (s->ptr[offset]) {
167
- case '\r':
168
- case '\n':
169
- return true;
170
- case ' ':
171
- offset++;
172
- break;
173
- case '\t':
174
- offset++;
175
- break;
176
- default:
177
- return false;
178
- }
179
- }
180
-
181
- return true;
182
- }
183
-
184
- static CMARK_INLINE bool accepts_lines(cmark_node_type block_type) {
185
- return (block_type == CMARK_NODE_PARAGRAPH ||
186
- block_type == CMARK_NODE_HEADING ||
187
- block_type == CMARK_NODE_CODE_BLOCK);
188
- }
189
-
190
- static CMARK_INLINE bool contains_inlines(cmark_node *node) {
191
- if (node->extension && node->extension->contains_inlines_func) {
192
- return node->extension->contains_inlines_func(node->extension, node) != 0;
193
- }
194
-
195
- return (node->type == CMARK_NODE_PARAGRAPH ||
196
- node->type == CMARK_NODE_HEADING);
197
- }
198
-
199
- static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) {
200
- int chars_to_tab;
201
- int i;
202
- assert(node->flags & CMARK_NODE__OPEN);
203
- if (parser->partially_consumed_tab) {
204
- parser->offset += 1; // skip over tab
205
- // add space characters:
206
- chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
207
- for (i = 0; i < chars_to_tab; i++) {
208
- cmark_strbuf_putc(&node->content, ' ');
209
- }
210
- }
211
- cmark_strbuf_put(&node->content, ch->data + parser->offset,
212
- ch->len - parser->offset);
213
- }
214
-
215
- static void remove_trailing_blank_lines(cmark_strbuf *ln) {
216
- bufsize_t i;
217
- unsigned char c;
218
-
219
- for (i = ln->size - 1; i >= 0; --i) {
220
- c = ln->ptr[i];
221
-
222
- if (c != ' ' && c != '\t' && !S_is_line_end_char(c))
223
- break;
224
- }
225
-
226
- if (i < 0) {
227
- cmark_strbuf_clear(ln);
228
- return;
229
- }
230
-
231
- for (; i < ln->size; ++i) {
232
- c = ln->ptr[i];
233
-
234
- if (!S_is_line_end_char(c))
235
- continue;
236
-
237
- cmark_strbuf_truncate(ln, i);
238
- break;
239
- }
240
- }
241
-
242
- // Check to see if a node ends with a blank line, descending
243
- // if needed into lists and sublists.
244
- static bool S_ends_with_blank_line(cmark_node *node) {
245
- if (S_last_line_checked(node)) {
246
- return(S_last_line_blank(node));
247
- } else if ((S_type(node) == CMARK_NODE_LIST ||
248
- S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
249
- S_set_last_line_checked(node);
250
- return(S_ends_with_blank_line(node->last_child));
251
- } else {
252
- S_set_last_line_checked(node);
253
- return (S_last_line_blank(node));
254
- }
255
- }
256
-
257
- // returns true if content remains after link defs are resolved.
258
- static bool resolve_reference_link_definitions(
259
- cmark_parser *parser,
260
- cmark_node *b) {
261
- bufsize_t pos;
262
- cmark_strbuf *node_content = &b->content;
263
- cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
264
- while (chunk.len && chunk.data[0] == '[' &&
265
- (pos = cmark_parse_reference_inline(parser->mem, &chunk,
266
- parser->refmap))) {
267
-
268
- chunk.data += pos;
269
- chunk.len -= pos;
270
- }
271
- cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
272
- return !is_blank(&b->content, 0);
273
- }
274
-
275
- static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
276
- bufsize_t pos;
277
- cmark_node *item;
278
- cmark_node *subitem;
279
- cmark_node *parent;
280
- bool has_content;
281
-
282
- parent = b->parent;
283
- assert(b->flags &
284
- CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks
285
- b->flags &= ~CMARK_NODE__OPEN;
286
-
287
- if (parser->curline.size == 0) {
288
- // end of input - line number has not been incremented
289
- b->end_line = parser->line_number;
290
- b->end_column = parser->last_line_length;
291
- } else if (S_type(b) == CMARK_NODE_DOCUMENT ||
292
- (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) ||
293
- (S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) {
294
- b->end_line = parser->line_number;
295
- b->end_column = parser->curline.size;
296
- if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n')
297
- b->end_column -= 1;
298
- if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r')
299
- b->end_column -= 1;
300
- } else {
301
- b->end_line = parser->line_number - 1;
302
- b->end_column = parser->last_line_length;
303
- }
304
-
305
- cmark_strbuf *node_content = &b->content;
306
-
307
- switch (S_type(b)) {
308
- case CMARK_NODE_PARAGRAPH:
309
- {
310
- has_content = resolve_reference_link_definitions(parser, b);
311
- if (!has_content) {
312
- // remove blank node (former reference def)
313
- cmark_node_free(b);
314
- }
315
- break;
316
- }
317
-
318
- case CMARK_NODE_CODE_BLOCK:
319
- if (!b->as.code.fenced) { // indented code
320
- remove_trailing_blank_lines(node_content);
321
- cmark_strbuf_putc(node_content, '\n');
322
- } else {
323
- // first line of contents becomes info
324
- for (pos = 0; pos < node_content->size; ++pos) {
325
- if (S_is_line_end_char(node_content->ptr[pos]))
326
- break;
327
- }
328
- assert(pos < node_content->size);
329
-
330
- cmark_strbuf tmp = CMARK_BUF_INIT(parser->mem);
331
- houdini_unescape_html_f(&tmp, node_content->ptr, pos);
332
- cmark_strbuf_trim(&tmp);
333
- cmark_strbuf_unescape(&tmp);
334
- b->as.code.info = cmark_chunk_buf_detach(&tmp);
335
-
336
- if (node_content->ptr[pos] == '\r')
337
- pos += 1;
338
- if (node_content->ptr[pos] == '\n')
339
- pos += 1;
340
- cmark_strbuf_drop(node_content, pos);
341
- }
342
- b->as.code.literal = cmark_chunk_buf_detach(node_content);
343
- break;
344
-
345
- case CMARK_NODE_HTML_BLOCK:
346
- b->as.literal = cmark_chunk_buf_detach(node_content);
347
- break;
348
-
349
- case CMARK_NODE_LIST: // determine tight/loose status
350
- b->as.list.tight = true; // tight by default
351
- item = b->first_child;
352
-
353
- while (item) {
354
- // check for non-final non-empty list item ending with blank line:
355
- if (S_last_line_blank(item) && item->next) {
356
- b->as.list.tight = false;
357
- break;
358
- }
359
- // recurse into children of list item, to see if there are
360
- // spaces between them:
361
- subitem = item->first_child;
362
- while (subitem) {
363
- if ((item->next || subitem->next) &&
364
- S_ends_with_blank_line(subitem)) {
365
- b->as.list.tight = false;
366
- break;
367
- }
368
- subitem = subitem->next;
369
- }
370
- if (!(b->as.list.tight)) {
371
- break;
372
- }
373
- item = item->next;
374
- }
375
-
376
- break;
377
-
378
- default:
379
- break;
380
- }
381
-
382
- return parent;
383
- }
384
-
385
- // Add a node as child of another. Return pointer to child.
386
- static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
387
- cmark_node_type block_type, int start_column) {
388
- assert(parent);
389
-
390
- // if 'parent' isn't the kind of node that can accept this child,
391
- // then back up til we hit a node that can.
392
- while (!cmark_node_can_contain_type(parent, block_type)) {
393
- parent = finalize(parser, parent);
394
- }
395
-
396
- cmark_node *child =
397
- make_block(parser->mem, block_type, parser->line_number, start_column);
398
- child->parent = parent;
399
-
400
- if (parent->last_child) {
401
- parent->last_child->next = child;
402
- child->prev = parent->last_child;
403
- } else {
404
- parent->first_child = child;
405
- child->prev = NULL;
406
- }
407
- parent->last_child = child;
408
- return child;
409
- }
410
-
411
- void cmark_manage_extensions_special_characters(cmark_parser *parser, int add) {
412
- cmark_llist *tmp_ext;
413
-
414
- for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) {
415
- cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data;
416
- cmark_llist *tmp_char;
417
- for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
418
- unsigned char c = (unsigned char)(size_t)tmp_char->data;
419
- if (add)
420
- cmark_inlines_add_special_character(c, ext->emphasis);
421
- else
422
- cmark_inlines_remove_special_character(c, ext->emphasis);
423
- }
424
- }
425
- }
426
-
427
- // Walk through node and all children, recursively, parsing
428
- // string content into inline content where appropriate.
429
- static void process_inlines(cmark_parser *parser,
430
- cmark_map *refmap, int options) {
431
- cmark_iter *iter = cmark_iter_new(parser->root);
432
- cmark_node *cur;
433
- cmark_event_type ev_type;
434
-
435
- cmark_manage_extensions_special_characters(parser, true);
436
-
437
- while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
438
- cur = cmark_iter_get_node(iter);
439
- if (ev_type == CMARK_EVENT_ENTER) {
440
- if (contains_inlines(cur)) {
441
- cmark_parse_inlines(parser, cur, refmap, options);
442
- }
443
- }
444
- }
445
-
446
- cmark_manage_extensions_special_characters(parser, false);
447
-
448
- cmark_iter_free(iter);
449
- }
450
-
451
- static int sort_footnote_by_ix(const void *_a, const void *_b) {
452
- cmark_footnote *a = *(cmark_footnote **)_a;
453
- cmark_footnote *b = *(cmark_footnote **)_b;
454
- return (int)a->ix - (int)b->ix;
455
- }
456
-
457
- static void process_footnotes(cmark_parser *parser) {
458
- // * Collect definitions in a map.
459
- // * Iterate the references in the document in order, assigning indices to
460
- // definitions in the order they're seen.
461
- // * Write out the footnotes at the bottom of the document in index order.
462
-
463
- cmark_map *map = cmark_footnote_map_new(parser->mem);
464
-
465
- cmark_iter *iter = cmark_iter_new(parser->root);
466
- cmark_node *cur;
467
- cmark_event_type ev_type;
468
-
469
- while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
470
- cur = cmark_iter_get_node(iter);
471
- if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) {
472
- cmark_footnote_create(map, cur);
473
- }
474
- }
475
-
476
- cmark_iter_free(iter);
477
- iter = cmark_iter_new(parser->root);
478
- unsigned int ix = 0;
479
-
480
- while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
481
- cur = cmark_iter_get_node(iter);
482
- if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_REFERENCE) {
483
- cmark_footnote *footnote = (cmark_footnote *)cmark_map_lookup(map, &cur->as.literal);
484
- if (footnote) {
485
- if (!footnote->ix)
486
- footnote->ix = ++ix;
487
-
488
- // store a reference to this footnote reference's footnote definition
489
- // this is used by renderers when generating label ids
490
- cur->parent_footnote_def = footnote->node;
491
-
492
- // keep track of a) count of how many times this footnote def has been
493
- // referenced, and b) which reference index this footnote ref is at.
494
- // this is used by renderers when generating links and backreferences.
495
- cur->footnote.ref_ix = ++footnote->node->footnote.def_count;
496
-
497
- char n[32];
498
- snprintf(n, sizeof(n), "%d", footnote->ix);
499
- cmark_chunk_free(parser->mem, &cur->as.literal);
500
- cmark_strbuf buf = CMARK_BUF_INIT(parser->mem);
501
- cmark_strbuf_puts(&buf, n);
502
-
503
- cur->as.literal = cmark_chunk_buf_detach(&buf);
504
- } else {
505
- cmark_node *text = (cmark_node *)parser->mem->calloc(1, sizeof(*text));
506
- cmark_strbuf_init(parser->mem, &text->content, 0);
507
- text->type = (uint16_t) CMARK_NODE_TEXT;
508
-
509
- cmark_strbuf buf = CMARK_BUF_INIT(parser->mem);
510
- cmark_strbuf_puts(&buf, "[^");
511
- cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
512
- cmark_strbuf_putc(&buf, ']');
513
-
514
- text->as.literal = cmark_chunk_buf_detach(&buf);
515
- cmark_node_insert_after(cur, text);
516
- cmark_node_free(cur);
517
- }
518
- }
519
- }
520
-
521
- cmark_iter_free(iter);
522
-
523
- if (map->sorted) {
524
- qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix);
525
- for (unsigned int i = 0; i < map->size; ++i) {
526
- cmark_footnote *footnote = (cmark_footnote *)map->sorted[i];
527
- if (!footnote->ix) {
528
- cmark_node_unlink(footnote->node);
529
- continue;
530
- }
531
- cmark_node_append_child(parser->root, footnote->node);
532
- footnote->node = NULL;
533
- }
534
- }
535
-
536
- cmark_unlink_footnotes_map(map);
537
- cmark_map_free(map);
538
- }
539
-
540
- // Attempts to parse a list item marker (bullet or enumerated).
541
- // On success, returns length of the marker, and populates
542
- // data with the details. On failure, returns 0.
543
- static bufsize_t parse_list_marker(cmark_mem *mem, cmark_chunk *input,
544
- bufsize_t pos, bool interrupts_paragraph,
545
- cmark_list **dataptr) {
546
- unsigned char c;
547
- bufsize_t startpos;
548
- cmark_list *data;
549
- bufsize_t i;
550
-
551
- startpos = pos;
552
- c = peek_at(input, pos);
553
-
554
- if (c == '*' || c == '-' || c == '+') {
555
- pos++;
556
- if (!cmark_isspace(peek_at(input, pos))) {
557
- return 0;
558
- }
559
-
560
- if (interrupts_paragraph) {
561
- i = pos;
562
- // require non-blank content after list marker:
563
- while (S_is_space_or_tab(peek_at(input, i))) {
564
- i++;
565
- }
566
- if (peek_at(input, i) == '\n') {
567
- return 0;
568
- }
569
- }
570
-
571
- data = (cmark_list *)mem->calloc(1, sizeof(*data));
572
- data->marker_offset = 0; // will be adjusted later
573
- data->list_type = CMARK_BULLET_LIST;
574
- data->bullet_char = c;
575
- data->start = 0;
576
- data->delimiter = CMARK_NO_DELIM;
577
- data->tight = false;
578
- } else if (cmark_isdigit(c)) {
579
- int start = 0;
580
- int digits = 0;
581
-
582
- do {
583
- start = (10 * start) + (peek_at(input, pos) - '0');
584
- pos++;
585
- digits++;
586
- // We limit to 9 digits to avoid overflow,
587
- // assuming max int is 2^31 - 1
588
- // This also seems to be the limit for 'start' in some browsers.
589
- } while (digits < 9 && cmark_isdigit(peek_at(input, pos)));
590
-
591
- if (interrupts_paragraph && start != 1) {
592
- return 0;
593
- }
594
- c = peek_at(input, pos);
595
- if (c == '.' || c == ')') {
596
- pos++;
597
- if (!cmark_isspace(peek_at(input, pos))) {
598
- return 0;
599
- }
600
- if (interrupts_paragraph) {
601
- // require non-blank content after list marker:
602
- i = pos;
603
- while (S_is_space_or_tab(peek_at(input, i))) {
604
- i++;
605
- }
606
- if (S_is_line_end_char(peek_at(input, i))) {
607
- return 0;
608
- }
609
- }
610
-
611
- data = (cmark_list *)mem->calloc(1, sizeof(*data));
612
- data->marker_offset = 0; // will be adjusted later
613
- data->list_type = CMARK_ORDERED_LIST;
614
- data->bullet_char = 0;
615
- data->start = start;
616
- data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM);
617
- data->tight = false;
618
- } else {
619
- return 0;
620
- }
621
- } else {
622
- return 0;
623
- }
624
-
625
- *dataptr = data;
626
- return (pos - startpos);
627
- }
628
-
629
- // Return 1 if list item belongs in list, else 0.
630
- static int lists_match(cmark_list *list_data, cmark_list *item_data) {
631
- return (list_data->list_type == item_data->list_type &&
632
- list_data->delimiter == item_data->delimiter &&
633
- // list_data->marker_offset == item_data.marker_offset &&
634
- list_data->bullet_char == item_data->bullet_char);
635
- }
636
-
637
- static cmark_node *finalize_document(cmark_parser *parser) {
638
- while (parser->current != parser->root) {
639
- parser->current = finalize(parser, parser->current);
640
- }
641
-
642
- finalize(parser, parser->root);
643
-
644
- // Limit total size of extra content created from reference links to
645
- // document size to avoid superlinear growth. Always allow 100KB.
646
- if (parser->total_size > 100000)
647
- parser->refmap->max_ref_size = parser->total_size;
648
- else
649
- parser->refmap->max_ref_size = 100000;
650
-
651
- process_inlines(parser, parser->refmap, parser->options);
652
- if (parser->options & CMARK_OPT_FOOTNOTES)
653
- process_footnotes(parser);
654
-
655
- return parser->root;
656
- }
657
-
658
- cmark_node *cmark_parse_file(FILE *f, int options) {
659
- unsigned char buffer[4096];
660
- cmark_parser *parser = cmark_parser_new(options);
661
- size_t bytes;
662
- cmark_node *document;
663
-
664
- while ((bytes = fread(buffer, 1, sizeof(buffer), f)) > 0) {
665
- bool eof = bytes < sizeof(buffer);
666
- S_parser_feed(parser, buffer, bytes, eof);
667
- if (eof) {
668
- break;
669
- }
670
- }
671
-
672
- document = cmark_parser_finish(parser);
673
- cmark_parser_free(parser);
674
- return document;
675
- }
676
-
677
- cmark_node *cmark_parse_document(const char *buffer, size_t len, int options) {
678
- cmark_parser *parser = cmark_parser_new(options);
679
- cmark_node *document;
680
-
681
- S_parser_feed(parser, (const unsigned char *)buffer, len, true);
682
-
683
- document = cmark_parser_finish(parser);
684
- cmark_parser_free(parser);
685
- return document;
686
- }
687
-
688
- void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) {
689
- S_parser_feed(parser, (const unsigned char *)buffer, len, false);
690
- }
691
-
692
- void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len) {
693
- cmark_strbuf saved_linebuf;
694
-
695
- cmark_strbuf_init(parser->mem, &saved_linebuf, 0);
696
- cmark_strbuf_puts(&saved_linebuf, cmark_strbuf_cstr(&parser->linebuf));
697
- cmark_strbuf_clear(&parser->linebuf);
698
-
699
- S_parser_feed(parser, (const unsigned char *)buffer, len, true);
700
-
701
- cmark_strbuf_sets(&parser->linebuf, cmark_strbuf_cstr(&saved_linebuf));
702
- cmark_strbuf_free(&saved_linebuf);
703
- }
704
-
705
- static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
706
- size_t len, bool eof) {
707
- const unsigned char *end = buffer + len;
708
- static const uint8_t repl[] = {239, 191, 189};
709
-
710
- if (len > UINT_MAX - parser->total_size)
711
- parser->total_size = UINT_MAX;
712
- else
713
- parser->total_size += len;
714
-
715
- if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
716
- // skip NL if last buffer ended with CR ; see #117
717
- buffer++;
718
- }
719
- parser->last_buffer_ended_with_cr = false;
720
- while (buffer < end) {
721
- const unsigned char *eol;
722
- bufsize_t chunk_len;
723
- bool process = false;
724
- for (eol = buffer; eol < end; ++eol) {
725
- if (S_is_line_end_char(*eol)) {
726
- process = true;
727
- break;
728
- }
729
- if (*eol == '\0' && eol < end) {
730
- break;
731
- }
732
- }
733
- if (eol >= end && eof) {
734
- process = true;
735
- }
736
-
737
- chunk_len = (bufsize_t)(eol - buffer);
738
- if (process) {
739
- if (parser->linebuf.size > 0) {
740
- cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
741
- S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
742
- cmark_strbuf_clear(&parser->linebuf);
743
- } else {
744
- S_process_line(parser, buffer, chunk_len);
745
- }
746
- } else {
747
- if (eol < end && *eol == '\0') {
748
- // omit NULL byte
749
- cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
750
- // add replacement character
751
- cmark_strbuf_put(&parser->linebuf, repl, 3);
752
- } else {
753
- cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
754
- }
755
- }
756
-
757
- buffer += chunk_len;
758
- if (buffer < end) {
759
- if (*buffer == '\0') {
760
- // skip over NULL
761
- buffer++;
762
- } else {
763
- // skip over line ending characters
764
- if (*buffer == '\r') {
765
- buffer++;
766
- if (buffer == end)
767
- parser->last_buffer_ended_with_cr = true;
768
- }
769
- if (buffer < end && *buffer == '\n')
770
- buffer++;
771
- }
772
- }
773
- }
774
- }
775
-
776
- static void chop_trailing_hashtags(cmark_chunk *ch) {
777
- bufsize_t n, orig_n;
778
-
779
- cmark_chunk_rtrim(ch);
780
- orig_n = n = ch->len - 1;
781
-
782
- // if string ends in space followed by #s, remove these:
783
- while (n >= 0 && peek_at(ch, n) == '#')
784
- n--;
785
-
786
- // Check for a space before the final #s:
787
- if (n != orig_n && n >= 0 && S_is_space_or_tab(peek_at(ch, n))) {
788
- ch->len = n;
789
- cmark_chunk_rtrim(ch);
790
- }
791
- }
792
-
793
- // Check for thematic break. On failure, return 0 and update
794
- // thematic_break_kill_pos with the index at which the
795
- // parse fails. On success, return length of match.
796
- // "...three or more hyphens, asterisks,
797
- // or underscores on a line by themselves. If you wish, you may use
798
- // spaces between the hyphens or asterisks."
799
- static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
800
- bufsize_t offset) {
801
- bufsize_t i;
802
- char c;
803
- char nextc = '\0';
804
- int count;
805
- i = offset;
806
- c = peek_at(input, i);
807
- if (!(c == '*' || c == '_' || c == '-')) {
808
- parser->thematic_break_kill_pos = i;
809
- return 0;
810
- }
811
- count = 1;
812
- while ((nextc = peek_at(input, ++i))) {
813
- if (nextc == c) {
814
- count++;
815
- } else if (nextc != ' ' && nextc != '\t') {
816
- break;
817
- }
818
- }
819
- if (count >= 3 && (nextc == '\r' || nextc == '\n')) {
820
- return (i - offset) + 1;
821
- } else {
822
- parser->thematic_break_kill_pos = i;
823
- return 0;
824
- }
825
- }
826
-
827
- // Find first nonspace character from current offset, setting
828
- // parser->first_nonspace, parser->first_nonspace_column,
829
- // parser->indent, and parser->blank. Does not advance parser->offset.
830
- static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) {
831
- char c;
832
- int chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
833
-
834
- if (parser->first_nonspace <= parser->offset) {
835
- parser->first_nonspace = parser->offset;
836
- parser->first_nonspace_column = parser->column;
837
- while ((c = peek_at(input, parser->first_nonspace))) {
838
- if (c == ' ') {
839
- parser->first_nonspace += 1;
840
- parser->first_nonspace_column += 1;
841
- chars_to_tab = chars_to_tab - 1;
842
- if (chars_to_tab == 0) {
843
- chars_to_tab = TAB_STOP;
844
- }
845
- } else if (c == '\t') {
846
- parser->first_nonspace += 1;
847
- parser->first_nonspace_column += chars_to_tab;
848
- chars_to_tab = TAB_STOP;
849
- } else {
850
- break;
851
- }
852
- }
853
- }
854
-
855
- parser->indent = parser->first_nonspace_column - parser->column;
856
- parser->blank = S_is_line_end_char(peek_at(input, parser->first_nonspace));
857
- }
858
-
859
- // Advance parser->offset and parser->column. parser->offset is the
860
- // byte position in input; parser->column is a virtual column number
861
- // that takes into account tabs. (Multibyte characters are not taken
862
- // into account, because the Markdown line prefixes we are interested in
863
- // analyzing are entirely ASCII.) The count parameter indicates
864
- // how far to advance the offset. If columns is true, then count
865
- // indicates a number of columns; otherwise, a number of bytes.
866
- // If advancing a certain number of columns partially consumes
867
- // a tab character, parser->partially_consumed_tab is set to true.
868
- static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
869
- bufsize_t count, bool columns) {
870
- char c;
871
- int chars_to_tab;
872
- int chars_to_advance;
873
- while (count > 0 && (c = peek_at(input, parser->offset))) {
874
- if (c == '\t') {
875
- chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
876
- if (columns) {
877
- parser->partially_consumed_tab = chars_to_tab > count;
878
- chars_to_advance = MIN(count, chars_to_tab);
879
- parser->column += chars_to_advance;
880
- parser->offset += (parser->partially_consumed_tab ? 0 : 1);
881
- count -= chars_to_advance;
882
- } else {
883
- parser->partially_consumed_tab = false;
884
- parser->column += chars_to_tab;
885
- parser->offset += 1;
886
- count -= 1;
887
- }
888
- } else {
889
- parser->partially_consumed_tab = false;
890
- parser->offset += 1;
891
- parser->column += 1; // assume ascii; block starts are ascii
892
- count -= 1;
893
- }
894
- }
895
- }
896
-
897
- static bool S_last_child_is_open(cmark_node *container) {
898
- return container->last_child &&
899
- (container->last_child->flags & CMARK_NODE__OPEN);
900
- }
901
-
902
- static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) {
903
- bool res = false;
904
- bufsize_t matched = 0;
905
-
906
- matched =
907
- parser->indent <= 3 && peek_at(input, parser->first_nonspace) == '>';
908
- if (matched) {
909
-
910
- S_advance_offset(parser, input, parser->indent + 1, true);
911
-
912
- if (S_is_space_or_tab(peek_at(input, parser->offset))) {
913
- S_advance_offset(parser, input, 1, true);
914
- }
915
-
916
- res = true;
917
- }
918
- return res;
919
- }
920
-
921
- static bool parse_footnote_definition_block_prefix(cmark_parser *parser, cmark_chunk *input,
922
- cmark_node *container) {
923
- if (parser->indent >= 4) {
924
- S_advance_offset(parser, input, 4, true);
925
- return true;
926
- } else if (input->len > 0 && (input->data[0] == '\n' || (input->data[0] == '\r' && input->data[1] == '\n'))) {
927
- return true;
928
- }
929
-
930
- return false;
931
- }
932
-
933
- static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input,
934
- cmark_node *container) {
935
- bool res = false;
936
-
937
- if (parser->indent >=
938
- container->as.list.marker_offset + container->as.list.padding) {
939
- S_advance_offset(parser, input, container->as.list.marker_offset +
940
- container->as.list.padding,
941
- true);
942
- res = true;
943
- } else if (parser->blank && container->first_child != NULL) {
944
- // if container->first_child is NULL, then the opening line
945
- // of the list item was blank after the list marker; in this
946
- // case, we are done with the list item.
947
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
948
- false);
949
- res = true;
950
- }
951
- return res;
952
- }
953
-
954
- static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
955
- cmark_node *container,
956
- bool *should_continue) {
957
- bool res = false;
958
-
959
- if (!container->as.code.fenced) { // indented
960
- if (parser->indent >= CODE_INDENT) {
961
- S_advance_offset(parser, input, CODE_INDENT, true);
962
- res = true;
963
- } else if (parser->blank) {
964
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
965
- false);
966
- res = true;
967
- }
968
- } else { // fenced
969
- bufsize_t matched = 0;
970
-
971
- if (parser->indent <= 3 && (peek_at(input, parser->first_nonspace) ==
972
- container->as.code.fence_char)) {
973
- matched = scan_close_code_fence(input, parser->first_nonspace);
974
- }
975
-
976
- if (matched >= container->as.code.fence_length) {
977
- // closing fence - and since we're at
978
- // the end of a line, we can stop processing it:
979
- *should_continue = false;
980
- S_advance_offset(parser, input, matched, false);
981
- parser->current = finalize(parser, container);
982
- } else {
983
- // skip opt. spaces of fence parser->offset
984
- int i = container->as.code.fence_offset;
985
-
986
- while (i > 0 && S_is_space_or_tab(peek_at(input, parser->offset))) {
987
- S_advance_offset(parser, input, 1, true);
988
- i--;
989
- }
990
- res = true;
991
- }
992
- }
993
-
994
- return res;
995
- }
996
-
997
- static bool parse_html_block_prefix(cmark_parser *parser,
998
- cmark_node *container) {
999
- bool res = false;
1000
- int html_block_type = container->as.html_block_type;
1001
-
1002
- assert(html_block_type >= 1 && html_block_type <= 7);
1003
- switch (html_block_type) {
1004
- case 1:
1005
- case 2:
1006
- case 3:
1007
- case 4:
1008
- case 5:
1009
- // these types of blocks can accept blanks
1010
- res = true;
1011
- break;
1012
- case 6:
1013
- case 7:
1014
- res = !parser->blank;
1015
- break;
1016
- }
1017
-
1018
- return res;
1019
- }
1020
-
1021
- static bool parse_extension_block(cmark_parser *parser,
1022
- cmark_node *container,
1023
- cmark_chunk *input)
1024
- {
1025
- bool res = false;
1026
-
1027
- if (container->extension->last_block_matches) {
1028
- if (container->extension->last_block_matches(
1029
- container->extension, parser, input->data, input->len, container))
1030
- res = true;
1031
- }
1032
-
1033
- return res;
1034
- }
1035
-
1036
- /**
1037
- * For each containing node, try to parse the associated line start.
1038
- *
1039
- * Will not close unmatched blocks, as we may have a lazy continuation
1040
- * line -> http://spec.commonmark.org/0.24/#lazy-continuation-line
1041
- *
1042
- * Returns: The last matching node, or NULL
1043
- */
1044
- static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
1045
- bool *all_matched) {
1046
- bool should_continue = true;
1047
- *all_matched = false;
1048
- cmark_node *container = parser->root;
1049
- cmark_node_type cont_type;
1050
-
1051
- while (S_last_child_is_open(container)) {
1052
- container = container->last_child;
1053
- cont_type = S_type(container);
1054
-
1055
- S_find_first_nonspace(parser, input);
1056
-
1057
- if (container->extension) {
1058
- if (!parse_extension_block(parser, container, input))
1059
- goto done;
1060
- continue;
1061
- }
1062
-
1063
- switch (cont_type) {
1064
- case CMARK_NODE_BLOCK_QUOTE:
1065
- if (!parse_block_quote_prefix(parser, input))
1066
- goto done;
1067
- break;
1068
- case CMARK_NODE_ITEM:
1069
- if (!parse_node_item_prefix(parser, input, container))
1070
- goto done;
1071
- break;
1072
- case CMARK_NODE_CODE_BLOCK:
1073
- if (!parse_code_block_prefix(parser, input, container, &should_continue))
1074
- goto done;
1075
- break;
1076
- case CMARK_NODE_HEADING:
1077
- // a heading can never contain more than one line
1078
- goto done;
1079
- case CMARK_NODE_HTML_BLOCK:
1080
- if (!parse_html_block_prefix(parser, container))
1081
- goto done;
1082
- break;
1083
- case CMARK_NODE_PARAGRAPH:
1084
- if (parser->blank)
1085
- goto done;
1086
- break;
1087
- case CMARK_NODE_FOOTNOTE_DEFINITION:
1088
- if (!parse_footnote_definition_block_prefix(parser, input, container))
1089
- goto done;
1090
- break;
1091
- default:
1092
- break;
1093
- }
1094
- }
1095
-
1096
- *all_matched = true;
1097
-
1098
- done:
1099
- if (!*all_matched) {
1100
- container = container->parent; // back up to last matching node
1101
- }
1102
-
1103
- if (!should_continue) {
1104
- container = NULL;
1105
- }
1106
-
1107
- return container;
1108
- }
1109
-
1110
- static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1111
- cmark_chunk *input, bool all_matched) {
1112
- bool indented;
1113
- cmark_list *data = NULL;
1114
- bool maybe_lazy = S_type(parser->current) == CMARK_NODE_PARAGRAPH;
1115
- cmark_node_type cont_type = S_type(*container);
1116
- bufsize_t matched = 0;
1117
- int lev = 0;
1118
- bool save_partially_consumed_tab;
1119
- bool has_content;
1120
- int save_offset;
1121
- int save_column;
1122
-
1123
- while (cont_type != CMARK_NODE_CODE_BLOCK &&
1124
- cont_type != CMARK_NODE_HTML_BLOCK) {
1125
-
1126
- S_find_first_nonspace(parser, input);
1127
- indented = parser->indent >= CODE_INDENT;
1128
-
1129
- if (!indented && peek_at(input, parser->first_nonspace) == '>') {
1130
-
1131
- bufsize_t blockquote_startpos = parser->first_nonspace;
1132
-
1133
- S_advance_offset(parser, input,
1134
- parser->first_nonspace + 1 - parser->offset, false);
1135
- // optional following character
1136
- if (S_is_space_or_tab(peek_at(input, parser->offset))) {
1137
- S_advance_offset(parser, input, 1, true);
1138
- }
1139
- *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE,
1140
- blockquote_startpos + 1);
1141
-
1142
- } else if (!indented && (matched = scan_atx_heading_start(
1143
- input, parser->first_nonspace))) {
1144
- bufsize_t hashpos;
1145
- int level = 0;
1146
- bufsize_t heading_startpos = parser->first_nonspace;
1147
-
1148
- S_advance_offset(parser, input,
1149
- parser->first_nonspace + matched - parser->offset,
1150
- false);
1151
- *container = add_child(parser, *container, CMARK_NODE_HEADING,
1152
- heading_startpos + 1);
1153
-
1154
- hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace);
1155
-
1156
- while (peek_at(input, hashpos) == '#') {
1157
- level++;
1158
- hashpos++;
1159
- }
1160
-
1161
- (*container)->as.heading.level = level;
1162
- (*container)->as.heading.setext = false;
1163
- (*container)->internal_offset = matched;
1164
-
1165
- } else if (!indented && (matched = scan_open_code_fence(
1166
- input, parser->first_nonspace))) {
1167
- *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
1168
- parser->first_nonspace + 1);
1169
- (*container)->as.code.fenced = true;
1170
- (*container)->as.code.fence_char = peek_at(input, parser->first_nonspace);
1171
- (*container)->as.code.fence_length = (matched > 255) ? 255 : (uint8_t)matched;
1172
- (*container)->as.code.fence_offset =
1173
- (int8_t)(parser->first_nonspace - parser->offset);
1174
- (*container)->as.code.info = cmark_chunk_literal("");
1175
- S_advance_offset(parser, input,
1176
- parser->first_nonspace + matched - parser->offset,
1177
- false);
1178
-
1179
- } else if (!indented && ((matched = scan_html_block_start(
1180
- input, parser->first_nonspace)) ||
1181
- (cont_type != CMARK_NODE_PARAGRAPH &&
1182
- (matched = scan_html_block_start_7(
1183
- input, parser->first_nonspace))))) {
1184
- *container = add_child(parser, *container, CMARK_NODE_HTML_BLOCK,
1185
- parser->first_nonspace + 1);
1186
- (*container)->as.html_block_type = matched;
1187
- // note, we don't adjust parser->offset because the tag is part of the
1188
- // text
1189
- } else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
1190
- (lev =
1191
- scan_setext_heading_line(input, parser->first_nonspace))) {
1192
- // finalize paragraph, resolving reference links
1193
- has_content = resolve_reference_link_definitions(parser, *container);
1194
-
1195
- if (has_content) {
1196
-
1197
- (*container)->type = (uint16_t)CMARK_NODE_HEADING;
1198
- (*container)->as.heading.level = lev;
1199
- (*container)->as.heading.setext = true;
1200
- S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
1201
- }
1202
- } else if (!indented &&
1203
- !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
1204
- (parser->thematic_break_kill_pos <= parser->first_nonspace) &&
1205
- (matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
1206
- // it's only now that we know the line is not part of a setext heading:
1207
- *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
1208
- parser->first_nonspace + 1);
1209
- S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
1210
- } else if (!indented &&
1211
- parser->options & CMARK_OPT_FOOTNOTES &&
1212
- (matched = scan_footnote_definition(input, parser->first_nonspace))) {
1213
- cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2);
1214
- cmark_chunk_to_cstr(parser->mem, &c);
1215
-
1216
- while (c.data[c.len - 1] != ']')
1217
- --c.len;
1218
- --c.len;
1219
-
1220
- S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false);
1221
- *container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1);
1222
- (*container)->as.literal = c;
1223
-
1224
- (*container)->internal_offset = matched;
1225
- } else if ((!indented || cont_type == CMARK_NODE_LIST) &&
1226
- parser->indent < 4 &&
1227
- (matched = parse_list_marker(
1228
- parser->mem, input, parser->first_nonspace,
1229
- (*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
1230
-
1231
- // Note that we can have new list items starting with >= 4
1232
- // spaces indent, as long as the list container is still open.
1233
- int i = 0;
1234
-
1235
- // compute padding:
1236
- S_advance_offset(parser, input,
1237
- parser->first_nonspace + matched - parser->offset,
1238
- false);
1239
-
1240
- save_partially_consumed_tab = parser->partially_consumed_tab;
1241
- save_offset = parser->offset;
1242
- save_column = parser->column;
1243
-
1244
- while (parser->column - save_column <= 5 &&
1245
- S_is_space_or_tab(peek_at(input, parser->offset))) {
1246
- S_advance_offset(parser, input, 1, true);
1247
- }
1248
-
1249
- i = parser->column - save_column;
1250
- if (i >= 5 || i < 1 ||
1251
- // only spaces after list marker:
1252
- S_is_line_end_char(peek_at(input, parser->offset))) {
1253
- data->padding = matched + 1;
1254
- parser->offset = save_offset;
1255
- parser->column = save_column;
1256
- parser->partially_consumed_tab = save_partially_consumed_tab;
1257
- if (i > 0) {
1258
- S_advance_offset(parser, input, 1, true);
1259
- }
1260
- } else {
1261
- data->padding = matched + i;
1262
- }
1263
-
1264
- // check container; if it's a list, see if this list item
1265
- // can continue the list; otherwise, create a list container.
1266
-
1267
- data->marker_offset = parser->indent;
1268
-
1269
- if (cont_type != CMARK_NODE_LIST ||
1270
- !lists_match(&((*container)->as.list), data)) {
1271
- *container = add_child(parser, *container, CMARK_NODE_LIST,
1272
- parser->first_nonspace + 1);
1273
-
1274
- memcpy(&((*container)->as.list), data, sizeof(*data));
1275
- }
1276
-
1277
- // add the list item
1278
- *container = add_child(parser, *container, CMARK_NODE_ITEM,
1279
- parser->first_nonspace + 1);
1280
- /* TODO: static */
1281
- memcpy(&((*container)->as.list), data, sizeof(*data));
1282
- parser->mem->free(data);
1283
- } else if (indented && !maybe_lazy && !parser->blank) {
1284
- S_advance_offset(parser, input, CODE_INDENT, true);
1285
- *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
1286
- parser->offset + 1);
1287
- (*container)->as.code.fenced = false;
1288
- (*container)->as.code.fence_char = 0;
1289
- (*container)->as.code.fence_length = 0;
1290
- (*container)->as.code.fence_offset = 0;
1291
- (*container)->as.code.info = cmark_chunk_literal("");
1292
- } else {
1293
- cmark_llist *tmp;
1294
- cmark_node *new_container = NULL;
1295
-
1296
- for (tmp = parser->syntax_extensions; tmp; tmp=tmp->next) {
1297
- cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data;
1298
-
1299
- if (ext->try_opening_block) {
1300
- new_container = ext->try_opening_block(
1301
- ext, indented, parser, *container, input->data, input->len);
1302
-
1303
- if (new_container) {
1304
- *container = new_container;
1305
- break;
1306
- }
1307
- }
1308
- }
1309
-
1310
- if (!new_container) {
1311
- break;
1312
- }
1313
- }
1314
-
1315
- if (accepts_lines(S_type(*container))) {
1316
- // if it's a line container, it can't contain other containers
1317
- break;
1318
- }
1319
-
1320
- cont_type = S_type(*container);
1321
- maybe_lazy = false;
1322
- }
1323
- }
1324
-
1325
- static void add_text_to_container(cmark_parser *parser, cmark_node *container,
1326
- cmark_node *last_matched_container,
1327
- cmark_chunk *input) {
1328
- cmark_node *tmp;
1329
- // what remains at parser->offset is a text line. add the text to the
1330
- // appropriate container.
1331
-
1332
- S_find_first_nonspace(parser, input);
1333
-
1334
- if (parser->blank && container->last_child)
1335
- S_set_last_line_blank(container->last_child, true);
1336
-
1337
- // block quote lines are never blank as they start with >
1338
- // and we don't count blanks in fenced code for purposes of tight/loose
1339
- // lists or breaking out of lists. we also don't set last_line_blank
1340
- // on an empty list item.
1341
- const cmark_node_type ctype = S_type(container);
1342
- const bool last_line_blank =
1343
- (parser->blank && ctype != CMARK_NODE_BLOCK_QUOTE &&
1344
- ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK &&
1345
- !(ctype == CMARK_NODE_CODE_BLOCK && container->as.code.fenced) &&
1346
- !(ctype == CMARK_NODE_ITEM && container->first_child == NULL &&
1347
- container->start_line == parser->line_number));
1348
-
1349
- S_set_last_line_blank(container, last_line_blank);
1350
-
1351
- tmp = container;
1352
- while (tmp->parent) {
1353
- S_set_last_line_blank(tmp->parent, false);
1354
- tmp = tmp->parent;
1355
- }
1356
-
1357
- // If the last line processed belonged to a paragraph node,
1358
- // and we didn't match all of the line prefixes for the open containers,
1359
- // and we didn't start any new containers,
1360
- // and the line isn't blank,
1361
- // then treat this as a "lazy continuation line" and add it to
1362
- // the open paragraph.
1363
- if (parser->current != last_matched_container &&
1364
- container == last_matched_container && !parser->blank &&
1365
- S_type(parser->current) == CMARK_NODE_PARAGRAPH) {
1366
- add_line(parser->current, input, parser);
1367
- } else { // not a lazy continuation
1368
- // Finalize any blocks that were not matched and set cur to container:
1369
- while (parser->current != last_matched_container) {
1370
- parser->current = finalize(parser, parser->current);
1371
- assert(parser->current != NULL);
1372
- }
1373
-
1374
- if (S_type(container) == CMARK_NODE_CODE_BLOCK) {
1375
- add_line(container, input, parser);
1376
- } else if (S_type(container) == CMARK_NODE_HTML_BLOCK) {
1377
- add_line(container, input, parser);
1378
-
1379
- int matches_end_condition;
1380
- switch (container->as.html_block_type) {
1381
- case 1:
1382
- // </script>, </style>, </pre>
1383
- matches_end_condition =
1384
- scan_html_block_end_1(input, parser->first_nonspace);
1385
- break;
1386
- case 2:
1387
- // -->
1388
- matches_end_condition =
1389
- scan_html_block_end_2(input, parser->first_nonspace);
1390
- break;
1391
- case 3:
1392
- // ?>
1393
- matches_end_condition =
1394
- scan_html_block_end_3(input, parser->first_nonspace);
1395
- break;
1396
- case 4:
1397
- // >
1398
- matches_end_condition =
1399
- scan_html_block_end_4(input, parser->first_nonspace);
1400
- break;
1401
- case 5:
1402
- // ]]>
1403
- matches_end_condition =
1404
- scan_html_block_end_5(input, parser->first_nonspace);
1405
- break;
1406
- default:
1407
- matches_end_condition = 0;
1408
- break;
1409
- }
1410
-
1411
- if (matches_end_condition) {
1412
- container = finalize(parser, container);
1413
- assert(parser->current != NULL);
1414
- }
1415
- } else if (parser->blank) {
1416
- // ??? do nothing
1417
- } else if (accepts_lines(S_type(container))) {
1418
- if (S_type(container) == CMARK_NODE_HEADING &&
1419
- container->as.heading.setext == false) {
1420
- chop_trailing_hashtags(input);
1421
- }
1422
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
1423
- false);
1424
- add_line(container, input, parser);
1425
- } else {
1426
- // create paragraph container for line
1427
- container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
1428
- parser->first_nonspace + 1);
1429
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
1430
- false);
1431
- add_line(container, input, parser);
1432
- }
1433
-
1434
- parser->current = container;
1435
- }
1436
- }
1437
-
1438
- /* See http://spec.commonmark.org/0.24/#phase-1-block-structure */
1439
- static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1440
- bufsize_t bytes) {
1441
- cmark_node *last_matched_container;
1442
- bool all_matched = true;
1443
- cmark_node *container;
1444
- cmark_chunk input;
1445
- cmark_node *current;
1446
-
1447
- cmark_strbuf_clear(&parser->curline);
1448
-
1449
- if (parser->options & CMARK_OPT_VALIDATE_UTF8)
1450
- cmark_utf8proc_check(&parser->curline, buffer, bytes);
1451
- else
1452
- cmark_strbuf_put(&parser->curline, buffer, bytes);
1453
-
1454
- bytes = parser->curline.size;
1455
-
1456
- // ensure line ends with a newline:
1457
- if (bytes == 0 || !S_is_line_end_char(parser->curline.ptr[bytes - 1]))
1458
- cmark_strbuf_putc(&parser->curline, '\n');
1459
-
1460
- parser->offset = 0;
1461
- parser->column = 0;
1462
- parser->first_nonspace = 0;
1463
- parser->first_nonspace_column = 0;
1464
- parser->thematic_break_kill_pos = 0;
1465
- parser->indent = 0;
1466
- parser->blank = false;
1467
- parser->partially_consumed_tab = false;
1468
-
1469
- input.data = parser->curline.ptr;
1470
- input.len = parser->curline.size;
1471
- input.alloc = 0;
1472
-
1473
- // Skip UTF-8 BOM.
1474
- if (parser->line_number == 0 &&
1475
- input.len >= 3 &&
1476
- memcmp(input.data, "\xef\xbb\xbf", 3) == 0)
1477
- parser->offset += 3;
1478
-
1479
- parser->line_number++;
1480
-
1481
- last_matched_container = check_open_blocks(parser, &input, &all_matched);
1482
-
1483
- if (!last_matched_container)
1484
- goto finished;
1485
-
1486
- container = last_matched_container;
1487
-
1488
- current = parser->current;
1489
-
1490
- open_new_blocks(parser, &container, &input, all_matched);
1491
-
1492
- /* parser->current might have changed if feed_reentrant was called */
1493
- if (current == parser->current)
1494
- add_text_to_container(parser, container, last_matched_container, &input);
1495
-
1496
- finished:
1497
- parser->last_line_length = input.len;
1498
- if (parser->last_line_length &&
1499
- input.data[parser->last_line_length - 1] == '\n')
1500
- parser->last_line_length -= 1;
1501
- if (parser->last_line_length &&
1502
- input.data[parser->last_line_length - 1] == '\r')
1503
- parser->last_line_length -= 1;
1504
-
1505
- cmark_strbuf_clear(&parser->curline);
1506
- }
1507
-
1508
- cmark_node *cmark_parser_finish(cmark_parser *parser) {
1509
- cmark_node *res;
1510
- cmark_llist *extensions;
1511
-
1512
- /* Parser was already finished once */
1513
- if (parser->root == NULL)
1514
- return NULL;
1515
-
1516
- if (parser->linebuf.size) {
1517
- S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
1518
- cmark_strbuf_clear(&parser->linebuf);
1519
- }
1520
-
1521
- finalize_document(parser);
1522
-
1523
- cmark_consolidate_text_nodes(parser->root);
1524
-
1525
- cmark_strbuf_free(&parser->curline);
1526
- cmark_strbuf_free(&parser->linebuf);
1527
-
1528
- #if CMARK_DEBUG_NODES
1529
- if (cmark_node_check(parser->root, stderr)) {
1530
- abort();
1531
- }
1532
- #endif
1533
-
1534
- for (extensions = parser->syntax_extensions; extensions; extensions = extensions->next) {
1535
- cmark_syntax_extension *ext = (cmark_syntax_extension *) extensions->data;
1536
- if (ext->postprocess_func) {
1537
- cmark_node *processed = ext->postprocess_func(ext, parser, parser->root);
1538
- if (processed)
1539
- parser->root = processed;
1540
- }
1541
- }
1542
-
1543
- res = parser->root;
1544
- parser->root = NULL;
1545
-
1546
- cmark_parser_reset(parser);
1547
-
1548
- return res;
1549
- }
1550
-
1551
- int cmark_parser_get_line_number(cmark_parser *parser) {
1552
- return parser->line_number;
1553
- }
1554
-
1555
- bufsize_t cmark_parser_get_offset(cmark_parser *parser) {
1556
- return parser->offset;
1557
- }
1558
-
1559
- bufsize_t cmark_parser_get_column(cmark_parser *parser) {
1560
- return parser->column;
1561
- }
1562
-
1563
- int cmark_parser_get_first_nonspace(cmark_parser *parser) {
1564
- return parser->first_nonspace;
1565
- }
1566
-
1567
- int cmark_parser_get_first_nonspace_column(cmark_parser *parser) {
1568
- return parser->first_nonspace_column;
1569
- }
1570
-
1571
- int cmark_parser_get_indent(cmark_parser *parser) {
1572
- return parser->indent;
1573
- }
1574
-
1575
- int cmark_parser_is_blank(cmark_parser *parser) {
1576
- return parser->blank;
1577
- }
1578
-
1579
- int cmark_parser_has_partially_consumed_tab(cmark_parser *parser) {
1580
- return parser->partially_consumed_tab;
1581
- }
1582
-
1583
- int cmark_parser_get_last_line_length(cmark_parser *parser) {
1584
- return parser->last_line_length;
1585
- }
1586
-
1587
- cmark_node *cmark_parser_add_child(cmark_parser *parser,
1588
- cmark_node *parent,
1589
- cmark_node_type block_type,
1590
- int start_column) {
1591
- return add_child(parser, parent, block_type, start_column);
1592
- }
1593
-
1594
- void cmark_parser_advance_offset(cmark_parser *parser,
1595
- const char *input,
1596
- int count,
1597
- int columns) {
1598
- cmark_chunk input_chunk = cmark_chunk_literal(input);
1599
-
1600
- S_advance_offset(parser, &input_chunk, count, columns != 0);
1601
- }
1602
-
1603
- void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser,
1604
- cmark_ispunct_func func) {
1605
- parser->backslash_ispunct = func;
1606
- }
1607
-
1608
- cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser) {
1609
- return parser->syntax_extensions;
1610
- }