commonmarker 0.23.7.pre1 → 1.0.0.pre.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +70 -212
  3. data/commonmarker.gemspec +34 -31
  4. data/ext/commonmarker/Cargo.toml +12 -0
  5. data/ext/commonmarker/_util.rb +102 -0
  6. data/ext/commonmarker/extconf.rb +4 -5
  7. data/ext/commonmarker/src/comrak_options.rs +136 -0
  8. data/ext/commonmarker/src/lib.rs +29 -0
  9. data/lib/commonmarker/config.rb +57 -38
  10. data/lib/commonmarker/extension.rb +14 -0
  11. data/lib/commonmarker/renderer.rb +1 -127
  12. data/lib/commonmarker/version.rb +2 -2
  13. data/lib/commonmarker.rb +14 -29
  14. metadata +34 -178
  15. data/Rakefile +0 -109
  16. data/bin/commonmarker +0 -118
  17. data/ext/commonmarker/arena.c +0 -103
  18. data/ext/commonmarker/autolink.c +0 -456
  19. data/ext/commonmarker/autolink.h +0 -8
  20. data/ext/commonmarker/blocks.c +0 -1596
  21. data/ext/commonmarker/buffer.c +0 -278
  22. data/ext/commonmarker/buffer.h +0 -116
  23. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  24. data/ext/commonmarker/chunk.h +0 -135
  25. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  26. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -736
  27. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  28. data/ext/commonmarker/cmark-gfm.h +0 -817
  29. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  30. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  31. data/ext/commonmarker/cmark.c +0 -55
  32. data/ext/commonmarker/cmark_ctype.c +0 -44
  33. data/ext/commonmarker/cmark_ctype.h +0 -33
  34. data/ext/commonmarker/commonmark.c +0 -529
  35. data/ext/commonmarker/commonmarker.c +0 -1307
  36. data/ext/commonmarker/commonmarker.h +0 -16
  37. data/ext/commonmarker/config.h +0 -76
  38. data/ext/commonmarker/core-extensions.c +0 -27
  39. data/ext/commonmarker/entities.inc +0 -2138
  40. data/ext/commonmarker/ext_scanners.c +0 -879
  41. data/ext/commonmarker/ext_scanners.h +0 -24
  42. data/ext/commonmarker/footnotes.c +0 -63
  43. data/ext/commonmarker/footnotes.h +0 -27
  44. data/ext/commonmarker/houdini.h +0 -57
  45. data/ext/commonmarker/houdini_href_e.c +0 -100
  46. data/ext/commonmarker/houdini_html_e.c +0 -66
  47. data/ext/commonmarker/houdini_html_u.c +0 -149
  48. data/ext/commonmarker/html.c +0 -486
  49. data/ext/commonmarker/html.h +0 -27
  50. data/ext/commonmarker/inlines.c +0 -1716
  51. data/ext/commonmarker/inlines.h +0 -29
  52. data/ext/commonmarker/iterator.c +0 -159
  53. data/ext/commonmarker/iterator.h +0 -26
  54. data/ext/commonmarker/latex.c +0 -466
  55. data/ext/commonmarker/linked_list.c +0 -37
  56. data/ext/commonmarker/man.c +0 -278
  57. data/ext/commonmarker/map.c +0 -122
  58. data/ext/commonmarker/map.h +0 -41
  59. data/ext/commonmarker/node.c +0 -979
  60. data/ext/commonmarker/node.h +0 -125
  61. data/ext/commonmarker/parser.h +0 -58
  62. data/ext/commonmarker/plaintext.c +0 -235
  63. data/ext/commonmarker/plugin.c +0 -36
  64. data/ext/commonmarker/plugin.h +0 -34
  65. data/ext/commonmarker/references.c +0 -42
  66. data/ext/commonmarker/references.h +0 -26
  67. data/ext/commonmarker/registry.c +0 -63
  68. data/ext/commonmarker/registry.h +0 -24
  69. data/ext/commonmarker/render.c +0 -205
  70. data/ext/commonmarker/render.h +0 -62
  71. data/ext/commonmarker/scanners.c +0 -10508
  72. data/ext/commonmarker/scanners.h +0 -62
  73. data/ext/commonmarker/scanners.re +0 -341
  74. data/ext/commonmarker/strikethrough.c +0 -167
  75. data/ext/commonmarker/strikethrough.h +0 -9
  76. data/ext/commonmarker/syntax_extension.c +0 -149
  77. data/ext/commonmarker/syntax_extension.h +0 -34
  78. data/ext/commonmarker/table.c +0 -848
  79. data/ext/commonmarker/table.h +0 -12
  80. data/ext/commonmarker/tagfilter.c +0 -60
  81. data/ext/commonmarker/tagfilter.h +0 -8
  82. data/ext/commonmarker/tasklist.c +0 -156
  83. data/ext/commonmarker/tasklist.h +0 -8
  84. data/ext/commonmarker/utf8.c +0 -317
  85. data/ext/commonmarker/utf8.h +0 -35
  86. data/ext/commonmarker/xml.c +0 -181
  87. data/lib/commonmarker/node/inspect.rb +0 -47
  88. data/lib/commonmarker/node.rb +0 -83
  89. data/lib/commonmarker/renderer/html_renderer.rb +0 -252
@@ -1,1596 +0,0 @@
1
- /**
2
- * Block parsing implementation.
3
- *
4
- * For a high-level overview of the block parsing process,
5
- * see http://spec.commonmark.org/0.24/#phase-1-block-structure
6
- */
7
-
8
- #include <stdlib.h>
9
- #include <assert.h>
10
- #include <stdio.h>
11
-
12
- #include "cmark_ctype.h"
13
- #include "syntax_extension.h"
14
- #include "config.h"
15
- #include "parser.h"
16
- #include "cmark-gfm.h"
17
- #include "node.h"
18
- #include "references.h"
19
- #include "utf8.h"
20
- #include "scanners.h"
21
- #include "inlines.h"
22
- #include "houdini.h"
23
- #include "buffer.h"
24
- #include "footnotes.h"
25
-
26
// NOTE(review): presumably the indentation (in columns) at which a line is
// treated as indented code -- not used in the visible part of this file.
#define CODE_INDENT 4
// Width of a tab stop, in columns, used when expanding tabs.
#define TAB_STOP 4

#ifndef MIN
// Smaller of two values. Every argument is parenthesized so that operands
// containing lower-precedence operators (|, &, ?:, ...) compare correctly.
// Arguments are still evaluated more than once: pass side-effect-free
// expressions only.
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif

// Byte at index n of the chunk-like object pointed to by i.
#define peek_at(i, n) ((i)->data[n])
34
-
35
- static bool S_last_line_blank(const cmark_node *node) {
36
- return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
37
- }
38
-
39
- static bool S_last_line_checked(const cmark_node *node) {
40
- return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0;
41
- }
42
-
43
- static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
44
- return (cmark_node_type)node->type;
45
- }
46
-
47
- static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
48
- if (is_blank)
49
- node->flags |= CMARK_NODE__LAST_LINE_BLANK;
50
- else
51
- node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
52
- }
53
-
54
- static void S_set_last_line_checked(cmark_node *node) {
55
- node->flags |= CMARK_NODE__LAST_LINE_CHECKED;
56
- }
57
-
58
- static CMARK_INLINE bool S_is_line_end_char(char c) {
59
- return (c == '\n' || c == '\r');
60
- }
61
-
62
- static CMARK_INLINE bool S_is_space_or_tab(char c) {
63
- return (c == ' ' || c == '\t');
64
- }
65
-
66
- static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
67
- size_t len, bool eof);
68
-
69
- static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
70
- bufsize_t bytes);
71
-
72
- static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag,
73
- int start_line, int start_column) {
74
- cmark_node *e;
75
-
76
- e = (cmark_node *)mem->calloc(1, sizeof(*e));
77
- cmark_strbuf_init(mem, &e->content, 32);
78
- e->type = (uint16_t)tag;
79
- e->flags = CMARK_NODE__OPEN;
80
- e->start_line = start_line;
81
- e->start_column = start_column;
82
- e->end_line = start_line;
83
-
84
- return e;
85
- }
86
-
87
- // Create a root document node.
88
- static cmark_node *make_document(cmark_mem *mem) {
89
- cmark_node *e = make_block(mem, CMARK_NODE_DOCUMENT, 1, 1);
90
- return e;
91
- }
92
-
93
- int cmark_parser_attach_syntax_extension(cmark_parser *parser,
94
- cmark_syntax_extension *extension) {
95
- parser->syntax_extensions = cmark_llist_append(parser->mem, parser->syntax_extensions, extension);
96
- if (extension->match_inline || extension->insert_inline_from_delim) {
97
- parser->inline_syntax_extensions = cmark_llist_append(
98
- parser->mem, parser->inline_syntax_extensions, extension);
99
- }
100
-
101
- return 1;
102
- }
103
-
104
- static void cmark_parser_dispose(cmark_parser *parser) {
105
- if (parser->root)
106
- cmark_node_free(parser->root);
107
-
108
- if (parser->refmap)
109
- cmark_map_free(parser->refmap);
110
- }
111
-
112
- static void cmark_parser_reset(cmark_parser *parser) {
113
- cmark_llist *saved_exts = parser->syntax_extensions;
114
- cmark_llist *saved_inline_exts = parser->inline_syntax_extensions;
115
- int saved_options = parser->options;
116
- cmark_mem *saved_mem = parser->mem;
117
-
118
- cmark_parser_dispose(parser);
119
-
120
- memset(parser, 0, sizeof(cmark_parser));
121
- parser->mem = saved_mem;
122
-
123
- cmark_strbuf_init(parser->mem, &parser->curline, 256);
124
- cmark_strbuf_init(parser->mem, &parser->linebuf, 0);
125
-
126
- cmark_node *document = make_document(parser->mem);
127
-
128
- parser->refmap = cmark_reference_map_new(parser->mem);
129
- parser->root = document;
130
- parser->current = document;
131
-
132
- parser->syntax_extensions = saved_exts;
133
- parser->inline_syntax_extensions = saved_inline_exts;
134
- parser->options = saved_options;
135
- }
136
-
137
- cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
138
- cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser));
139
- parser->mem = mem;
140
- parser->options = options;
141
- cmark_parser_reset(parser);
142
- return parser;
143
- }
144
-
145
- cmark_parser *cmark_parser_new(int options) {
146
- extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
147
- return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR);
148
- }
149
-
150
- void cmark_parser_free(cmark_parser *parser) {
151
- cmark_mem *mem = parser->mem;
152
- cmark_parser_dispose(parser);
153
- cmark_strbuf_free(&parser->curline);
154
- cmark_strbuf_free(&parser->linebuf);
155
- cmark_llist_free(parser->mem, parser->syntax_extensions);
156
- cmark_llist_free(parser->mem, parser->inline_syntax_extensions);
157
- mem->free(parser);
158
- }
159
-
160
- static cmark_node *finalize(cmark_parser *parser, cmark_node *b);
161
-
162
- // Returns true if line has only space characters, else false.
163
- static bool is_blank(cmark_strbuf *s, bufsize_t offset) {
164
- while (offset < s->size) {
165
- switch (s->ptr[offset]) {
166
- case '\r':
167
- case '\n':
168
- return true;
169
- case ' ':
170
- offset++;
171
- break;
172
- case '\t':
173
- offset++;
174
- break;
175
- default:
176
- return false;
177
- }
178
- }
179
-
180
- return true;
181
- }
182
-
183
- static CMARK_INLINE bool accepts_lines(cmark_node_type block_type) {
184
- return (block_type == CMARK_NODE_PARAGRAPH ||
185
- block_type == CMARK_NODE_HEADING ||
186
- block_type == CMARK_NODE_CODE_BLOCK);
187
- }
188
-
189
- static CMARK_INLINE bool contains_inlines(cmark_node *node) {
190
- if (node->extension && node->extension->contains_inlines_func) {
191
- return node->extension->contains_inlines_func(node->extension, node) != 0;
192
- }
193
-
194
- return (node->type == CMARK_NODE_PARAGRAPH ||
195
- node->type == CMARK_NODE_HEADING);
196
- }
197
-
198
- static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) {
199
- int chars_to_tab;
200
- int i;
201
- assert(node->flags & CMARK_NODE__OPEN);
202
- if (parser->partially_consumed_tab) {
203
- parser->offset += 1; // skip over tab
204
- // add space characters:
205
- chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
206
- for (i = 0; i < chars_to_tab; i++) {
207
- cmark_strbuf_putc(&node->content, ' ');
208
- }
209
- }
210
- cmark_strbuf_put(&node->content, ch->data + parser->offset,
211
- ch->len - parser->offset);
212
- }
213
-
214
- static void remove_trailing_blank_lines(cmark_strbuf *ln) {
215
- bufsize_t i;
216
- unsigned char c;
217
-
218
- for (i = ln->size - 1; i >= 0; --i) {
219
- c = ln->ptr[i];
220
-
221
- if (c != ' ' && c != '\t' && !S_is_line_end_char(c))
222
- break;
223
- }
224
-
225
- if (i < 0) {
226
- cmark_strbuf_clear(ln);
227
- return;
228
- }
229
-
230
- for (; i < ln->size; ++i) {
231
- c = ln->ptr[i];
232
-
233
- if (!S_is_line_end_char(c))
234
- continue;
235
-
236
- cmark_strbuf_truncate(ln, i);
237
- break;
238
- }
239
- }
240
-
241
- // Check to see if a node ends with a blank line, descending
242
- // if needed into lists and sublists.
243
- static bool S_ends_with_blank_line(cmark_node *node) {
244
- if (S_last_line_checked(node)) {
245
- return(S_last_line_blank(node));
246
- } else if ((S_type(node) == CMARK_NODE_LIST ||
247
- S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
248
- S_set_last_line_checked(node);
249
- return(S_ends_with_blank_line(node->last_child));
250
- } else {
251
- S_set_last_line_checked(node);
252
- return (S_last_line_blank(node));
253
- }
254
- }
255
-
256
- // returns true if content remains after link defs are resolved.
257
- static bool resolve_reference_link_definitions(
258
- cmark_parser *parser,
259
- cmark_node *b) {
260
- bufsize_t pos;
261
- cmark_strbuf *node_content = &b->content;
262
- cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
263
- while (chunk.len && chunk.data[0] == '[' &&
264
- (pos = cmark_parse_reference_inline(parser->mem, &chunk,
265
- parser->refmap))) {
266
-
267
- chunk.data += pos;
268
- chunk.len -= pos;
269
- }
270
- cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
271
- return !is_blank(&b->content, 0);
272
- }
273
-
274
- static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
275
- bufsize_t pos;
276
- cmark_node *item;
277
- cmark_node *subitem;
278
- cmark_node *parent;
279
- bool has_content;
280
-
281
- parent = b->parent;
282
- assert(b->flags &
283
- CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks
284
- b->flags &= ~CMARK_NODE__OPEN;
285
-
286
- if (parser->curline.size == 0) {
287
- // end of input - line number has not been incremented
288
- b->end_line = parser->line_number;
289
- b->end_column = parser->last_line_length;
290
- } else if (S_type(b) == CMARK_NODE_DOCUMENT ||
291
- (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) ||
292
- (S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) {
293
- b->end_line = parser->line_number;
294
- b->end_column = parser->curline.size;
295
- if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n')
296
- b->end_column -= 1;
297
- if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r')
298
- b->end_column -= 1;
299
- } else {
300
- b->end_line = parser->line_number - 1;
301
- b->end_column = parser->last_line_length;
302
- }
303
-
304
- cmark_strbuf *node_content = &b->content;
305
-
306
- switch (S_type(b)) {
307
- case CMARK_NODE_PARAGRAPH:
308
- {
309
- has_content = resolve_reference_link_definitions(parser, b);
310
- if (!has_content) {
311
- // remove blank node (former reference def)
312
- cmark_node_free(b);
313
- }
314
- break;
315
- }
316
-
317
- case CMARK_NODE_CODE_BLOCK:
318
- if (!b->as.code.fenced) { // indented code
319
- remove_trailing_blank_lines(node_content);
320
- cmark_strbuf_putc(node_content, '\n');
321
- } else {
322
- // first line of contents becomes info
323
- for (pos = 0; pos < node_content->size; ++pos) {
324
- if (S_is_line_end_char(node_content->ptr[pos]))
325
- break;
326
- }
327
- assert(pos < node_content->size);
328
-
329
- cmark_strbuf tmp = CMARK_BUF_INIT(parser->mem);
330
- houdini_unescape_html_f(&tmp, node_content->ptr, pos);
331
- cmark_strbuf_trim(&tmp);
332
- cmark_strbuf_unescape(&tmp);
333
- b->as.code.info = cmark_chunk_buf_detach(&tmp);
334
-
335
- if (node_content->ptr[pos] == '\r')
336
- pos += 1;
337
- if (node_content->ptr[pos] == '\n')
338
- pos += 1;
339
- cmark_strbuf_drop(node_content, pos);
340
- }
341
- b->as.code.literal = cmark_chunk_buf_detach(node_content);
342
- break;
343
-
344
- case CMARK_NODE_HTML_BLOCK:
345
- b->as.literal = cmark_chunk_buf_detach(node_content);
346
- break;
347
-
348
- case CMARK_NODE_LIST: // determine tight/loose status
349
- b->as.list.tight = true; // tight by default
350
- item = b->first_child;
351
-
352
- while (item) {
353
- // check for non-final non-empty list item ending with blank line:
354
- if (S_last_line_blank(item) && item->next) {
355
- b->as.list.tight = false;
356
- break;
357
- }
358
- // recurse into children of list item, to see if there are
359
- // spaces between them:
360
- subitem = item->first_child;
361
- while (subitem) {
362
- if ((item->next || subitem->next) &&
363
- S_ends_with_blank_line(subitem)) {
364
- b->as.list.tight = false;
365
- break;
366
- }
367
- subitem = subitem->next;
368
- }
369
- if (!(b->as.list.tight)) {
370
- break;
371
- }
372
- item = item->next;
373
- }
374
-
375
- break;
376
-
377
- default:
378
- break;
379
- }
380
-
381
- return parent;
382
- }
383
-
384
- // Add a node as child of another. Return pointer to child.
385
- static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
386
- cmark_node_type block_type, int start_column) {
387
- assert(parent);
388
-
389
- // if 'parent' isn't the kind of node that can accept this child,
390
- // then back up til we hit a node that can.
391
- while (!cmark_node_can_contain_type(parent, block_type)) {
392
- parent = finalize(parser, parent);
393
- }
394
-
395
- cmark_node *child =
396
- make_block(parser->mem, block_type, parser->line_number, start_column);
397
- child->parent = parent;
398
-
399
- if (parent->last_child) {
400
- parent->last_child->next = child;
401
- child->prev = parent->last_child;
402
- } else {
403
- parent->first_child = child;
404
- child->prev = NULL;
405
- }
406
- parent->last_child = child;
407
- return child;
408
- }
409
-
410
- void cmark_manage_extensions_special_characters(cmark_parser *parser, int add) {
411
- cmark_llist *tmp_ext;
412
-
413
- for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) {
414
- cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data;
415
- cmark_llist *tmp_char;
416
- for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
417
- unsigned char c = (unsigned char)(size_t)tmp_char->data;
418
- if (add)
419
- cmark_inlines_add_special_character(c, ext->emphasis);
420
- else
421
- cmark_inlines_remove_special_character(c, ext->emphasis);
422
- }
423
- }
424
- }
425
-
426
- // Walk through node and all children, recursively, parsing
427
- // string content into inline content where appropriate.
428
- static void process_inlines(cmark_parser *parser,
429
- cmark_map *refmap, int options) {
430
- cmark_iter *iter = cmark_iter_new(parser->root);
431
- cmark_node *cur;
432
- cmark_event_type ev_type;
433
-
434
- cmark_manage_extensions_special_characters(parser, true);
435
-
436
- while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
437
- cur = cmark_iter_get_node(iter);
438
- if (ev_type == CMARK_EVENT_ENTER) {
439
- if (contains_inlines(cur)) {
440
- cmark_parse_inlines(parser, cur, refmap, options);
441
- }
442
- }
443
- }
444
-
445
- cmark_manage_extensions_special_characters(parser, false);
446
-
447
- cmark_iter_free(iter);
448
- }
449
-
450
- static int sort_footnote_by_ix(const void *_a, const void *_b) {
451
- cmark_footnote *a = *(cmark_footnote **)_a;
452
- cmark_footnote *b = *(cmark_footnote **)_b;
453
- return (int)a->ix - (int)b->ix;
454
- }
455
-
456
- static void process_footnotes(cmark_parser *parser) {
457
- // * Collect definitions in a map.
458
- // * Iterate the references in the document in order, assigning indices to
459
- // definitions in the order they're seen.
460
- // * Write out the footnotes at the bottom of the document in index order.
461
-
462
- cmark_map *map = cmark_footnote_map_new(parser->mem);
463
-
464
- cmark_iter *iter = cmark_iter_new(parser->root);
465
- cmark_node *cur;
466
- cmark_event_type ev_type;
467
-
468
- while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
469
- cur = cmark_iter_get_node(iter);
470
- if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) {
471
- cmark_footnote_create(map, cur);
472
- }
473
- }
474
-
475
- cmark_iter_free(iter);
476
- iter = cmark_iter_new(parser->root);
477
- unsigned int ix = 0;
478
-
479
- while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
480
- cur = cmark_iter_get_node(iter);
481
- if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_REFERENCE) {
482
- cmark_footnote *footnote = (cmark_footnote *)cmark_map_lookup(map, &cur->as.literal);
483
- if (footnote) {
484
- if (!footnote->ix)
485
- footnote->ix = ++ix;
486
-
487
- // store a reference to this footnote reference's footnote definition
488
- // this is used by renderers when generating label ids
489
- cur->parent_footnote_def = footnote->node;
490
-
491
- // keep track of a) count of how many times this footnote def has been
492
- // referenced, and b) which reference index this footnote ref is at.
493
- // this is used by renderers when generating links and backreferences.
494
- cur->footnote.ref_ix = ++footnote->node->footnote.def_count;
495
-
496
- char n[32];
497
- snprintf(n, sizeof(n), "%d", footnote->ix);
498
- cmark_chunk_free(parser->mem, &cur->as.literal);
499
- cmark_strbuf buf = CMARK_BUF_INIT(parser->mem);
500
- cmark_strbuf_puts(&buf, n);
501
-
502
- cur->as.literal = cmark_chunk_buf_detach(&buf);
503
- } else {
504
- cmark_node *text = (cmark_node *)parser->mem->calloc(1, sizeof(*text));
505
- cmark_strbuf_init(parser->mem, &text->content, 0);
506
- text->type = (uint16_t) CMARK_NODE_TEXT;
507
-
508
- cmark_strbuf buf = CMARK_BUF_INIT(parser->mem);
509
- cmark_strbuf_puts(&buf, "[^");
510
- cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
511
- cmark_strbuf_putc(&buf, ']');
512
-
513
- text->as.literal = cmark_chunk_buf_detach(&buf);
514
- cmark_node_insert_after(cur, text);
515
- cmark_node_free(cur);
516
- }
517
- }
518
- }
519
-
520
- cmark_iter_free(iter);
521
-
522
- if (map->sorted) {
523
- qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix);
524
- for (unsigned int i = 0; i < map->size; ++i) {
525
- cmark_footnote *footnote = (cmark_footnote *)map->sorted[i];
526
- if (!footnote->ix) {
527
- cmark_node_unlink(footnote->node);
528
- continue;
529
- }
530
- cmark_node_append_child(parser->root, footnote->node);
531
- footnote->node = NULL;
532
- }
533
- }
534
-
535
- cmark_unlink_footnotes_map(map);
536
- cmark_map_free(map);
537
- }
538
-
539
- // Attempts to parse a list item marker (bullet or enumerated).
540
- // On success, returns length of the marker, and populates
541
- // data with the details. On failure, returns 0.
542
- static bufsize_t parse_list_marker(cmark_mem *mem, cmark_chunk *input,
543
- bufsize_t pos, bool interrupts_paragraph,
544
- cmark_list **dataptr) {
545
- unsigned char c;
546
- bufsize_t startpos;
547
- cmark_list *data;
548
- bufsize_t i;
549
-
550
- startpos = pos;
551
- c = peek_at(input, pos);
552
-
553
- if (c == '*' || c == '-' || c == '+') {
554
- pos++;
555
- if (!cmark_isspace(peek_at(input, pos))) {
556
- return 0;
557
- }
558
-
559
- if (interrupts_paragraph) {
560
- i = pos;
561
- // require non-blank content after list marker:
562
- while (S_is_space_or_tab(peek_at(input, i))) {
563
- i++;
564
- }
565
- if (peek_at(input, i) == '\n') {
566
- return 0;
567
- }
568
- }
569
-
570
- data = (cmark_list *)mem->calloc(1, sizeof(*data));
571
- data->marker_offset = 0; // will be adjusted later
572
- data->list_type = CMARK_BULLET_LIST;
573
- data->bullet_char = c;
574
- data->start = 0;
575
- data->delimiter = CMARK_NO_DELIM;
576
- data->tight = false;
577
- } else if (cmark_isdigit(c)) {
578
- int start = 0;
579
- int digits = 0;
580
-
581
- do {
582
- start = (10 * start) + (peek_at(input, pos) - '0');
583
- pos++;
584
- digits++;
585
- // We limit to 9 digits to avoid overflow,
586
- // assuming max int is 2^31 - 1
587
- // This also seems to be the limit for 'start' in some browsers.
588
- } while (digits < 9 && cmark_isdigit(peek_at(input, pos)));
589
-
590
- if (interrupts_paragraph && start != 1) {
591
- return 0;
592
- }
593
- c = peek_at(input, pos);
594
- if (c == '.' || c == ')') {
595
- pos++;
596
- if (!cmark_isspace(peek_at(input, pos))) {
597
- return 0;
598
- }
599
- if (interrupts_paragraph) {
600
- // require non-blank content after list marker:
601
- i = pos;
602
- while (S_is_space_or_tab(peek_at(input, i))) {
603
- i++;
604
- }
605
- if (S_is_line_end_char(peek_at(input, i))) {
606
- return 0;
607
- }
608
- }
609
-
610
- data = (cmark_list *)mem->calloc(1, sizeof(*data));
611
- data->marker_offset = 0; // will be adjusted later
612
- data->list_type = CMARK_ORDERED_LIST;
613
- data->bullet_char = 0;
614
- data->start = start;
615
- data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM);
616
- data->tight = false;
617
- } else {
618
- return 0;
619
- }
620
- } else {
621
- return 0;
622
- }
623
-
624
- *dataptr = data;
625
- return (pos - startpos);
626
- }
627
-
628
- // Return 1 if list item belongs in list, else 0.
629
- static int lists_match(cmark_list *list_data, cmark_list *item_data) {
630
- return (list_data->list_type == item_data->list_type &&
631
- list_data->delimiter == item_data->delimiter &&
632
- // list_data->marker_offset == item_data.marker_offset &&
633
- list_data->bullet_char == item_data->bullet_char);
634
- }
635
-
636
- static cmark_node *finalize_document(cmark_parser *parser) {
637
- while (parser->current != parser->root) {
638
- parser->current = finalize(parser, parser->current);
639
- }
640
-
641
- finalize(parser, parser->root);
642
- process_inlines(parser, parser->refmap, parser->options);
643
- if (parser->options & CMARK_OPT_FOOTNOTES)
644
- process_footnotes(parser);
645
-
646
- return parser->root;
647
- }
648
-
649
- cmark_node *cmark_parse_file(FILE *f, int options) {
650
- unsigned char buffer[4096];
651
- cmark_parser *parser = cmark_parser_new(options);
652
- size_t bytes;
653
- cmark_node *document;
654
-
655
- while ((bytes = fread(buffer, 1, sizeof(buffer), f)) > 0) {
656
- bool eof = bytes < sizeof(buffer);
657
- S_parser_feed(parser, buffer, bytes, eof);
658
- if (eof) {
659
- break;
660
- }
661
- }
662
-
663
- document = cmark_parser_finish(parser);
664
- cmark_parser_free(parser);
665
- return document;
666
- }
667
-
668
- cmark_node *cmark_parse_document(const char *buffer, size_t len, int options) {
669
- cmark_parser *parser = cmark_parser_new(options);
670
- cmark_node *document;
671
-
672
- S_parser_feed(parser, (const unsigned char *)buffer, len, true);
673
-
674
- document = cmark_parser_finish(parser);
675
- cmark_parser_free(parser);
676
- return document;
677
- }
678
-
679
- void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) {
680
- S_parser_feed(parser, (const unsigned char *)buffer, len, false);
681
- }
682
-
683
- void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len) {
684
- cmark_strbuf saved_linebuf;
685
-
686
- cmark_strbuf_init(parser->mem, &saved_linebuf, 0);
687
- cmark_strbuf_puts(&saved_linebuf, cmark_strbuf_cstr(&parser->linebuf));
688
- cmark_strbuf_clear(&parser->linebuf);
689
-
690
- S_parser_feed(parser, (const unsigned char *)buffer, len, true);
691
-
692
- cmark_strbuf_sets(&parser->linebuf, cmark_strbuf_cstr(&saved_linebuf));
693
- cmark_strbuf_free(&saved_linebuf);
694
- }
695
-
696
- static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
697
- size_t len, bool eof) {
698
- const unsigned char *end = buffer + len;
699
- static const uint8_t repl[] = {239, 191, 189};
700
-
701
- if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
702
- // skip NL if last buffer ended with CR ; see #117
703
- buffer++;
704
- }
705
- parser->last_buffer_ended_with_cr = false;
706
- while (buffer < end) {
707
- const unsigned char *eol;
708
- bufsize_t chunk_len;
709
- bool process = false;
710
- for (eol = buffer; eol < end; ++eol) {
711
- if (S_is_line_end_char(*eol)) {
712
- process = true;
713
- break;
714
- }
715
- if (*eol == '\0' && eol < end) {
716
- break;
717
- }
718
- }
719
- if (eol >= end && eof) {
720
- process = true;
721
- }
722
-
723
- chunk_len = (bufsize_t)(eol - buffer);
724
- if (process) {
725
- if (parser->linebuf.size > 0) {
726
- cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
727
- S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
728
- cmark_strbuf_clear(&parser->linebuf);
729
- } else {
730
- S_process_line(parser, buffer, chunk_len);
731
- }
732
- } else {
733
- if (eol < end && *eol == '\0') {
734
- // omit NULL byte
735
- cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
736
- // add replacement character
737
- cmark_strbuf_put(&parser->linebuf, repl, 3);
738
- } else {
739
- cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
740
- }
741
- }
742
-
743
- buffer += chunk_len;
744
- if (buffer < end) {
745
- if (*buffer == '\0') {
746
- // skip over NULL
747
- buffer++;
748
- } else {
749
- // skip over line ending characters
750
- if (*buffer == '\r') {
751
- buffer++;
752
- if (buffer == end)
753
- parser->last_buffer_ended_with_cr = true;
754
- }
755
- if (buffer < end && *buffer == '\n')
756
- buffer++;
757
- }
758
- }
759
- }
760
- }
761
-
762
- static void chop_trailing_hashtags(cmark_chunk *ch) {
763
- bufsize_t n, orig_n;
764
-
765
- cmark_chunk_rtrim(ch);
766
- orig_n = n = ch->len - 1;
767
-
768
- // if string ends in space followed by #s, remove these:
769
- while (n >= 0 && peek_at(ch, n) == '#')
770
- n--;
771
-
772
- // Check for a space before the final #s:
773
- if (n != orig_n && n >= 0 && S_is_space_or_tab(peek_at(ch, n))) {
774
- ch->len = n;
775
- cmark_chunk_rtrim(ch);
776
- }
777
- }
778
-
779
- // Check for thematic break. On failure, return 0 and update
780
- // thematic_break_kill_pos with the index at which the
781
- // parse fails. On success, return length of match.
782
- // "...three or more hyphens, asterisks,
783
- // or underscores on a line by themselves. If you wish, you may use
784
- // spaces between the hyphens or asterisks."
785
- static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
786
- bufsize_t offset) {
787
- bufsize_t i;
788
- char c;
789
- char nextc = '\0';
790
- int count;
791
- i = offset;
792
- c = peek_at(input, i);
793
- if (!(c == '*' || c == '_' || c == '-')) {
794
- parser->thematic_break_kill_pos = i;
795
- return 0;
796
- }
797
- count = 1;
798
- while ((nextc = peek_at(input, ++i))) {
799
- if (nextc == c) {
800
- count++;
801
- } else if (nextc != ' ' && nextc != '\t') {
802
- break;
803
- }
804
- }
805
- if (count >= 3 && (nextc == '\r' || nextc == '\n')) {
806
- return (i - offset) + 1;
807
- } else {
808
- parser->thematic_break_kill_pos = i;
809
- return 0;
810
- }
811
- }
812
-
813
- // Find first nonspace character from current offset, setting
814
- // parser->first_nonspace, parser->first_nonspace_column,
815
- // parser->indent, and parser->blank. Does not advance parser->offset.
816
- static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) {
817
- char c;
818
- int chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
819
-
820
- if (parser->first_nonspace <= parser->offset) {
821
- parser->first_nonspace = parser->offset;
822
- parser->first_nonspace_column = parser->column;
823
- while ((c = peek_at(input, parser->first_nonspace))) {
824
- if (c == ' ') {
825
- parser->first_nonspace += 1;
826
- parser->first_nonspace_column += 1;
827
- chars_to_tab = chars_to_tab - 1;
828
- if (chars_to_tab == 0) {
829
- chars_to_tab = TAB_STOP;
830
- }
831
- } else if (c == '\t') {
832
- parser->first_nonspace += 1;
833
- parser->first_nonspace_column += chars_to_tab;
834
- chars_to_tab = TAB_STOP;
835
- } else {
836
- break;
837
- }
838
- }
839
- }
840
-
841
- parser->indent = parser->first_nonspace_column - parser->column;
842
- parser->blank = S_is_line_end_char(peek_at(input, parser->first_nonspace));
843
- }
844
-
845
- // Advance parser->offset and parser->column. parser->offset is the
846
- // byte position in input; parser->column is a virtual column number
847
- // that takes into account tabs. (Multibyte characters are not taken
848
- // into account, because the Markdown line prefixes we are interested in
849
- // analyzing are entirely ASCII.) The count parameter indicates
850
- // how far to advance the offset. If columns is true, then count
851
- // indicates a number of columns; otherwise, a number of bytes.
852
- // If advancing a certain number of columns partially consumes
853
- // a tab character, parser->partially_consumed_tab is set to true.
854
- static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
855
- bufsize_t count, bool columns) {
856
- char c;
857
- int chars_to_tab;
858
- int chars_to_advance;
859
- while (count > 0 && (c = peek_at(input, parser->offset))) {
860
- if (c == '\t') {
861
- chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
862
- if (columns) {
863
- parser->partially_consumed_tab = chars_to_tab > count;
864
- chars_to_advance = MIN(count, chars_to_tab);
865
- parser->column += chars_to_advance;
866
- parser->offset += (parser->partially_consumed_tab ? 0 : 1);
867
- count -= chars_to_advance;
868
- } else {
869
- parser->partially_consumed_tab = false;
870
- parser->column += chars_to_tab;
871
- parser->offset += 1;
872
- count -= 1;
873
- }
874
- } else {
875
- parser->partially_consumed_tab = false;
876
- parser->offset += 1;
877
- parser->column += 1; // assume ascii; block starts are ascii
878
- count -= 1;
879
- }
880
- }
881
- }
882
-
883
- static bool S_last_child_is_open(cmark_node *container) {
884
- return container->last_child &&
885
- (container->last_child->flags & CMARK_NODE__OPEN);
886
- }
887
-
888
- static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) {
889
- bool res = false;
890
- bufsize_t matched = 0;
891
-
892
- matched =
893
- parser->indent <= 3 && peek_at(input, parser->first_nonspace) == '>';
894
- if (matched) {
895
-
896
- S_advance_offset(parser, input, parser->indent + 1, true);
897
-
898
- if (S_is_space_or_tab(peek_at(input, parser->offset))) {
899
- S_advance_offset(parser, input, 1, true);
900
- }
901
-
902
- res = true;
903
- }
904
- return res;
905
- }
906
-
907
- static bool parse_footnote_definition_block_prefix(cmark_parser *parser, cmark_chunk *input,
908
- cmark_node *container) {
909
- if (parser->indent >= 4) {
910
- S_advance_offset(parser, input, 4, true);
911
- return true;
912
- } else if (input->len > 0 && (input->data[0] == '\n' || (input->data[0] == '\r' && input->data[1] == '\n'))) {
913
- return true;
914
- }
915
-
916
- return false;
917
- }
918
-
919
- static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input,
920
- cmark_node *container) {
921
- bool res = false;
922
-
923
- if (parser->indent >=
924
- container->as.list.marker_offset + container->as.list.padding) {
925
- S_advance_offset(parser, input, container->as.list.marker_offset +
926
- container->as.list.padding,
927
- true);
928
- res = true;
929
- } else if (parser->blank && container->first_child != NULL) {
930
- // if container->first_child is NULL, then the opening line
931
- // of the list item was blank after the list marker; in this
932
- // case, we are done with the list item.
933
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
934
- false);
935
- res = true;
936
- }
937
- return res;
938
- }
939
-
940
- static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
941
- cmark_node *container,
942
- bool *should_continue) {
943
- bool res = false;
944
-
945
- if (!container->as.code.fenced) { // indented
946
- if (parser->indent >= CODE_INDENT) {
947
- S_advance_offset(parser, input, CODE_INDENT, true);
948
- res = true;
949
- } else if (parser->blank) {
950
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
951
- false);
952
- res = true;
953
- }
954
- } else { // fenced
955
- bufsize_t matched = 0;
956
-
957
- if (parser->indent <= 3 && (peek_at(input, parser->first_nonspace) ==
958
- container->as.code.fence_char)) {
959
- matched = scan_close_code_fence(input, parser->first_nonspace);
960
- }
961
-
962
- if (matched >= container->as.code.fence_length) {
963
- // closing fence - and since we're at
964
- // the end of a line, we can stop processing it:
965
- *should_continue = false;
966
- S_advance_offset(parser, input, matched, false);
967
- parser->current = finalize(parser, container);
968
- } else {
969
- // skip opt. spaces of fence parser->offset
970
- int i = container->as.code.fence_offset;
971
-
972
- while (i > 0 && S_is_space_or_tab(peek_at(input, parser->offset))) {
973
- S_advance_offset(parser, input, 1, true);
974
- i--;
975
- }
976
- res = true;
977
- }
978
- }
979
-
980
- return res;
981
- }
982
-
983
- static bool parse_html_block_prefix(cmark_parser *parser,
984
- cmark_node *container) {
985
- bool res = false;
986
- int html_block_type = container->as.html_block_type;
987
-
988
- assert(html_block_type >= 1 && html_block_type <= 7);
989
- switch (html_block_type) {
990
- case 1:
991
- case 2:
992
- case 3:
993
- case 4:
994
- case 5:
995
- // these types of blocks can accept blanks
996
- res = true;
997
- break;
998
- case 6:
999
- case 7:
1000
- res = !parser->blank;
1001
- break;
1002
- }
1003
-
1004
- return res;
1005
- }
1006
-
1007
- static bool parse_extension_block(cmark_parser *parser,
1008
- cmark_node *container,
1009
- cmark_chunk *input)
1010
- {
1011
- bool res = false;
1012
-
1013
- if (container->extension->last_block_matches) {
1014
- if (container->extension->last_block_matches(
1015
- container->extension, parser, input->data, input->len, container))
1016
- res = true;
1017
- }
1018
-
1019
- return res;
1020
- }
1021
-
1022
- /**
1023
- * For each containing node, try to parse the associated line start.
1024
- *
1025
- * Will not close unmatched blocks, as we may have a lazy continuation
1026
- * line -> http://spec.commonmark.org/0.24/#lazy-continuation-line
1027
- *
1028
- * Returns: The last matching node, or NULL
1029
- */
1030
- static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
1031
- bool *all_matched) {
1032
- bool should_continue = true;
1033
- *all_matched = false;
1034
- cmark_node *container = parser->root;
1035
- cmark_node_type cont_type;
1036
-
1037
- while (S_last_child_is_open(container)) {
1038
- container = container->last_child;
1039
- cont_type = S_type(container);
1040
-
1041
- S_find_first_nonspace(parser, input);
1042
-
1043
- if (container->extension) {
1044
- if (!parse_extension_block(parser, container, input))
1045
- goto done;
1046
- continue;
1047
- }
1048
-
1049
- switch (cont_type) {
1050
- case CMARK_NODE_BLOCK_QUOTE:
1051
- if (!parse_block_quote_prefix(parser, input))
1052
- goto done;
1053
- break;
1054
- case CMARK_NODE_ITEM:
1055
- if (!parse_node_item_prefix(parser, input, container))
1056
- goto done;
1057
- break;
1058
- case CMARK_NODE_CODE_BLOCK:
1059
- if (!parse_code_block_prefix(parser, input, container, &should_continue))
1060
- goto done;
1061
- break;
1062
- case CMARK_NODE_HEADING:
1063
- // a heading can never contain more than one line
1064
- goto done;
1065
- case CMARK_NODE_HTML_BLOCK:
1066
- if (!parse_html_block_prefix(parser, container))
1067
- goto done;
1068
- break;
1069
- case CMARK_NODE_PARAGRAPH:
1070
- if (parser->blank)
1071
- goto done;
1072
- break;
1073
- case CMARK_NODE_FOOTNOTE_DEFINITION:
1074
- if (!parse_footnote_definition_block_prefix(parser, input, container))
1075
- goto done;
1076
- break;
1077
- default:
1078
- break;
1079
- }
1080
- }
1081
-
1082
- *all_matched = true;
1083
-
1084
- done:
1085
- if (!*all_matched) {
1086
- container = container->parent; // back up to last matching node
1087
- }
1088
-
1089
- if (!should_continue) {
1090
- container = NULL;
1091
- }
1092
-
1093
- return container;
1094
- }
1095
-
1096
- static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1097
- cmark_chunk *input, bool all_matched) {
1098
- bool indented;
1099
- cmark_list *data = NULL;
1100
- bool maybe_lazy = S_type(parser->current) == CMARK_NODE_PARAGRAPH;
1101
- cmark_node_type cont_type = S_type(*container);
1102
- bufsize_t matched = 0;
1103
- int lev = 0;
1104
- bool save_partially_consumed_tab;
1105
- bool has_content;
1106
- int save_offset;
1107
- int save_column;
1108
-
1109
- while (cont_type != CMARK_NODE_CODE_BLOCK &&
1110
- cont_type != CMARK_NODE_HTML_BLOCK) {
1111
-
1112
- S_find_first_nonspace(parser, input);
1113
- indented = parser->indent >= CODE_INDENT;
1114
-
1115
- if (!indented && peek_at(input, parser->first_nonspace) == '>') {
1116
-
1117
- bufsize_t blockquote_startpos = parser->first_nonspace;
1118
-
1119
- S_advance_offset(parser, input,
1120
- parser->first_nonspace + 1 - parser->offset, false);
1121
- // optional following character
1122
- if (S_is_space_or_tab(peek_at(input, parser->offset))) {
1123
- S_advance_offset(parser, input, 1, true);
1124
- }
1125
- *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE,
1126
- blockquote_startpos + 1);
1127
-
1128
- } else if (!indented && (matched = scan_atx_heading_start(
1129
- input, parser->first_nonspace))) {
1130
- bufsize_t hashpos;
1131
- int level = 0;
1132
- bufsize_t heading_startpos = parser->first_nonspace;
1133
-
1134
- S_advance_offset(parser, input,
1135
- parser->first_nonspace + matched - parser->offset,
1136
- false);
1137
- *container = add_child(parser, *container, CMARK_NODE_HEADING,
1138
- heading_startpos + 1);
1139
-
1140
- hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace);
1141
-
1142
- while (peek_at(input, hashpos) == '#') {
1143
- level++;
1144
- hashpos++;
1145
- }
1146
-
1147
- (*container)->as.heading.level = level;
1148
- (*container)->as.heading.setext = false;
1149
- (*container)->internal_offset = matched;
1150
-
1151
- } else if (!indented && (matched = scan_open_code_fence(
1152
- input, parser->first_nonspace))) {
1153
- *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
1154
- parser->first_nonspace + 1);
1155
- (*container)->as.code.fenced = true;
1156
- (*container)->as.code.fence_char = peek_at(input, parser->first_nonspace);
1157
- (*container)->as.code.fence_length = (matched > 255) ? 255 : (uint8_t)matched;
1158
- (*container)->as.code.fence_offset =
1159
- (int8_t)(parser->first_nonspace - parser->offset);
1160
- (*container)->as.code.info = cmark_chunk_literal("");
1161
- S_advance_offset(parser, input,
1162
- parser->first_nonspace + matched - parser->offset,
1163
- false);
1164
-
1165
- } else if (!indented && ((matched = scan_html_block_start(
1166
- input, parser->first_nonspace)) ||
1167
- (cont_type != CMARK_NODE_PARAGRAPH &&
1168
- (matched = scan_html_block_start_7(
1169
- input, parser->first_nonspace))))) {
1170
- *container = add_child(parser, *container, CMARK_NODE_HTML_BLOCK,
1171
- parser->first_nonspace + 1);
1172
- (*container)->as.html_block_type = matched;
1173
- // note, we don't adjust parser->offset because the tag is part of the
1174
- // text
1175
- } else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
1176
- (lev =
1177
- scan_setext_heading_line(input, parser->first_nonspace))) {
1178
- // finalize paragraph, resolving reference links
1179
- has_content = resolve_reference_link_definitions(parser, *container);
1180
-
1181
- if (has_content) {
1182
-
1183
- (*container)->type = (uint16_t)CMARK_NODE_HEADING;
1184
- (*container)->as.heading.level = lev;
1185
- (*container)->as.heading.setext = true;
1186
- S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
1187
- }
1188
- } else if (!indented &&
1189
- !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
1190
- (parser->thematic_break_kill_pos <= parser->first_nonspace) &&
1191
- (matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
1192
- // it's only now that we know the line is not part of a setext heading:
1193
- *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
1194
- parser->first_nonspace + 1);
1195
- S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
1196
- } else if (!indented &&
1197
- parser->options & CMARK_OPT_FOOTNOTES &&
1198
- (matched = scan_footnote_definition(input, parser->first_nonspace))) {
1199
- cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2);
1200
- cmark_chunk_to_cstr(parser->mem, &c);
1201
-
1202
- while (c.data[c.len - 1] != ']')
1203
- --c.len;
1204
- --c.len;
1205
-
1206
- S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false);
1207
- *container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1);
1208
- (*container)->as.literal = c;
1209
-
1210
- (*container)->internal_offset = matched;
1211
- } else if ((!indented || cont_type == CMARK_NODE_LIST) &&
1212
- parser->indent < 4 &&
1213
- (matched = parse_list_marker(
1214
- parser->mem, input, parser->first_nonspace,
1215
- (*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
1216
-
1217
- // Note that we can have new list items starting with >= 4
1218
- // spaces indent, as long as the list container is still open.
1219
- int i = 0;
1220
-
1221
- // compute padding:
1222
- S_advance_offset(parser, input,
1223
- parser->first_nonspace + matched - parser->offset,
1224
- false);
1225
-
1226
- save_partially_consumed_tab = parser->partially_consumed_tab;
1227
- save_offset = parser->offset;
1228
- save_column = parser->column;
1229
-
1230
- while (parser->column - save_column <= 5 &&
1231
- S_is_space_or_tab(peek_at(input, parser->offset))) {
1232
- S_advance_offset(parser, input, 1, true);
1233
- }
1234
-
1235
- i = parser->column - save_column;
1236
- if (i >= 5 || i < 1 ||
1237
- // only spaces after list marker:
1238
- S_is_line_end_char(peek_at(input, parser->offset))) {
1239
- data->padding = matched + 1;
1240
- parser->offset = save_offset;
1241
- parser->column = save_column;
1242
- parser->partially_consumed_tab = save_partially_consumed_tab;
1243
- if (i > 0) {
1244
- S_advance_offset(parser, input, 1, true);
1245
- }
1246
- } else {
1247
- data->padding = matched + i;
1248
- }
1249
-
1250
- // check container; if it's a list, see if this list item
1251
- // can continue the list; otherwise, create a list container.
1252
-
1253
- data->marker_offset = parser->indent;
1254
-
1255
- if (cont_type != CMARK_NODE_LIST ||
1256
- !lists_match(&((*container)->as.list), data)) {
1257
- *container = add_child(parser, *container, CMARK_NODE_LIST,
1258
- parser->first_nonspace + 1);
1259
-
1260
- memcpy(&((*container)->as.list), data, sizeof(*data));
1261
- }
1262
-
1263
- // add the list item
1264
- *container = add_child(parser, *container, CMARK_NODE_ITEM,
1265
- parser->first_nonspace + 1);
1266
- /* TODO: static */
1267
- memcpy(&((*container)->as.list), data, sizeof(*data));
1268
- parser->mem->free(data);
1269
- } else if (indented && !maybe_lazy && !parser->blank) {
1270
- S_advance_offset(parser, input, CODE_INDENT, true);
1271
- *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
1272
- parser->offset + 1);
1273
- (*container)->as.code.fenced = false;
1274
- (*container)->as.code.fence_char = 0;
1275
- (*container)->as.code.fence_length = 0;
1276
- (*container)->as.code.fence_offset = 0;
1277
- (*container)->as.code.info = cmark_chunk_literal("");
1278
- } else {
1279
- cmark_llist *tmp;
1280
- cmark_node *new_container = NULL;
1281
-
1282
- for (tmp = parser->syntax_extensions; tmp; tmp=tmp->next) {
1283
- cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data;
1284
-
1285
- if (ext->try_opening_block) {
1286
- new_container = ext->try_opening_block(
1287
- ext, indented, parser, *container, input->data, input->len);
1288
-
1289
- if (new_container) {
1290
- *container = new_container;
1291
- break;
1292
- }
1293
- }
1294
- }
1295
-
1296
- if (!new_container) {
1297
- break;
1298
- }
1299
- }
1300
-
1301
- if (accepts_lines(S_type(*container))) {
1302
- // if it's a line container, it can't contain other containers
1303
- break;
1304
- }
1305
-
1306
- cont_type = S_type(*container);
1307
- maybe_lazy = false;
1308
- }
1309
- }
1310
-
1311
- static void add_text_to_container(cmark_parser *parser, cmark_node *container,
1312
- cmark_node *last_matched_container,
1313
- cmark_chunk *input) {
1314
- cmark_node *tmp;
1315
- // what remains at parser->offset is a text line. add the text to the
1316
- // appropriate container.
1317
-
1318
- S_find_first_nonspace(parser, input);
1319
-
1320
- if (parser->blank && container->last_child)
1321
- S_set_last_line_blank(container->last_child, true);
1322
-
1323
- // block quote lines are never blank as they start with >
1324
- // and we don't count blanks in fenced code for purposes of tight/loose
1325
- // lists or breaking out of lists. we also don't set last_line_blank
1326
- // on an empty list item.
1327
- const cmark_node_type ctype = S_type(container);
1328
- const bool last_line_blank =
1329
- (parser->blank && ctype != CMARK_NODE_BLOCK_QUOTE &&
1330
- ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK &&
1331
- !(ctype == CMARK_NODE_CODE_BLOCK && container->as.code.fenced) &&
1332
- !(ctype == CMARK_NODE_ITEM && container->first_child == NULL &&
1333
- container->start_line == parser->line_number));
1334
-
1335
- S_set_last_line_blank(container, last_line_blank);
1336
-
1337
- tmp = container;
1338
- while (tmp->parent) {
1339
- S_set_last_line_blank(tmp->parent, false);
1340
- tmp = tmp->parent;
1341
- }
1342
-
1343
- // If the last line processed belonged to a paragraph node,
1344
- // and we didn't match all of the line prefixes for the open containers,
1345
- // and we didn't start any new containers,
1346
- // and the line isn't blank,
1347
- // then treat this as a "lazy continuation line" and add it to
1348
- // the open paragraph.
1349
- if (parser->current != last_matched_container &&
1350
- container == last_matched_container && !parser->blank &&
1351
- S_type(parser->current) == CMARK_NODE_PARAGRAPH) {
1352
- add_line(parser->current, input, parser);
1353
- } else { // not a lazy continuation
1354
- // Finalize any blocks that were not matched and set cur to container:
1355
- while (parser->current != last_matched_container) {
1356
- parser->current = finalize(parser, parser->current);
1357
- assert(parser->current != NULL);
1358
- }
1359
-
1360
- if (S_type(container) == CMARK_NODE_CODE_BLOCK) {
1361
- add_line(container, input, parser);
1362
- } else if (S_type(container) == CMARK_NODE_HTML_BLOCK) {
1363
- add_line(container, input, parser);
1364
-
1365
- int matches_end_condition;
1366
- switch (container->as.html_block_type) {
1367
- case 1:
1368
- // </script>, </style>, </pre>
1369
- matches_end_condition =
1370
- scan_html_block_end_1(input, parser->first_nonspace);
1371
- break;
1372
- case 2:
1373
- // -->
1374
- matches_end_condition =
1375
- scan_html_block_end_2(input, parser->first_nonspace);
1376
- break;
1377
- case 3:
1378
- // ?>
1379
- matches_end_condition =
1380
- scan_html_block_end_3(input, parser->first_nonspace);
1381
- break;
1382
- case 4:
1383
- // >
1384
- matches_end_condition =
1385
- scan_html_block_end_4(input, parser->first_nonspace);
1386
- break;
1387
- case 5:
1388
- // ]]>
1389
- matches_end_condition =
1390
- scan_html_block_end_5(input, parser->first_nonspace);
1391
- break;
1392
- default:
1393
- matches_end_condition = 0;
1394
- break;
1395
- }
1396
-
1397
- if (matches_end_condition) {
1398
- container = finalize(parser, container);
1399
- assert(parser->current != NULL);
1400
- }
1401
- } else if (parser->blank) {
1402
- // ??? do nothing
1403
- } else if (accepts_lines(S_type(container))) {
1404
- if (S_type(container) == CMARK_NODE_HEADING &&
1405
- container->as.heading.setext == false) {
1406
- chop_trailing_hashtags(input);
1407
- }
1408
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
1409
- false);
1410
- add_line(container, input, parser);
1411
- } else {
1412
- // create paragraph container for line
1413
- container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
1414
- parser->first_nonspace + 1);
1415
- S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
1416
- false);
1417
- add_line(container, input, parser);
1418
- }
1419
-
1420
- parser->current = container;
1421
- }
1422
- }
1423
-
1424
- /* See http://spec.commonmark.org/0.24/#phase-1-block-structure */
1425
- static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1426
- bufsize_t bytes) {
1427
- cmark_node *last_matched_container;
1428
- bool all_matched = true;
1429
- cmark_node *container;
1430
- cmark_chunk input;
1431
- cmark_node *current;
1432
-
1433
- cmark_strbuf_clear(&parser->curline);
1434
-
1435
- if (parser->options & CMARK_OPT_VALIDATE_UTF8)
1436
- cmark_utf8proc_check(&parser->curline, buffer, bytes);
1437
- else
1438
- cmark_strbuf_put(&parser->curline, buffer, bytes);
1439
-
1440
- bytes = parser->curline.size;
1441
-
1442
- // ensure line ends with a newline:
1443
- if (bytes == 0 || !S_is_line_end_char(parser->curline.ptr[bytes - 1]))
1444
- cmark_strbuf_putc(&parser->curline, '\n');
1445
-
1446
- parser->offset = 0;
1447
- parser->column = 0;
1448
- parser->first_nonspace = 0;
1449
- parser->first_nonspace_column = 0;
1450
- parser->thematic_break_kill_pos = 0;
1451
- parser->indent = 0;
1452
- parser->blank = false;
1453
- parser->partially_consumed_tab = false;
1454
-
1455
- input.data = parser->curline.ptr;
1456
- input.len = parser->curline.size;
1457
- input.alloc = 0;
1458
-
1459
- // Skip UTF-8 BOM.
1460
- if (parser->line_number == 0 &&
1461
- input.len >= 3 &&
1462
- memcmp(input.data, "\xef\xbb\xbf", 3) == 0)
1463
- parser->offset += 3;
1464
-
1465
- parser->line_number++;
1466
-
1467
- last_matched_container = check_open_blocks(parser, &input, &all_matched);
1468
-
1469
- if (!last_matched_container)
1470
- goto finished;
1471
-
1472
- container = last_matched_container;
1473
-
1474
- current = parser->current;
1475
-
1476
- open_new_blocks(parser, &container, &input, all_matched);
1477
-
1478
- /* parser->current might have changed if feed_reentrant was called */
1479
- if (current == parser->current)
1480
- add_text_to_container(parser, container, last_matched_container, &input);
1481
-
1482
- finished:
1483
- parser->last_line_length = input.len;
1484
- if (parser->last_line_length &&
1485
- input.data[parser->last_line_length - 1] == '\n')
1486
- parser->last_line_length -= 1;
1487
- if (parser->last_line_length &&
1488
- input.data[parser->last_line_length - 1] == '\r')
1489
- parser->last_line_length -= 1;
1490
-
1491
- cmark_strbuf_clear(&parser->curline);
1492
- }
1493
-
1494
- cmark_node *cmark_parser_finish(cmark_parser *parser) {
1495
- cmark_node *res;
1496
- cmark_llist *extensions;
1497
-
1498
- /* Parser was already finished once */
1499
- if (parser->root == NULL)
1500
- return NULL;
1501
-
1502
- if (parser->linebuf.size) {
1503
- S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
1504
- cmark_strbuf_clear(&parser->linebuf);
1505
- }
1506
-
1507
- finalize_document(parser);
1508
-
1509
- cmark_consolidate_text_nodes(parser->root);
1510
-
1511
- cmark_strbuf_free(&parser->curline);
1512
- cmark_strbuf_free(&parser->linebuf);
1513
-
1514
- #if CMARK_DEBUG_NODES
1515
- if (cmark_node_check(parser->root, stderr)) {
1516
- abort();
1517
- }
1518
- #endif
1519
-
1520
- for (extensions = parser->syntax_extensions; extensions; extensions = extensions->next) {
1521
- cmark_syntax_extension *ext = (cmark_syntax_extension *) extensions->data;
1522
- if (ext->postprocess_func) {
1523
- cmark_node *processed = ext->postprocess_func(ext, parser, parser->root);
1524
- if (processed)
1525
- parser->root = processed;
1526
- }
1527
- }
1528
-
1529
- res = parser->root;
1530
- parser->root = NULL;
1531
-
1532
- cmark_parser_reset(parser);
1533
-
1534
- return res;
1535
- }
1536
-
1537
- int cmark_parser_get_line_number(cmark_parser *parser) {
1538
- return parser->line_number;
1539
- }
1540
-
1541
- bufsize_t cmark_parser_get_offset(cmark_parser *parser) {
1542
- return parser->offset;
1543
- }
1544
-
1545
- bufsize_t cmark_parser_get_column(cmark_parser *parser) {
1546
- return parser->column;
1547
- }
1548
-
1549
- int cmark_parser_get_first_nonspace(cmark_parser *parser) {
1550
- return parser->first_nonspace;
1551
- }
1552
-
1553
- int cmark_parser_get_first_nonspace_column(cmark_parser *parser) {
1554
- return parser->first_nonspace_column;
1555
- }
1556
-
1557
- int cmark_parser_get_indent(cmark_parser *parser) {
1558
- return parser->indent;
1559
- }
1560
-
1561
- int cmark_parser_is_blank(cmark_parser *parser) {
1562
- return parser->blank;
1563
- }
1564
-
1565
- int cmark_parser_has_partially_consumed_tab(cmark_parser *parser) {
1566
- return parser->partially_consumed_tab;
1567
- }
1568
-
1569
- int cmark_parser_get_last_line_length(cmark_parser *parser) {
1570
- return parser->last_line_length;
1571
- }
1572
-
1573
- cmark_node *cmark_parser_add_child(cmark_parser *parser,
1574
- cmark_node *parent,
1575
- cmark_node_type block_type,
1576
- int start_column) {
1577
- return add_child(parser, parent, block_type, start_column);
1578
- }
1579
-
1580
- void cmark_parser_advance_offset(cmark_parser *parser,
1581
- const char *input,
1582
- int count,
1583
- int columns) {
1584
- cmark_chunk input_chunk = cmark_chunk_literal(input);
1585
-
1586
- S_advance_offset(parser, &input_chunk, count, columns != 0);
1587
- }
1588
-
1589
- void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser,
1590
- cmark_ispunct_func func) {
1591
- parser->backslash_ispunct = func;
1592
- }
1593
-
1594
- cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser) {
1595
- return parser->syntax_extensions;
1596
- }