commonmarker 0.23.10 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +1156 -0
- data/Cargo.toml +7 -0
- data/README.md +237 -172
- data/ext/commonmarker/Cargo.toml +20 -0
- data/ext/commonmarker/extconf.rb +3 -6
- data/ext/commonmarker/src/lib.rs +103 -0
- data/ext/commonmarker/src/node.rs +1221 -0
- data/ext/commonmarker/src/options.rs +220 -0
- data/ext/commonmarker/src/plugins/syntax_highlighting.rs +166 -0
- data/ext/commonmarker/src/plugins.rs +6 -0
- data/ext/commonmarker/src/utils.rs +8 -0
- data/lib/commonmarker/config.rb +92 -40
- data/lib/commonmarker/constants.rb +7 -0
- data/lib/commonmarker/extension.rb +14 -0
- data/lib/commonmarker/node/ast.rb +8 -0
- data/lib/commonmarker/node/inspect.rb +14 -4
- data/lib/commonmarker/node.rb +29 -47
- data/lib/commonmarker/renderer.rb +1 -127
- data/lib/commonmarker/utils.rb +22 -0
- data/lib/commonmarker/version.rb +2 -2
- data/lib/commonmarker.rb +27 -25
- metadata +38 -191
- data/Rakefile +0 -109
- data/bin/commonmarker +0 -118
- data/commonmarker.gemspec +0 -38
- data/ext/commonmarker/arena.c +0 -104
- data/ext/commonmarker/autolink.c +0 -508
- data/ext/commonmarker/autolink.h +0 -8
- data/ext/commonmarker/blocks.c +0 -1622
- data/ext/commonmarker/buffer.c +0 -278
- data/ext/commonmarker/buffer.h +0 -116
- data/ext/commonmarker/case_fold_switch.inc +0 -4327
- data/ext/commonmarker/chunk.h +0 -135
- data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
- data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
- data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
- data/ext/commonmarker/cmark-gfm.h +0 -833
- data/ext/commonmarker/cmark-gfm_export.h +0 -42
- data/ext/commonmarker/cmark-gfm_version.h +0 -7
- data/ext/commonmarker/cmark.c +0 -55
- data/ext/commonmarker/cmark_ctype.c +0 -44
- data/ext/commonmarker/cmark_ctype.h +0 -33
- data/ext/commonmarker/commonmark.c +0 -514
- data/ext/commonmarker/commonmarker.c +0 -1308
- data/ext/commonmarker/commonmarker.h +0 -16
- data/ext/commonmarker/config.h +0 -76
- data/ext/commonmarker/core-extensions.c +0 -27
- data/ext/commonmarker/entities.inc +0 -2138
- data/ext/commonmarker/ext_scanners.c +0 -879
- data/ext/commonmarker/ext_scanners.h +0 -24
- data/ext/commonmarker/footnotes.c +0 -63
- data/ext/commonmarker/footnotes.h +0 -27
- data/ext/commonmarker/houdini.h +0 -57
- data/ext/commonmarker/houdini_href_e.c +0 -100
- data/ext/commonmarker/houdini_html_e.c +0 -66
- data/ext/commonmarker/houdini_html_u.c +0 -149
- data/ext/commonmarker/html.c +0 -502
- data/ext/commonmarker/html.h +0 -27
- data/ext/commonmarker/inlines.c +0 -1788
- data/ext/commonmarker/inlines.h +0 -29
- data/ext/commonmarker/iterator.c +0 -159
- data/ext/commonmarker/iterator.h +0 -26
- data/ext/commonmarker/latex.c +0 -468
- data/ext/commonmarker/linked_list.c +0 -37
- data/ext/commonmarker/man.c +0 -274
- data/ext/commonmarker/map.c +0 -129
- data/ext/commonmarker/map.h +0 -44
- data/ext/commonmarker/node.c +0 -1045
- data/ext/commonmarker/node.h +0 -167
- data/ext/commonmarker/parser.h +0 -59
- data/ext/commonmarker/plaintext.c +0 -218
- data/ext/commonmarker/plugin.c +0 -36
- data/ext/commonmarker/plugin.h +0 -34
- data/ext/commonmarker/references.c +0 -43
- data/ext/commonmarker/references.h +0 -26
- data/ext/commonmarker/registry.c +0 -63
- data/ext/commonmarker/registry.h +0 -24
- data/ext/commonmarker/render.c +0 -213
- data/ext/commonmarker/render.h +0 -62
- data/ext/commonmarker/scanners.c +0 -14056
- data/ext/commonmarker/scanners.h +0 -70
- data/ext/commonmarker/scanners.re +0 -341
- data/ext/commonmarker/strikethrough.c +0 -167
- data/ext/commonmarker/strikethrough.h +0 -9
- data/ext/commonmarker/syntax_extension.c +0 -149
- data/ext/commonmarker/syntax_extension.h +0 -34
- data/ext/commonmarker/table.c +0 -917
- data/ext/commonmarker/table.h +0 -12
- data/ext/commonmarker/tagfilter.c +0 -60
- data/ext/commonmarker/tagfilter.h +0 -8
- data/ext/commonmarker/tasklist.c +0 -156
- data/ext/commonmarker/tasklist.h +0 -8
- data/ext/commonmarker/utf8.c +0 -317
- data/ext/commonmarker/utf8.h +0 -35
- data/ext/commonmarker/xml.c +0 -182
- data/lib/commonmarker/renderer/html_renderer.rb +0 -256
data/ext/commonmarker/blocks.c
DELETED
@@ -1,1622 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Block parsing implementation.
|
3
|
-
*
|
4
|
-
* For a high-level overview of the block parsing process,
|
5
|
-
* see http://spec.commonmark.org/0.24/#phase-1-block-structure
|
6
|
-
*/
|
7
|
-
|
8
|
-
#include <stdlib.h>
|
9
|
-
#include <assert.h>
|
10
|
-
#include <stdio.h>
|
11
|
-
#include <limits.h>
|
12
|
-
|
13
|
-
#include "cmark_ctype.h"
|
14
|
-
#include "syntax_extension.h"
|
15
|
-
#include "config.h"
|
16
|
-
#include "parser.h"
|
17
|
-
#include "cmark-gfm.h"
|
18
|
-
#include "node.h"
|
19
|
-
#include "references.h"
|
20
|
-
#include "utf8.h"
|
21
|
-
#include "scanners.h"
|
22
|
-
#include "inlines.h"
|
23
|
-
#include "houdini.h"
|
24
|
-
#include "buffer.h"
|
25
|
-
#include "footnotes.h"
|
26
|
-
|
27
|
-
#define CODE_INDENT 4
|
28
|
-
#define TAB_STOP 4
|
29
|
-
|
30
|
-
/**
|
31
|
-
* Very deeply nested lists can cause quadratic performance issues.
|
32
|
-
* This constant is used in open_new_blocks() to limit the nesting
|
33
|
-
* depth. It is unlikely that a non-contrived markdown document will
|
34
|
-
* be nested this deeply.
|
35
|
-
*/
|
36
|
-
#define MAX_LIST_DEPTH 100
|
37
|
-
|
38
|
-
#ifndef MIN
// Smaller of two values. Every argument and the whole expansion are
// parenthesized so operands containing lower-precedence operators
// (e.g. `a & b`) expand correctly. Each argument may still be evaluated
// twice, so do not pass expressions with side effects.
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
|
41
|
-
|
42
|
-
// Element at index n of the `data` array of the object pointed to by i.
// No bounds checking is performed.
#define peek_at(i, n) ((i)->data[(n)])
|
43
|
-
|
44
|
-
static bool S_last_line_blank(const cmark_node *node) {
|
45
|
-
return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
|
46
|
-
}
|
47
|
-
|
48
|
-
static bool S_last_line_checked(const cmark_node *node) {
|
49
|
-
return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0;
|
50
|
-
}
|
51
|
-
|
52
|
-
// Returns the node's stored type converted to the cmark_node_type enum.
static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
  const cmark_node_type kind = (cmark_node_type)node->type;
  return kind;
}
|
55
|
-
|
56
|
-
// Sets or clears CMARK_NODE__LAST_LINE_BLANK on `node` according to
// `is_blank`; all other flag bits are left untouched.
static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
  if (!is_blank) {
    node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
    return;
  }
  node->flags |= CMARK_NODE__LAST_LINE_BLANK;
}
|
62
|
-
|
63
|
-
// Marks `node` with CMARK_NODE__LAST_LINE_CHECKED. The flag is consulted by
// S_ends_with_blank_line() before it descends into a node.
static void S_set_last_line_checked(cmark_node *node) {
  node->flags = node->flags | CMARK_NODE__LAST_LINE_CHECKED;
}
|
66
|
-
|
67
|
-
// True for the two line-ending bytes: line feed and carriage return.
static CMARK_INLINE bool S_is_line_end_char(char c) {
  switch (c) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
|
70
|
-
|
71
|
-
// True for a space or a horizontal tab; false for every other byte.
static CMARK_INLINE bool S_is_space_or_tab(char c) {
  switch (c) {
  case ' ':
  case '\t':
    return true;
  default:
    return false;
  }
}
|
74
|
-
|
75
|
-
static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
|
76
|
-
size_t len, bool eof);
|
77
|
-
|
78
|
-
static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
|
79
|
-
bufsize_t bytes);
|
80
|
-
|
81
|
-
// Allocates a zeroed node of kind `tag` through `mem`, initializes its
// content buffer, marks it open and records its start position. The end
// line starts out equal to the start line. Returns the new node.
static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag,
                              int start_line, int start_column) {
  cmark_node *block = (cmark_node *)mem->calloc(1, sizeof(*block));

  cmark_strbuf_init(mem, &block->content, 32);
  block->type = (uint16_t)tag;
  block->flags = CMARK_NODE__OPEN;

  block->start_line = start_line;
  block->start_column = start_column;
  block->end_line = start_line;

  return block;
}
|
95
|
-
|
96
|
-
// Create a root document node.
|
97
|
-
static cmark_node *make_document(cmark_mem *mem) {
|
98
|
-
cmark_node *e = make_block(mem, CMARK_NODE_DOCUMENT, 1, 1);
|
99
|
-
return e;
|
100
|
-
}
|
101
|
-
|
102
|
-
int cmark_parser_attach_syntax_extension(cmark_parser *parser,
|
103
|
-
cmark_syntax_extension *extension) {
|
104
|
-
parser->syntax_extensions = cmark_llist_append(parser->mem, parser->syntax_extensions, extension);
|
105
|
-
if (extension->match_inline || extension->insert_inline_from_delim) {
|
106
|
-
parser->inline_syntax_extensions = cmark_llist_append(
|
107
|
-
parser->mem, parser->inline_syntax_extensions, extension);
|
108
|
-
}
|
109
|
-
|
110
|
-
return 1;
|
111
|
-
}
|
112
|
-
|
113
|
-
// Frees the parser's document tree and reference map, if present. The
// pointers themselves are not cleared here.
static void cmark_parser_dispose(cmark_parser *parser) {
  if (parser->root != NULL) {
    cmark_node_free(parser->root);
  }

  if (parser->refmap != NULL) {
    cmark_map_free(parser->refmap);
  }
}
|
120
|
-
|
121
|
-
// Returns `parser` to a freshly-created state while keeping its allocator,
// options and attached syntax extensions: the old tree and reference map are
// disposed, the struct is zeroed, and new line buffers, a new document root
// and a new reference map are installed.
static void cmark_parser_reset(cmark_parser *parser) {
  // Values that must survive the memset below.
  cmark_mem *const kept_mem = parser->mem;
  const int kept_options = parser->options;
  cmark_llist *const kept_exts = parser->syntax_extensions;
  cmark_llist *const kept_inline_exts = parser->inline_syntax_extensions;

  cmark_parser_dispose(parser);

  memset(parser, 0, sizeof(*parser));
  parser->mem = kept_mem;

  cmark_strbuf_init(parser->mem, &parser->curline, 256);
  cmark_strbuf_init(parser->mem, &parser->linebuf, 0);

  cmark_node *root = make_document(parser->mem);

  parser->refmap = cmark_reference_map_new(parser->mem);
  parser->root = root;
  parser->current = root;

  parser->options = kept_options;
  parser->syntax_extensions = kept_exts;
  parser->inline_syntax_extensions = kept_inline_exts;
}
|
145
|
-
|
146
|
-
// Creates a parser that allocates through `mem` and uses `options`. The
// struct is zero-allocated, then cmark_parser_reset() installs the buffers,
// document root and reference map.
cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
  cmark_parser *parser;

  parser = (cmark_parser *)mem->calloc(1, sizeof(*parser));
  parser->options = options;
  parser->mem = mem;

  cmark_parser_reset(parser);
  return parser;
}
|
153
|
-
|
154
|
-
cmark_parser *cmark_parser_new(int options) {
|
155
|
-
extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
|
156
|
-
return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR);
|
157
|
-
}
|
158
|
-
|
159
|
-
// Releases everything owned by `parser`: its tree and reference map, both
// line buffers, both extension lists, and finally the parser struct itself.
void cmark_parser_free(cmark_parser *parser) {
  // Capture the allocator first; it is needed to free the struct last.
  cmark_mem *mem = parser->mem;

  cmark_parser_dispose(parser);

  cmark_strbuf_free(&parser->curline);
  cmark_strbuf_free(&parser->linebuf);

  cmark_llist_free(mem, parser->syntax_extensions);
  cmark_llist_free(mem, parser->inline_syntax_extensions);

  mem->free(parser);
}
|
168
|
-
|
169
|
-
static cmark_node *finalize(cmark_parser *parser, cmark_node *b);
|
170
|
-
|
171
|
-
// Returns true if line has only space characters, else false.
|
172
|
-
static bool is_blank(cmark_strbuf *s, bufsize_t offset) {
|
173
|
-
while (offset < s->size) {
|
174
|
-
switch (s->ptr[offset]) {
|
175
|
-
case '\r':
|
176
|
-
case '\n':
|
177
|
-
return true;
|
178
|
-
case ' ':
|
179
|
-
offset++;
|
180
|
-
break;
|
181
|
-
case '\t':
|
182
|
-
offset++;
|
183
|
-
break;
|
184
|
-
default:
|
185
|
-
return false;
|
186
|
-
}
|
187
|
-
}
|
188
|
-
|
189
|
-
return true;
|
190
|
-
}
|
191
|
-
|
192
|
-
// True for the block types that accumulate raw text lines: paragraphs,
// headings and code blocks.
static CMARK_INLINE bool accepts_lines(cmark_node_type block_type) {
  switch (block_type) {
  case CMARK_NODE_PARAGRAPH:
  case CMARK_NODE_HEADING:
  case CMARK_NODE_CODE_BLOCK:
    return true;
  default:
    return false;
  }
}
|
197
|
-
|
198
|
-
// Tells whether the content of `node` should be parsed as inlines. A node
// whose extension supplies contains_inlines_func defers to that callback;
// otherwise only paragraphs and headings qualify.
static CMARK_INLINE bool contains_inlines(cmark_node *node) {
  if (node->extension != NULL &&
      node->extension->contains_inlines_func != NULL) {
    return node->extension->contains_inlines_func(node->extension, node) != 0;
  }

  switch (node->type) {
  case CMARK_NODE_PARAGRAPH:
  case CMARK_NODE_HEADING:
    return true;
  default:
    return false;
  }
}
|
206
|
-
|
207
|
-
// Appends the remainder of the current line (from parser->offset to the end
// of `ch`) to the content of `node`, which must still be open. If a tab was
// only partially consumed, the tab byte is skipped and replaced by the
// number of spaces needed to reach the next tab stop from parser->column.
static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) {
  assert(node->flags & CMARK_NODE__OPEN);

  if (parser->partially_consumed_tab) {
    int pad;

    parser->offset += 1; // step past the tab byte itself
    pad = TAB_STOP - (parser->column % TAB_STOP);
    while (pad-- > 0) {
      cmark_strbuf_putc(&node->content, ' ');
    }
  }

  cmark_strbuf_put(&node->content, ch->data + parser->offset,
                   ch->len - parser->offset);
}
|
222
|
-
|
223
|
-
// Strips trailing blank lines from `ln`. The buffer is scanned backwards for
// the last byte that is not a space, tab or line ending. If there is none,
// the buffer is cleared; otherwise it is truncated at the first line ending
// found at or after that byte (that line ending is removed too).
static void remove_trailing_blank_lines(cmark_strbuf *ln) {
  bufsize_t last = ln->size - 1;

  // Walk back over trailing whitespace and line endings.
  while (last >= 0) {
    unsigned char ch = ln->ptr[last];
    if (ch != ' ' && ch != '\t' && !S_is_line_end_char(ch)) {
      break;
    }
    last--;
  }

  if (last < 0) {
    cmark_strbuf_clear(ln);
    return;
  }

  // Find the end of the last non-blank line and cut there.
  bufsize_t cut = last;
  while (cut < ln->size && !S_is_line_end_char(ln->ptr[cut])) {
    cut++;
  }
  if (cut < ln->size) {
    cmark_strbuf_truncate(ln, cut);
  }
}
|
249
|
-
|
250
|
-
// Check to see if a node ends with a blank line, descending
|
251
|
-
// if needed into lists and sublists.
|
252
|
-
static bool S_ends_with_blank_line(cmark_node *node) {
|
253
|
-
if (S_last_line_checked(node)) {
|
254
|
-
return(S_last_line_blank(node));
|
255
|
-
} else if ((S_type(node) == CMARK_NODE_LIST ||
|
256
|
-
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
|
257
|
-
S_set_last_line_checked(node);
|
258
|
-
return(S_ends_with_blank_line(node->last_child));
|
259
|
-
} else {
|
260
|
-
S_set_last_line_checked(node);
|
261
|
-
return (S_last_line_blank(node));
|
262
|
-
}
|
263
|
-
}
|
264
|
-
|
265
|
-
// returns true if content remains after link defs are resolved.
|
266
|
-
static bool resolve_reference_link_definitions(
|
267
|
-
cmark_parser *parser,
|
268
|
-
cmark_node *b) {
|
269
|
-
bufsize_t pos;
|
270
|
-
cmark_strbuf *node_content = &b->content;
|
271
|
-
cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
|
272
|
-
while (chunk.len && chunk.data[0] == '[' &&
|
273
|
-
(pos = cmark_parse_reference_inline(parser->mem, &chunk,
|
274
|
-
parser->refmap))) {
|
275
|
-
|
276
|
-
chunk.data += pos;
|
277
|
-
chunk.len -= pos;
|
278
|
-
}
|
279
|
-
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
|
280
|
-
return !is_blank(&b->content, 0);
|
281
|
-
}
|
282
|
-
|
283
|
-
// Returns true if `list` is tight. A list is loose when a non-final item
// ends with a blank line, or when any child of an item ends with a blank
// line and is followed by a sibling or by another item.
static bool S_list_is_tight(cmark_node *list) {
  for (cmark_node *item = list->first_child; item; item = item->next) {
    // non-final list item ending with a blank line
    if (S_last_line_blank(item) && item->next) {
      return false;
    }
    // blank lines between the children of this item
    for (cmark_node *sub = item->first_child; sub; sub = sub->next) {
      if ((item->next || sub->next) && S_ends_with_blank_line(sub)) {
        return false;
      }
    }
  }
  return true;
}

// Closes block `b`: clears its open flag, records its end position and does
// the type-specific wrap-up (reference definitions for paragraphs, info
// string and literal for code blocks, literal for HTML blocks, tightness for
// lists). A paragraph left empty after its reference definitions are removed
// is freed. Returns the parent of `b` as it was on entry.
static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
  cmark_node *const parent = b->parent;

  assert(b->flags &
         CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks
  b->flags &= ~CMARK_NODE__OPEN;

  const cmark_node_type kind = S_type(b);
  // These block kinds end on the line currently being processed rather than
  // on the previous one.
  const bool ends_on_current_line =
      kind == CMARK_NODE_DOCUMENT ||
      (kind == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) ||
      (kind == CMARK_NODE_HEADING && b->as.heading.setext);

  if (parser->curline.size == 0) {
    // end of input - line number has not been incremented
    b->end_line = parser->line_number;
    b->end_column = parser->last_line_length;
  } else if (ends_on_current_line) {
    b->end_line = parser->line_number;
    b->end_column = parser->curline.size;
    // do not count a trailing "\n" or "\r\n" in the column
    if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n') {
      b->end_column -= 1;
    }
    if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r') {
      b->end_column -= 1;
    }
  } else {
    b->end_line = parser->line_number - 1;
    b->end_column = parser->last_line_length;
  }

  cmark_strbuf *content = &b->content;

  if (kind == CMARK_NODE_PARAGRAPH) {
    if (!resolve_reference_link_definitions(parser, b)) {
      // remove blank node (former reference def)
      cmark_node_free(b);
    }
  } else if (kind == CMARK_NODE_CODE_BLOCK) {
    if (!b->as.code.fenced) { // indented code
      remove_trailing_blank_lines(content);
      cmark_strbuf_putc(content, '\n');
    } else {
      // first line of contents becomes info
      bufsize_t eol = 0;
      while (eol < content->size && !S_is_line_end_char(content->ptr[eol])) {
        ++eol;
      }
      assert(eol < content->size);

      cmark_strbuf info = CMARK_BUF_INIT(parser->mem);
      houdini_unescape_html_f(&info, content->ptr, eol);
      cmark_strbuf_trim(&info);
      cmark_strbuf_unescape(&info);
      b->as.code.info = cmark_chunk_buf_detach(&info);

      // drop the info line together with its line ending
      if (content->ptr[eol] == '\r') {
        eol += 1;
      }
      if (content->ptr[eol] == '\n') {
        eol += 1;
      }
      cmark_strbuf_drop(content, eol);
    }
    b->as.code.literal = cmark_chunk_buf_detach(content);
  } else if (kind == CMARK_NODE_HTML_BLOCK) {
    b->as.literal = cmark_chunk_buf_detach(content);
  } else if (kind == CMARK_NODE_LIST) {
    // determine tight/loose status
    b->as.list.tight = S_list_is_tight(b);
  }

  return parent;
}
|
392
|
-
|
393
|
-
// Add a node as child of another. Return pointer to child.
|
394
|
-
static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
|
395
|
-
cmark_node_type block_type, int start_column) {
|
396
|
-
assert(parent);
|
397
|
-
|
398
|
-
// if 'parent' isn't the kind of node that can accept this child,
|
399
|
-
// then back up til we hit a node that can.
|
400
|
-
while (!cmark_node_can_contain_type(parent, block_type)) {
|
401
|
-
parent = finalize(parser, parent);
|
402
|
-
}
|
403
|
-
|
404
|
-
cmark_node *child =
|
405
|
-
make_block(parser->mem, block_type, parser->line_number, start_column);
|
406
|
-
child->parent = parent;
|
407
|
-
|
408
|
-
if (parent->last_child) {
|
409
|
-
parent->last_child->next = child;
|
410
|
-
child->prev = parent->last_child;
|
411
|
-
} else {
|
412
|
-
parent->first_child = child;
|
413
|
-
child->prev = NULL;
|
414
|
-
}
|
415
|
-
parent->last_child = child;
|
416
|
-
return child;
|
417
|
-
}
|
418
|
-
|
419
|
-
// For every inline syntax extension attached to `parser`, registers (when
// `add` is non-zero) or unregisters (when `add` is zero) each of the
// extension's special inline characters with the inline parser.
void cmark_manage_extensions_special_characters(cmark_parser *parser, int add) {
  cmark_llist *ext_link = parser->inline_syntax_extensions;

  while (ext_link) {
    cmark_syntax_extension *ext = (cmark_syntax_extension *)ext_link->data;
    cmark_llist *char_link = ext->special_inline_chars;

    while (char_link) {
      // the character is stored directly in the list's data pointer
      unsigned char c = (unsigned char)(size_t)char_link->data;

      if (!add) {
        cmark_inlines_remove_special_character(c, ext->emphasis);
      } else {
        cmark_inlines_add_special_character(c, ext->emphasis);
      }
      char_link = char_link->next;
    }

    ext_link = ext_link->next;
  }
}
|
434
|
-
|
435
|
-
// Walk through node and all children, recursively, parsing
|
436
|
-
// string content into inline content where appropriate.
|
437
|
-
static void process_inlines(cmark_parser *parser,
|
438
|
-
cmark_map *refmap, int options) {
|
439
|
-
cmark_iter *iter = cmark_iter_new(parser->root);
|
440
|
-
cmark_node *cur;
|
441
|
-
cmark_event_type ev_type;
|
442
|
-
|
443
|
-
cmark_manage_extensions_special_characters(parser, true);
|
444
|
-
|
445
|
-
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
|
446
|
-
cur = cmark_iter_get_node(iter);
|
447
|
-
if (ev_type == CMARK_EVENT_ENTER) {
|
448
|
-
if (contains_inlines(cur)) {
|
449
|
-
cmark_parse_inlines(parser, cur, refmap, options);
|
450
|
-
}
|
451
|
-
}
|
452
|
-
}
|
453
|
-
|
454
|
-
cmark_manage_extensions_special_characters(parser, false);
|
455
|
-
|
456
|
-
cmark_iter_free(iter);
|
457
|
-
}
|
458
|
-
|
459
|
-
static int sort_footnote_by_ix(const void *_a, const void *_b) {
|
460
|
-
cmark_footnote *a = *(cmark_footnote **)_a;
|
461
|
-
cmark_footnote *b = *(cmark_footnote **)_b;
|
462
|
-
return (int)a->ix - (int)b->ix;
|
463
|
-
}
|
464
|
-
|
465
|
-
// Resolves footnotes once the tree has been built:
//   * Collect every footnote definition node in a map.
//   * Walk the footnote references in document order, giving each definition
//     an index the first time it is referenced, and rewriting the
//     reference's literal to that index.
//   * Append the referenced definitions to the end of the document in index
//     order; unlink definitions that were never referenced.
// A reference with no matching definition is replaced by a text node holding
// the literal "[^label]".
static void process_footnotes(cmark_parser *parser) {
  cmark_map *map = cmark_footnote_map_new(parser->mem);

  cmark_iter *iter = cmark_iter_new(parser->root);
  cmark_node *cur;
  cmark_event_type ev_type;

  // Pass 1: gather the definitions.
  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) {
      cmark_footnote_create(map, cur);
    }
  }

  cmark_iter_free(iter);
  iter = cmark_iter_new(parser->root);
  unsigned int ix = 0;

  // Pass 2: resolve the references.
  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_REFERENCE) {
      cmark_footnote *footnote = (cmark_footnote *)cmark_map_lookup(map, &cur->as.literal);
      if (footnote) {
        if (!footnote->ix)
          footnote->ix = ++ix;

        // store a reference to this footnote reference's footnote definition
        // this is used by renderers when generating label ids
        cur->parent_footnote_def = footnote->node;

        // keep track of a) count of how many times this footnote def has been
        // referenced, and b) which reference index this footnote ref is at.
        // this is used by renderers when generating links and backreferences.
        cur->footnote.ref_ix = ++footnote->node->footnote.def_count;

        char n[32];
        // The index comes from an unsigned counter, so print it as unsigned;
        // the cast keeps the conversion specifier and argument type in sync.
        snprintf(n, sizeof(n), "%u", (unsigned int)footnote->ix);
        cmark_chunk_free(parser->mem, &cur->as.literal);
        cmark_strbuf buf = CMARK_BUF_INIT(parser->mem);
        cmark_strbuf_puts(&buf, n);

        cur->as.literal = cmark_chunk_buf_detach(&buf);
      } else {
        // No such definition: turn the reference back into plain text.
        cmark_node *text = (cmark_node *)parser->mem->calloc(1, sizeof(*text));
        cmark_strbuf_init(parser->mem, &text->content, 0);
        text->type = (uint16_t) CMARK_NODE_TEXT;

        cmark_strbuf buf = CMARK_BUF_INIT(parser->mem);
        cmark_strbuf_puts(&buf, "[^");
        cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
        cmark_strbuf_putc(&buf, ']');

        text->as.literal = cmark_chunk_buf_detach(&buf);
        cmark_node_insert_after(cur, text);
        cmark_node_free(cur);
      }
    }
  }

  cmark_iter_free(iter);

  // Pass 3: emit referenced definitions in index order.
  if (map->sorted) {
    qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix);
    for (unsigned int i = 0; i < map->size; ++i) {
      cmark_footnote *footnote = (cmark_footnote *)map->sorted[i];
      if (!footnote->ix) {
        // never referenced: detach it from the document
        cmark_node_unlink(footnote->node);
        continue;
      }
      cmark_node_append_child(parser->root, footnote->node);
      // the node now belongs to the tree, not to the map entry
      footnote->node = NULL;
    }
  }

  cmark_unlink_footnotes_map(map);
  cmark_map_free(map);
}
|
547
|
-
|
548
|
-
// Attempts to parse a list item marker (bullet or enumerated).
|
549
|
-
// On success, returns length of the marker, and populates
|
550
|
-
// data with the details. On failure, returns 0.
|
551
|
-
static bufsize_t parse_list_marker(cmark_mem *mem, cmark_chunk *input,
|
552
|
-
bufsize_t pos, bool interrupts_paragraph,
|
553
|
-
cmark_list **dataptr) {
|
554
|
-
unsigned char c;
|
555
|
-
bufsize_t startpos;
|
556
|
-
cmark_list *data;
|
557
|
-
bufsize_t i;
|
558
|
-
|
559
|
-
startpos = pos;
|
560
|
-
c = peek_at(input, pos);
|
561
|
-
|
562
|
-
if (c == '*' || c == '-' || c == '+') {
|
563
|
-
pos++;
|
564
|
-
if (!cmark_isspace(peek_at(input, pos))) {
|
565
|
-
return 0;
|
566
|
-
}
|
567
|
-
|
568
|
-
if (interrupts_paragraph) {
|
569
|
-
i = pos;
|
570
|
-
// require non-blank content after list marker:
|
571
|
-
while (S_is_space_or_tab(peek_at(input, i))) {
|
572
|
-
i++;
|
573
|
-
}
|
574
|
-
if (peek_at(input, i) == '\n') {
|
575
|
-
return 0;
|
576
|
-
}
|
577
|
-
}
|
578
|
-
|
579
|
-
data = (cmark_list *)mem->calloc(1, sizeof(*data));
|
580
|
-
data->marker_offset = 0; // will be adjusted later
|
581
|
-
data->list_type = CMARK_BULLET_LIST;
|
582
|
-
data->bullet_char = c;
|
583
|
-
data->start = 0;
|
584
|
-
data->delimiter = CMARK_NO_DELIM;
|
585
|
-
data->tight = false;
|
586
|
-
} else if (cmark_isdigit(c)) {
|
587
|
-
int start = 0;
|
588
|
-
int digits = 0;
|
589
|
-
|
590
|
-
do {
|
591
|
-
start = (10 * start) + (peek_at(input, pos) - '0');
|
592
|
-
pos++;
|
593
|
-
digits++;
|
594
|
-
// We limit to 9 digits to avoid overflow,
|
595
|
-
// assuming max int is 2^31 - 1
|
596
|
-
// This also seems to be the limit for 'start' in some browsers.
|
597
|
-
} while (digits < 9 && cmark_isdigit(peek_at(input, pos)));
|
598
|
-
|
599
|
-
if (interrupts_paragraph && start != 1) {
|
600
|
-
return 0;
|
601
|
-
}
|
602
|
-
c = peek_at(input, pos);
|
603
|
-
if (c == '.' || c == ')') {
|
604
|
-
pos++;
|
605
|
-
if (!cmark_isspace(peek_at(input, pos))) {
|
606
|
-
return 0;
|
607
|
-
}
|
608
|
-
if (interrupts_paragraph) {
|
609
|
-
// require non-blank content after list marker:
|
610
|
-
i = pos;
|
611
|
-
while (S_is_space_or_tab(peek_at(input, i))) {
|
612
|
-
i++;
|
613
|
-
}
|
614
|
-
if (S_is_line_end_char(peek_at(input, i))) {
|
615
|
-
return 0;
|
616
|
-
}
|
617
|
-
}
|
618
|
-
|
619
|
-
data = (cmark_list *)mem->calloc(1, sizeof(*data));
|
620
|
-
data->marker_offset = 0; // will be adjusted later
|
621
|
-
data->list_type = CMARK_ORDERED_LIST;
|
622
|
-
data->bullet_char = 0;
|
623
|
-
data->start = start;
|
624
|
-
data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM);
|
625
|
-
data->tight = false;
|
626
|
-
} else {
|
627
|
-
return 0;
|
628
|
-
}
|
629
|
-
} else {
|
630
|
-
return 0;
|
631
|
-
}
|
632
|
-
|
633
|
-
*dataptr = data;
|
634
|
-
return (pos - startpos);
|
635
|
-
}
|
636
|
-
|
637
|
-
// Return 1 if list item belongs in list, else 0.
|
638
|
-
static int lists_match(cmark_list *list_data, cmark_list *item_data) {
|
639
|
-
return (list_data->list_type == item_data->list_type &&
|
640
|
-
list_data->delimiter == item_data->delimiter &&
|
641
|
-
// list_data->marker_offset == item_data.marker_offset &&
|
642
|
-
list_data->bullet_char == item_data->bullet_char);
|
643
|
-
}
|
644
|
-
|
645
|
-
// Closes every block that is still open, from parser->current up to and
// including the root, then runs inline parsing and, when CMARK_OPT_FOOTNOTES
// is set, footnote processing. Returns the document root.
static cmark_node *finalize_document(cmark_parser *parser) {
  while (parser->root != parser->current) {
    parser->current = finalize(parser, parser->current);
  }
  finalize(parser, parser->root);

  // Limit total size of extra content created from reference links to
  // document size to avoid superlinear growth. Always allow 100KB.
  parser->refmap->max_ref_size =
      (parser->total_size > 100000) ? parser->total_size : 100000;

  process_inlines(parser, parser->refmap, parser->options);
  if (parser->options & CMARK_OPT_FOOTNOTES) {
    process_footnotes(parser);
  }

  return parser->root;
}
|
665
|
-
|
666
|
-
cmark_node *cmark_parse_file(FILE *f, int options) {
|
667
|
-
unsigned char buffer[4096];
|
668
|
-
cmark_parser *parser = cmark_parser_new(options);
|
669
|
-
size_t bytes;
|
670
|
-
cmark_node *document;
|
671
|
-
|
672
|
-
while ((bytes = fread(buffer, 1, sizeof(buffer), f)) > 0) {
|
673
|
-
bool eof = bytes < sizeof(buffer);
|
674
|
-
S_parser_feed(parser, buffer, bytes, eof);
|
675
|
-
if (eof) {
|
676
|
-
break;
|
677
|
-
}
|
678
|
-
}
|
679
|
-
|
680
|
-
document = cmark_parser_finish(parser);
|
681
|
-
cmark_parser_free(parser);
|
682
|
-
return document;
|
683
|
-
}
|
684
|
-
|
685
|
-
cmark_node *cmark_parse_document(const char *buffer, size_t len, int options) {
|
686
|
-
cmark_parser *parser = cmark_parser_new(options);
|
687
|
-
cmark_node *document;
|
688
|
-
|
689
|
-
S_parser_feed(parser, (const unsigned char *)buffer, len, true);
|
690
|
-
|
691
|
-
document = cmark_parser_finish(parser);
|
692
|
-
cmark_parser_free(parser);
|
693
|
-
return document;
|
694
|
-
}
|
695
|
-
|
696
|
-
// Feeds `len` bytes from `buffer` to the parser as a non-final piece of
// input (eof is passed as false).
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) {
  const unsigned char *bytes = (const unsigned char *)buffer;

  S_parser_feed(parser, bytes, len, false);
}
|
699
|
-
|
700
|
-
// Feeds `buffer` as a complete piece of input (eof is passed as true)
// without disturbing a partially buffered line: the parser's pending line
// buffer is copied aside and emptied, the input is fed, and the saved
// contents are then restored.
void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len) {
  cmark_strbuf pending;

  // Set the pending line aside.
  cmark_strbuf_init(parser->mem, &pending, 0);
  cmark_strbuf_puts(&pending, cmark_strbuf_cstr(&parser->linebuf));
  cmark_strbuf_clear(&parser->linebuf);

  S_parser_feed(parser, (const unsigned char *)buffer, len, true);

  // Put it back and release the temporary copy.
  cmark_strbuf_sets(&parser->linebuf, cmark_strbuf_cstr(&pending));
  cmark_strbuf_free(&pending);
}
|
712
|
-
|
713
|
-
// Splits `len` bytes at `buffer` into lines and hands each complete line to
// S_process_line(). Text not yet terminated by a line ending is accumulated
// in parser->linebuf until a later call completes it, or, when `eof` is
// true, it is processed as the final line. A NUL byte in the input is
// replaced by U+FFFD (encoded as UTF-8). A "\r\n" pair split across two
// calls is treated as a single line ending.
//
// An empty feed returns early: the original dereferenced `buffer` even when
// `len` was 0, and cleared the pending-CR flag, which would have turned a
// following "\n" into an extra blank line.
static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
                          size_t len, bool eof) {
  static const uint8_t repl[] = {239, 191, 189};

  if (len == 0) {
    return;
  }

  const unsigned char *end = buffer + len;

  // Saturating add: total_size never wraps around.
  if (len > UINT_MAX - parser->total_size)
    parser->total_size = UINT_MAX;
  else
    parser->total_size += len;

  if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
    // skip NL if last buffer ended with CR ; see #117
    buffer++;
  }
  parser->last_buffer_ended_with_cr = false;
  while (buffer < end) {
    const unsigned char *eol;
    bufsize_t chunk_len;
    bool process = false;
    // Scan to the next line ending, NUL byte, or end of the buffer.
    for (eol = buffer; eol < end; ++eol) {
      if (S_is_line_end_char(*eol)) {
        process = true;
        break;
      }
      if (*eol == '\0') {
        break;
      }
    }
    if (eol >= end && eof) {
      // last line of the input, without a terminator
      process = true;
    }

    chunk_len = (bufsize_t)(eol - buffer);
    if (process) {
      if (parser->linebuf.size > 0) {
        // complete the line started by earlier chunks
        cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
        S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
        cmark_strbuf_clear(&parser->linebuf);
      } else {
        S_process_line(parser, buffer, chunk_len);
      }
    } else {
      if (eol < end && *eol == '\0') {
        // omit NULL byte
        cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
        // add replacement character
        cmark_strbuf_put(&parser->linebuf, repl, 3);
      } else {
        cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
      }
    }

    buffer += chunk_len;
    if (buffer < end) {
      if (*buffer == '\0') {
        // skip over NULL
        buffer++;
      } else {
        // skip over line ending characters
        if (*buffer == '\r') {
          buffer++;
          if (buffer == end)
            parser->last_buffer_ended_with_cr = true;
        }
        if (buffer < end && *buffer == '\n')
          buffer++;
      }
    }
  }
}
|
783
|
-
|
784
|
-
// Remove a closing sequence of '#' characters from the end of `ch`.
//
// The chunk is right-trimmed first. If it then ends in one or more '#'
// preceded by a space or tab, the '#' run and that separator are cut off and
// the chunk is right-trimmed again. A chunk that consists only of '#', or
// whose trailing '#' run is not preceded by a space or tab, is left as
// trimmed.
static void chop_trailing_hashtags(cmark_chunk *ch) {
  bufsize_t last, pos;

  cmark_chunk_rtrim(ch);
  last = ch->len - 1;

  // Walk backwards over the trailing run of '#' characters.
  for (pos = last; pos >= 0 && peek_at(ch, pos) == '#'; pos--) {
  }

  // No trailing '#' at all, or nothing but '#': nothing to remove.
  if (pos == last || pos < 0)
    return;

  // The '#' run only counts as a closing sequence after a space or tab.
  if (!S_is_space_or_tab(peek_at(ch, pos)))
    return;

  ch->len = pos;
  cmark_chunk_rtrim(ch);
}
|
800
|
-
|
801
|
-
// Check for thematic break. On failure, return 0 and update
|
802
|
-
// thematic_break_kill_pos with the index at which the
|
803
|
-
// parse fails. On success, return length of match.
|
804
|
-
// "...three or more hyphens, asterisks,
|
805
|
-
// or underscores on a line by themselves. If you wish, you may use
|
806
|
-
// spaces between the hyphens or asterisks."
|
807
|
-
static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
|
808
|
-
bufsize_t offset) {
|
809
|
-
bufsize_t i;
|
810
|
-
char c;
|
811
|
-
char nextc = '\0';
|
812
|
-
int count;
|
813
|
-
i = offset;
|
814
|
-
c = peek_at(input, i);
|
815
|
-
if (!(c == '*' || c == '_' || c == '-')) {
|
816
|
-
parser->thematic_break_kill_pos = i;
|
817
|
-
return 0;
|
818
|
-
}
|
819
|
-
count = 1;
|
820
|
-
while ((nextc = peek_at(input, ++i))) {
|
821
|
-
if (nextc == c) {
|
822
|
-
count++;
|
823
|
-
} else if (nextc != ' ' && nextc != '\t') {
|
824
|
-
break;
|
825
|
-
}
|
826
|
-
}
|
827
|
-
if (count >= 3 && (nextc == '\r' || nextc == '\n')) {
|
828
|
-
return (i - offset) + 1;
|
829
|
-
} else {
|
830
|
-
parser->thematic_break_kill_pos = i;
|
831
|
-
return 0;
|
832
|
-
}
|
833
|
-
}
|
834
|
-
|
835
|
-
// Find first nonspace character from current offset, setting
|
836
|
-
// parser->first_nonspace, parser->first_nonspace_column,
|
837
|
-
// parser->indent, and parser->blank. Does not advance parser->offset.
|
838
|
-
static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) {
|
839
|
-
char c;
|
840
|
-
int chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
|
841
|
-
|
842
|
-
if (parser->first_nonspace <= parser->offset) {
|
843
|
-
parser->first_nonspace = parser->offset;
|
844
|
-
parser->first_nonspace_column = parser->column;
|
845
|
-
while ((c = peek_at(input, parser->first_nonspace))) {
|
846
|
-
if (c == ' ') {
|
847
|
-
parser->first_nonspace += 1;
|
848
|
-
parser->first_nonspace_column += 1;
|
849
|
-
chars_to_tab = chars_to_tab - 1;
|
850
|
-
if (chars_to_tab == 0) {
|
851
|
-
chars_to_tab = TAB_STOP;
|
852
|
-
}
|
853
|
-
} else if (c == '\t') {
|
854
|
-
parser->first_nonspace += 1;
|
855
|
-
parser->first_nonspace_column += chars_to_tab;
|
856
|
-
chars_to_tab = TAB_STOP;
|
857
|
-
} else {
|
858
|
-
break;
|
859
|
-
}
|
860
|
-
}
|
861
|
-
}
|
862
|
-
|
863
|
-
parser->indent = parser->first_nonspace_column - parser->column;
|
864
|
-
parser->blank = S_is_line_end_char(peek_at(input, parser->first_nonspace));
|
865
|
-
}
|
866
|
-
|
867
|
-
// Advance parser->offset and parser->column. parser->offset is the
|
868
|
-
// byte position in input; parser->column is a virtual column number
|
869
|
-
// that takes into account tabs. (Multibyte characters are not taken
|
870
|
-
// into account, because the Markdown line prefixes we are interested in
|
871
|
-
// analyzing are entirely ASCII.) The count parameter indicates
|
872
|
-
// how far to advance the offset. If columns is true, then count
|
873
|
-
// indicates a number of columns; otherwise, a number of bytes.
|
874
|
-
// If advancing a certain number of columns partially consumes
|
875
|
-
// a tab character, parser->partially_consumed_tab is set to true.
|
876
|
-
static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
|
877
|
-
bufsize_t count, bool columns) {
|
878
|
-
char c;
|
879
|
-
int chars_to_tab;
|
880
|
-
int chars_to_advance;
|
881
|
-
while (count > 0 && (c = peek_at(input, parser->offset))) {
|
882
|
-
if (c == '\t') {
|
883
|
-
chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
|
884
|
-
if (columns) {
|
885
|
-
parser->partially_consumed_tab = chars_to_tab > count;
|
886
|
-
chars_to_advance = MIN(count, chars_to_tab);
|
887
|
-
parser->column += chars_to_advance;
|
888
|
-
parser->offset += (parser->partially_consumed_tab ? 0 : 1);
|
889
|
-
count -= chars_to_advance;
|
890
|
-
} else {
|
891
|
-
parser->partially_consumed_tab = false;
|
892
|
-
parser->column += chars_to_tab;
|
893
|
-
parser->offset += 1;
|
894
|
-
count -= 1;
|
895
|
-
}
|
896
|
-
} else {
|
897
|
-
parser->partially_consumed_tab = false;
|
898
|
-
parser->offset += 1;
|
899
|
-
parser->column += 1; // assume ascii; block starts are ascii
|
900
|
-
count -= 1;
|
901
|
-
}
|
902
|
-
}
|
903
|
-
}
|
904
|
-
|
905
|
-
// True when `container` has a last child and that child carries the
// CMARK_NODE__OPEN flag.
static bool S_last_child_is_open(cmark_node *container) {
  cmark_node *last = container->last_child;

  if (!last)
    return false;
  return (last->flags & CMARK_NODE__OPEN) != 0;
}
|
909
|
-
|
910
|
-
// Try to consume a block quote continuation marker from the current line.
//
// Matches when the line is indented at most 3 columns and its first
// nonspace character is '>'. On a match the indentation and the '>' are
// consumed, plus one following space or tab if present, and true is
// returned. Otherwise nothing is consumed and false is returned.
static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) {
  if (parser->indent > 3 || peek_at(input, parser->first_nonspace) != '>')
    return false;

  // Skip the indentation together with the '>' itself.
  S_advance_offset(parser, input, parser->indent + 1, true);

  // One optional space or tab after the marker belongs to the prefix.
  if (S_is_space_or_tab(peek_at(input, parser->offset)))
    S_advance_offset(parser, input, 1, true);

  return true;
}
|
928
|
-
|
929
|
-
static bool parse_footnote_definition_block_prefix(cmark_parser *parser, cmark_chunk *input,
|
930
|
-
cmark_node *container) {
|
931
|
-
if (parser->indent >= 4) {
|
932
|
-
S_advance_offset(parser, input, 4, true);
|
933
|
-
return true;
|
934
|
-
} else if (input->len > 0 && (input->data[0] == '\n' || (input->data[0] == '\r' && input->data[1] == '\n'))) {
|
935
|
-
return true;
|
936
|
-
}
|
937
|
-
|
938
|
-
return false;
|
939
|
-
}
|
940
|
-
|
941
|
-
static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input,
|
942
|
-
cmark_node *container) {
|
943
|
-
bool res = false;
|
944
|
-
|
945
|
-
if (parser->indent >=
|
946
|
-
container->as.list.marker_offset + container->as.list.padding) {
|
947
|
-
S_advance_offset(parser, input, container->as.list.marker_offset +
|
948
|
-
container->as.list.padding,
|
949
|
-
true);
|
950
|
-
res = true;
|
951
|
-
} else if (parser->blank && container->first_child != NULL) {
|
952
|
-
// if container->first_child is NULL, then the opening line
|
953
|
-
// of the list item was blank after the list marker; in this
|
954
|
-
// case, we are done with the list item.
|
955
|
-
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
|
956
|
-
false);
|
957
|
-
res = true;
|
958
|
-
}
|
959
|
-
return res;
|
960
|
-
}
|
961
|
-
|
962
|
-
static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
|
963
|
-
cmark_node *container,
|
964
|
-
bool *should_continue) {
|
965
|
-
bool res = false;
|
966
|
-
|
967
|
-
if (!container->as.code.fenced) { // indented
|
968
|
-
if (parser->indent >= CODE_INDENT) {
|
969
|
-
S_advance_offset(parser, input, CODE_INDENT, true);
|
970
|
-
res = true;
|
971
|
-
} else if (parser->blank) {
|
972
|
-
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
|
973
|
-
false);
|
974
|
-
res = true;
|
975
|
-
}
|
976
|
-
} else { // fenced
|
977
|
-
bufsize_t matched = 0;
|
978
|
-
|
979
|
-
if (parser->indent <= 3 && (peek_at(input, parser->first_nonspace) ==
|
980
|
-
container->as.code.fence_char)) {
|
981
|
-
matched = scan_close_code_fence(input, parser->first_nonspace);
|
982
|
-
}
|
983
|
-
|
984
|
-
if (matched >= container->as.code.fence_length) {
|
985
|
-
// closing fence - and since we're at
|
986
|
-
// the end of a line, we can stop processing it:
|
987
|
-
*should_continue = false;
|
988
|
-
S_advance_offset(parser, input, matched, false);
|
989
|
-
parser->current = finalize(parser, container);
|
990
|
-
} else {
|
991
|
-
// skip opt. spaces of fence parser->offset
|
992
|
-
int i = container->as.code.fence_offset;
|
993
|
-
|
994
|
-
while (i > 0 && S_is_space_or_tab(peek_at(input, parser->offset))) {
|
995
|
-
S_advance_offset(parser, input, 1, true);
|
996
|
-
i--;
|
997
|
-
}
|
998
|
-
res = true;
|
999
|
-
}
|
1000
|
-
}
|
1001
|
-
|
1002
|
-
return res;
|
1003
|
-
}
|
1004
|
-
|
1005
|
-
static bool parse_html_block_prefix(cmark_parser *parser,
|
1006
|
-
cmark_node *container) {
|
1007
|
-
bool res = false;
|
1008
|
-
int html_block_type = container->as.html_block_type;
|
1009
|
-
|
1010
|
-
assert(html_block_type >= 1 && html_block_type <= 7);
|
1011
|
-
switch (html_block_type) {
|
1012
|
-
case 1:
|
1013
|
-
case 2:
|
1014
|
-
case 3:
|
1015
|
-
case 4:
|
1016
|
-
case 5:
|
1017
|
-
// these types of blocks can accept blanks
|
1018
|
-
res = true;
|
1019
|
-
break;
|
1020
|
-
case 6:
|
1021
|
-
case 7:
|
1022
|
-
res = !parser->blank;
|
1023
|
-
break;
|
1024
|
-
}
|
1025
|
-
|
1026
|
-
return res;
|
1027
|
-
}
|
1028
|
-
|
1029
|
-
static bool parse_extension_block(cmark_parser *parser,
|
1030
|
-
cmark_node *container,
|
1031
|
-
cmark_chunk *input)
|
1032
|
-
{
|
1033
|
-
bool res = false;
|
1034
|
-
|
1035
|
-
if (container->extension->last_block_matches) {
|
1036
|
-
if (container->extension->last_block_matches(
|
1037
|
-
container->extension, parser, input->data, input->len, container))
|
1038
|
-
res = true;
|
1039
|
-
}
|
1040
|
-
|
1041
|
-
return res;
|
1042
|
-
}
|
1043
|
-
|
1044
|
-
/**
|
1045
|
-
* For each containing node, try to parse the associated line start.
|
1046
|
-
*
|
1047
|
-
* Will not close unmatched blocks, as we may have a lazy continuation
|
1048
|
-
* line -> http://spec.commonmark.org/0.24/#lazy-continuation-line
|
1049
|
-
*
|
1050
|
-
* Returns: The last matching node, or NULL
|
1051
|
-
*/
|
1052
|
-
static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
|
1053
|
-
bool *all_matched) {
|
1054
|
-
bool should_continue = true;
|
1055
|
-
*all_matched = false;
|
1056
|
-
cmark_node *container = parser->root;
|
1057
|
-
cmark_node_type cont_type;
|
1058
|
-
|
1059
|
-
while (S_last_child_is_open(container)) {
|
1060
|
-
container = container->last_child;
|
1061
|
-
cont_type = S_type(container);
|
1062
|
-
|
1063
|
-
S_find_first_nonspace(parser, input);
|
1064
|
-
|
1065
|
-
if (container->extension) {
|
1066
|
-
if (!parse_extension_block(parser, container, input))
|
1067
|
-
goto done;
|
1068
|
-
continue;
|
1069
|
-
}
|
1070
|
-
|
1071
|
-
switch (cont_type) {
|
1072
|
-
case CMARK_NODE_BLOCK_QUOTE:
|
1073
|
-
if (!parse_block_quote_prefix(parser, input))
|
1074
|
-
goto done;
|
1075
|
-
break;
|
1076
|
-
case CMARK_NODE_ITEM:
|
1077
|
-
if (!parse_node_item_prefix(parser, input, container))
|
1078
|
-
goto done;
|
1079
|
-
break;
|
1080
|
-
case CMARK_NODE_CODE_BLOCK:
|
1081
|
-
if (!parse_code_block_prefix(parser, input, container, &should_continue))
|
1082
|
-
goto done;
|
1083
|
-
break;
|
1084
|
-
case CMARK_NODE_HEADING:
|
1085
|
-
// a heading can never contain more than one line
|
1086
|
-
goto done;
|
1087
|
-
case CMARK_NODE_HTML_BLOCK:
|
1088
|
-
if (!parse_html_block_prefix(parser, container))
|
1089
|
-
goto done;
|
1090
|
-
break;
|
1091
|
-
case CMARK_NODE_PARAGRAPH:
|
1092
|
-
if (parser->blank)
|
1093
|
-
goto done;
|
1094
|
-
break;
|
1095
|
-
case CMARK_NODE_FOOTNOTE_DEFINITION:
|
1096
|
-
if (!parse_footnote_definition_block_prefix(parser, input, container))
|
1097
|
-
goto done;
|
1098
|
-
break;
|
1099
|
-
default:
|
1100
|
-
break;
|
1101
|
-
}
|
1102
|
-
}
|
1103
|
-
|
1104
|
-
*all_matched = true;
|
1105
|
-
|
1106
|
-
done:
|
1107
|
-
if (!*all_matched) {
|
1108
|
-
container = container->parent; // back up to last matching node
|
1109
|
-
}
|
1110
|
-
|
1111
|
-
if (!should_continue) {
|
1112
|
-
container = NULL;
|
1113
|
-
}
|
1114
|
-
|
1115
|
-
return container;
|
1116
|
-
}
|
1117
|
-
|
1118
|
-
static void open_new_blocks(cmark_parser *parser, cmark_node **container,
|
1119
|
-
cmark_chunk *input, bool all_matched) {
|
1120
|
-
bool indented;
|
1121
|
-
cmark_list *data = NULL;
|
1122
|
-
bool maybe_lazy = S_type(parser->current) == CMARK_NODE_PARAGRAPH;
|
1123
|
-
cmark_node_type cont_type = S_type(*container);
|
1124
|
-
bufsize_t matched = 0;
|
1125
|
-
int lev = 0;
|
1126
|
-
bool save_partially_consumed_tab;
|
1127
|
-
bool has_content;
|
1128
|
-
int save_offset;
|
1129
|
-
int save_column;
|
1130
|
-
size_t depth = 0;
|
1131
|
-
|
1132
|
-
while (cont_type != CMARK_NODE_CODE_BLOCK &&
|
1133
|
-
cont_type != CMARK_NODE_HTML_BLOCK) {
|
1134
|
-
depth++;
|
1135
|
-
S_find_first_nonspace(parser, input);
|
1136
|
-
indented = parser->indent >= CODE_INDENT;
|
1137
|
-
|
1138
|
-
if (!indented && peek_at(input, parser->first_nonspace) == '>') {
|
1139
|
-
|
1140
|
-
bufsize_t blockquote_startpos = parser->first_nonspace;
|
1141
|
-
|
1142
|
-
S_advance_offset(parser, input,
|
1143
|
-
parser->first_nonspace + 1 - parser->offset, false);
|
1144
|
-
// optional following character
|
1145
|
-
if (S_is_space_or_tab(peek_at(input, parser->offset))) {
|
1146
|
-
S_advance_offset(parser, input, 1, true);
|
1147
|
-
}
|
1148
|
-
*container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE,
|
1149
|
-
blockquote_startpos + 1);
|
1150
|
-
|
1151
|
-
} else if (!indented && (matched = scan_atx_heading_start(
|
1152
|
-
input, parser->first_nonspace))) {
|
1153
|
-
bufsize_t hashpos;
|
1154
|
-
int level = 0;
|
1155
|
-
bufsize_t heading_startpos = parser->first_nonspace;
|
1156
|
-
|
1157
|
-
S_advance_offset(parser, input,
|
1158
|
-
parser->first_nonspace + matched - parser->offset,
|
1159
|
-
false);
|
1160
|
-
*container = add_child(parser, *container, CMARK_NODE_HEADING,
|
1161
|
-
heading_startpos + 1);
|
1162
|
-
|
1163
|
-
hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace);
|
1164
|
-
|
1165
|
-
while (peek_at(input, hashpos) == '#') {
|
1166
|
-
level++;
|
1167
|
-
hashpos++;
|
1168
|
-
}
|
1169
|
-
|
1170
|
-
(*container)->as.heading.level = level;
|
1171
|
-
(*container)->as.heading.setext = false;
|
1172
|
-
(*container)->internal_offset = matched;
|
1173
|
-
|
1174
|
-
} else if (!indented && (matched = scan_open_code_fence(
|
1175
|
-
input, parser->first_nonspace))) {
|
1176
|
-
*container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
|
1177
|
-
parser->first_nonspace + 1);
|
1178
|
-
(*container)->as.code.fenced = true;
|
1179
|
-
(*container)->as.code.fence_char = peek_at(input, parser->first_nonspace);
|
1180
|
-
(*container)->as.code.fence_length = (matched > 255) ? 255 : (uint8_t)matched;
|
1181
|
-
(*container)->as.code.fence_offset =
|
1182
|
-
(int8_t)(parser->first_nonspace - parser->offset);
|
1183
|
-
(*container)->as.code.info = cmark_chunk_literal("");
|
1184
|
-
S_advance_offset(parser, input,
|
1185
|
-
parser->first_nonspace + matched - parser->offset,
|
1186
|
-
false);
|
1187
|
-
|
1188
|
-
} else if (!indented && ((matched = scan_html_block_start(
|
1189
|
-
input, parser->first_nonspace)) ||
|
1190
|
-
(cont_type != CMARK_NODE_PARAGRAPH &&
|
1191
|
-
(matched = scan_html_block_start_7(
|
1192
|
-
input, parser->first_nonspace))))) {
|
1193
|
-
*container = add_child(parser, *container, CMARK_NODE_HTML_BLOCK,
|
1194
|
-
parser->first_nonspace + 1);
|
1195
|
-
(*container)->as.html_block_type = matched;
|
1196
|
-
// note, we don't adjust parser->offset because the tag is part of the
|
1197
|
-
// text
|
1198
|
-
} else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
|
1199
|
-
(lev =
|
1200
|
-
scan_setext_heading_line(input, parser->first_nonspace))) {
|
1201
|
-
// finalize paragraph, resolving reference links
|
1202
|
-
has_content = resolve_reference_link_definitions(parser, *container);
|
1203
|
-
|
1204
|
-
if (has_content) {
|
1205
|
-
|
1206
|
-
(*container)->type = (uint16_t)CMARK_NODE_HEADING;
|
1207
|
-
(*container)->as.heading.level = lev;
|
1208
|
-
(*container)->as.heading.setext = true;
|
1209
|
-
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
|
1210
|
-
}
|
1211
|
-
} else if (!indented &&
|
1212
|
-
!(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
|
1213
|
-
(parser->thematic_break_kill_pos <= parser->first_nonspace) &&
|
1214
|
-
(matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
|
1215
|
-
// it's only now that we know the line is not part of a setext heading:
|
1216
|
-
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
|
1217
|
-
parser->first_nonspace + 1);
|
1218
|
-
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
|
1219
|
-
} else if (!indented &&
|
1220
|
-
(parser->options & CMARK_OPT_FOOTNOTES) &&
|
1221
|
-
depth < MAX_LIST_DEPTH &&
|
1222
|
-
(matched = scan_footnote_definition(input, parser->first_nonspace))) {
|
1223
|
-
cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2);
|
1224
|
-
|
1225
|
-
while (c.data[c.len - 1] != ']')
|
1226
|
-
--c.len;
|
1227
|
-
--c.len;
|
1228
|
-
|
1229
|
-
cmark_chunk_to_cstr(parser->mem, &c);
|
1230
|
-
|
1231
|
-
S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false);
|
1232
|
-
*container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1);
|
1233
|
-
(*container)->as.literal = c;
|
1234
|
-
|
1235
|
-
(*container)->internal_offset = matched;
|
1236
|
-
} else if ((!indented || cont_type == CMARK_NODE_LIST) &&
|
1237
|
-
parser->indent < 4 &&
|
1238
|
-
depth < MAX_LIST_DEPTH &&
|
1239
|
-
(matched = parse_list_marker(
|
1240
|
-
parser->mem, input, parser->first_nonspace,
|
1241
|
-
(*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
|
1242
|
-
|
1243
|
-
// Note that we can have new list items starting with >= 4
|
1244
|
-
// spaces indent, as long as the list container is still open.
|
1245
|
-
int i = 0;
|
1246
|
-
|
1247
|
-
// compute padding:
|
1248
|
-
S_advance_offset(parser, input,
|
1249
|
-
parser->first_nonspace + matched - parser->offset,
|
1250
|
-
false);
|
1251
|
-
|
1252
|
-
save_partially_consumed_tab = parser->partially_consumed_tab;
|
1253
|
-
save_offset = parser->offset;
|
1254
|
-
save_column = parser->column;
|
1255
|
-
|
1256
|
-
while (parser->column - save_column <= 5 &&
|
1257
|
-
S_is_space_or_tab(peek_at(input, parser->offset))) {
|
1258
|
-
S_advance_offset(parser, input, 1, true);
|
1259
|
-
}
|
1260
|
-
|
1261
|
-
i = parser->column - save_column;
|
1262
|
-
if (i >= 5 || i < 1 ||
|
1263
|
-
// only spaces after list marker:
|
1264
|
-
S_is_line_end_char(peek_at(input, parser->offset))) {
|
1265
|
-
data->padding = matched + 1;
|
1266
|
-
parser->offset = save_offset;
|
1267
|
-
parser->column = save_column;
|
1268
|
-
parser->partially_consumed_tab = save_partially_consumed_tab;
|
1269
|
-
if (i > 0) {
|
1270
|
-
S_advance_offset(parser, input, 1, true);
|
1271
|
-
}
|
1272
|
-
} else {
|
1273
|
-
data->padding = matched + i;
|
1274
|
-
}
|
1275
|
-
|
1276
|
-
// check container; if it's a list, see if this list item
|
1277
|
-
// can continue the list; otherwise, create a list container.
|
1278
|
-
|
1279
|
-
data->marker_offset = parser->indent;
|
1280
|
-
|
1281
|
-
if (cont_type != CMARK_NODE_LIST ||
|
1282
|
-
!lists_match(&((*container)->as.list), data)) {
|
1283
|
-
*container = add_child(parser, *container, CMARK_NODE_LIST,
|
1284
|
-
parser->first_nonspace + 1);
|
1285
|
-
|
1286
|
-
memcpy(&((*container)->as.list), data, sizeof(*data));
|
1287
|
-
}
|
1288
|
-
|
1289
|
-
// add the list item
|
1290
|
-
*container = add_child(parser, *container, CMARK_NODE_ITEM,
|
1291
|
-
parser->first_nonspace + 1);
|
1292
|
-
/* TODO: static */
|
1293
|
-
memcpy(&((*container)->as.list), data, sizeof(*data));
|
1294
|
-
parser->mem->free(data);
|
1295
|
-
} else if (indented && !maybe_lazy && !parser->blank) {
|
1296
|
-
S_advance_offset(parser, input, CODE_INDENT, true);
|
1297
|
-
*container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
|
1298
|
-
parser->offset + 1);
|
1299
|
-
(*container)->as.code.fenced = false;
|
1300
|
-
(*container)->as.code.fence_char = 0;
|
1301
|
-
(*container)->as.code.fence_length = 0;
|
1302
|
-
(*container)->as.code.fence_offset = 0;
|
1303
|
-
(*container)->as.code.info = cmark_chunk_literal("");
|
1304
|
-
} else {
|
1305
|
-
cmark_llist *tmp;
|
1306
|
-
cmark_node *new_container = NULL;
|
1307
|
-
|
1308
|
-
for (tmp = parser->syntax_extensions; tmp; tmp=tmp->next) {
|
1309
|
-
cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data;
|
1310
|
-
|
1311
|
-
if (ext->try_opening_block) {
|
1312
|
-
new_container = ext->try_opening_block(
|
1313
|
-
ext, indented, parser, *container, input->data, input->len);
|
1314
|
-
|
1315
|
-
if (new_container) {
|
1316
|
-
*container = new_container;
|
1317
|
-
break;
|
1318
|
-
}
|
1319
|
-
}
|
1320
|
-
}
|
1321
|
-
|
1322
|
-
if (!new_container) {
|
1323
|
-
break;
|
1324
|
-
}
|
1325
|
-
}
|
1326
|
-
|
1327
|
-
if (accepts_lines(S_type(*container))) {
|
1328
|
-
// if it's a line container, it can't contain other containers
|
1329
|
-
break;
|
1330
|
-
}
|
1331
|
-
|
1332
|
-
cont_type = S_type(*container);
|
1333
|
-
maybe_lazy = false;
|
1334
|
-
}
|
1335
|
-
}
|
1336
|
-
|
1337
|
-
static void add_text_to_container(cmark_parser *parser, cmark_node *container,
|
1338
|
-
cmark_node *last_matched_container,
|
1339
|
-
cmark_chunk *input) {
|
1340
|
-
cmark_node *tmp;
|
1341
|
-
// what remains at parser->offset is a text line. add the text to the
|
1342
|
-
// appropriate container.
|
1343
|
-
|
1344
|
-
S_find_first_nonspace(parser, input);
|
1345
|
-
|
1346
|
-
if (parser->blank && container->last_child)
|
1347
|
-
S_set_last_line_blank(container->last_child, true);
|
1348
|
-
|
1349
|
-
// block quote lines are never blank as they start with >
|
1350
|
-
// and we don't count blanks in fenced code for purposes of tight/loose
|
1351
|
-
// lists or breaking out of lists. we also don't set last_line_blank
|
1352
|
-
// on an empty list item.
|
1353
|
-
const cmark_node_type ctype = S_type(container);
|
1354
|
-
const bool last_line_blank =
|
1355
|
-
(parser->blank && ctype != CMARK_NODE_BLOCK_QUOTE &&
|
1356
|
-
ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK &&
|
1357
|
-
!(ctype == CMARK_NODE_CODE_BLOCK && container->as.code.fenced) &&
|
1358
|
-
!(ctype == CMARK_NODE_ITEM && container->first_child == NULL &&
|
1359
|
-
container->start_line == parser->line_number));
|
1360
|
-
|
1361
|
-
S_set_last_line_blank(container, last_line_blank);
|
1362
|
-
|
1363
|
-
tmp = container;
|
1364
|
-
while (tmp->parent) {
|
1365
|
-
S_set_last_line_blank(tmp->parent, false);
|
1366
|
-
tmp = tmp->parent;
|
1367
|
-
}
|
1368
|
-
|
1369
|
-
// If the last line processed belonged to a paragraph node,
|
1370
|
-
// and we didn't match all of the line prefixes for the open containers,
|
1371
|
-
// and we didn't start any new containers,
|
1372
|
-
// and the line isn't blank,
|
1373
|
-
// then treat this as a "lazy continuation line" and add it to
|
1374
|
-
// the open paragraph.
|
1375
|
-
if (parser->current != last_matched_container &&
|
1376
|
-
container == last_matched_container && !parser->blank &&
|
1377
|
-
S_type(parser->current) == CMARK_NODE_PARAGRAPH) {
|
1378
|
-
add_line(parser->current, input, parser);
|
1379
|
-
} else { // not a lazy continuation
|
1380
|
-
// Finalize any blocks that were not matched and set cur to container:
|
1381
|
-
while (parser->current != last_matched_container) {
|
1382
|
-
parser->current = finalize(parser, parser->current);
|
1383
|
-
assert(parser->current != NULL);
|
1384
|
-
}
|
1385
|
-
|
1386
|
-
if (S_type(container) == CMARK_NODE_CODE_BLOCK) {
|
1387
|
-
add_line(container, input, parser);
|
1388
|
-
} else if (S_type(container) == CMARK_NODE_HTML_BLOCK) {
|
1389
|
-
add_line(container, input, parser);
|
1390
|
-
|
1391
|
-
int matches_end_condition;
|
1392
|
-
switch (container->as.html_block_type) {
|
1393
|
-
case 1:
|
1394
|
-
// </script>, </style>, </pre>
|
1395
|
-
matches_end_condition =
|
1396
|
-
scan_html_block_end_1(input, parser->first_nonspace);
|
1397
|
-
break;
|
1398
|
-
case 2:
|
1399
|
-
// -->
|
1400
|
-
matches_end_condition =
|
1401
|
-
scan_html_block_end_2(input, parser->first_nonspace);
|
1402
|
-
break;
|
1403
|
-
case 3:
|
1404
|
-
// ?>
|
1405
|
-
matches_end_condition =
|
1406
|
-
scan_html_block_end_3(input, parser->first_nonspace);
|
1407
|
-
break;
|
1408
|
-
case 4:
|
1409
|
-
// >
|
1410
|
-
matches_end_condition =
|
1411
|
-
scan_html_block_end_4(input, parser->first_nonspace);
|
1412
|
-
break;
|
1413
|
-
case 5:
|
1414
|
-
// ]]>
|
1415
|
-
matches_end_condition =
|
1416
|
-
scan_html_block_end_5(input, parser->first_nonspace);
|
1417
|
-
break;
|
1418
|
-
default:
|
1419
|
-
matches_end_condition = 0;
|
1420
|
-
break;
|
1421
|
-
}
|
1422
|
-
|
1423
|
-
if (matches_end_condition) {
|
1424
|
-
container = finalize(parser, container);
|
1425
|
-
assert(parser->current != NULL);
|
1426
|
-
}
|
1427
|
-
} else if (parser->blank) {
|
1428
|
-
// ??? do nothing
|
1429
|
-
} else if (accepts_lines(S_type(container))) {
|
1430
|
-
if (S_type(container) == CMARK_NODE_HEADING &&
|
1431
|
-
container->as.heading.setext == false) {
|
1432
|
-
chop_trailing_hashtags(input);
|
1433
|
-
}
|
1434
|
-
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
|
1435
|
-
false);
|
1436
|
-
add_line(container, input, parser);
|
1437
|
-
} else {
|
1438
|
-
// create paragraph container for line
|
1439
|
-
container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
|
1440
|
-
parser->first_nonspace + 1);
|
1441
|
-
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
|
1442
|
-
false);
|
1443
|
-
add_line(container, input, parser);
|
1444
|
-
}
|
1445
|
-
|
1446
|
-
parser->current = container;
|
1447
|
-
}
|
1448
|
-
}
|
1449
|
-
|
1450
|
-
/* See http://spec.commonmark.org/0.24/#phase-1-block-structure */
|
1451
|
-
static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
|
1452
|
-
bufsize_t bytes) {
|
1453
|
-
cmark_node *last_matched_container;
|
1454
|
-
bool all_matched = true;
|
1455
|
-
cmark_node *container;
|
1456
|
-
cmark_chunk input;
|
1457
|
-
cmark_node *current;
|
1458
|
-
|
1459
|
-
cmark_strbuf_clear(&parser->curline);
|
1460
|
-
|
1461
|
-
if (parser->options & CMARK_OPT_VALIDATE_UTF8)
|
1462
|
-
cmark_utf8proc_check(&parser->curline, buffer, bytes);
|
1463
|
-
else
|
1464
|
-
cmark_strbuf_put(&parser->curline, buffer, bytes);
|
1465
|
-
|
1466
|
-
bytes = parser->curline.size;
|
1467
|
-
|
1468
|
-
// ensure line ends with a newline:
|
1469
|
-
if (bytes == 0 || !S_is_line_end_char(parser->curline.ptr[bytes - 1]))
|
1470
|
-
cmark_strbuf_putc(&parser->curline, '\n');
|
1471
|
-
|
1472
|
-
parser->offset = 0;
|
1473
|
-
parser->column = 0;
|
1474
|
-
parser->first_nonspace = 0;
|
1475
|
-
parser->first_nonspace_column = 0;
|
1476
|
-
parser->thematic_break_kill_pos = 0;
|
1477
|
-
parser->indent = 0;
|
1478
|
-
parser->blank = false;
|
1479
|
-
parser->partially_consumed_tab = false;
|
1480
|
-
|
1481
|
-
input.data = parser->curline.ptr;
|
1482
|
-
input.len = parser->curline.size;
|
1483
|
-
input.alloc = 0;
|
1484
|
-
|
1485
|
-
// Skip UTF-8 BOM.
|
1486
|
-
if (parser->line_number == 0 &&
|
1487
|
-
input.len >= 3 &&
|
1488
|
-
memcmp(input.data, "\xef\xbb\xbf", 3) == 0)
|
1489
|
-
parser->offset += 3;
|
1490
|
-
|
1491
|
-
parser->line_number++;
|
1492
|
-
|
1493
|
-
last_matched_container = check_open_blocks(parser, &input, &all_matched);
|
1494
|
-
|
1495
|
-
if (!last_matched_container)
|
1496
|
-
goto finished;
|
1497
|
-
|
1498
|
-
container = last_matched_container;
|
1499
|
-
|
1500
|
-
current = parser->current;
|
1501
|
-
|
1502
|
-
open_new_blocks(parser, &container, &input, all_matched);
|
1503
|
-
|
1504
|
-
/* parser->current might have changed if feed_reentrant was called */
|
1505
|
-
if (current == parser->current)
|
1506
|
-
add_text_to_container(parser, container, last_matched_container, &input);
|
1507
|
-
|
1508
|
-
finished:
|
1509
|
-
parser->last_line_length = input.len;
|
1510
|
-
if (parser->last_line_length &&
|
1511
|
-
input.data[parser->last_line_length - 1] == '\n')
|
1512
|
-
parser->last_line_length -= 1;
|
1513
|
-
if (parser->last_line_length &&
|
1514
|
-
input.data[parser->last_line_length - 1] == '\r')
|
1515
|
-
parser->last_line_length -= 1;
|
1516
|
-
|
1517
|
-
cmark_strbuf_clear(&parser->curline);
|
1518
|
-
}
|
1519
|
-
|
1520
|
-
// Finish parsing and return the document root.
//
// Any text still buffered in parser->linebuf is processed as a final line,
// the document is finalized, adjacent text nodes are consolidated and the
// line buffers are freed. Each syntax extension with a postprocess_func then
// gets to post-process the tree; a non-NULL result replaces the root.
//
// The parser is reset afterwards and no longer holds the root. Returns NULL
// if the parser has no root, i.e. it was already finished once.
cmark_node *cmark_parser_finish(cmark_parser *parser) {
  cmark_node *res;
  cmark_llist *it;

  /* Parser was already finished once */
  if (parser->root == NULL)
    return NULL;

  // Flush a last line that had no terminating line ending.
  if (parser->linebuf.size) {
    S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
    cmark_strbuf_clear(&parser->linebuf);
  }

  finalize_document(parser);

  cmark_consolidate_text_nodes(parser->root);

  cmark_strbuf_free(&parser->curline);
  cmark_strbuf_free(&parser->linebuf);

#if CMARK_DEBUG_NODES
  if (cmark_node_check(parser->root, stderr)) {
    abort();
  }
#endif

  it = parser->syntax_extensions;
  while (it) {
    cmark_syntax_extension *ext = (cmark_syntax_extension *) it->data;

    if (ext->postprocess_func) {
      cmark_node *processed = ext->postprocess_func(ext, parser, parser->root);
      if (processed)
        parser->root = processed;
    }
    it = it->next;
  }

  // Detach the root before the reset so it is handed to the caller.
  res = parser->root;
  parser->root = NULL;

  cmark_parser_reset(parser);

  return res;
}
|
1562
|
-
|
1563
|
-
// Accessor: returns parser->line_number, the counter that S_process_line()
// increments once per processed line.
int cmark_parser_get_line_number(cmark_parser *parser) {
  return parser->line_number;
}
|
1566
|
-
|
1567
|
-
// Accessor: returns parser->offset, the current byte position within the
// line being processed.
bufsize_t cmark_parser_get_offset(cmark_parser *parser) {
  return parser->offset;
}
|
1570
|
-
|
1571
|
-
// Accessor: returns parser->column, the current virtual column (tabs
// expanded) within the line being processed.
bufsize_t cmark_parser_get_column(cmark_parser *parser) {
  return parser->column;
}
|
1574
|
-
|
1575
|
-
// Accessor: returns parser->first_nonspace, the byte index of the first
// character after the offset that is neither a space nor a tab.
int cmark_parser_get_first_nonspace(cmark_parser *parser) {
  return parser->first_nonspace;
}
|
1578
|
-
|
1579
|
-
// Accessor: returns parser->first_nonspace_column, the virtual column (tabs
// expanded) of the first character that is neither a space nor a tab.
int cmark_parser_get_first_nonspace_column(cmark_parser *parser) {
  return parser->first_nonspace_column;
}
|
1582
|
-
|
1583
|
-
// Accessor: returns parser->indent, the number of columns between the
// current column and the first nonspace character.
int cmark_parser_get_indent(cmark_parser *parser) {
  return parser->indent;
}
|
1586
|
-
|
1587
|
-
// Accessor: returns parser->blank, which is true when the first nonspace
// character of the current line is a line ending.
int cmark_parser_is_blank(cmark_parser *parser) {
  return parser->blank;
}
|
1590
|
-
|
1591
|
-
// Accessor: returns parser->partially_consumed_tab, which is set when a
// column-wise advance stopped in the middle of a tab character.
int cmark_parser_has_partially_consumed_tab(cmark_parser *parser) {
  return parser->partially_consumed_tab;
}
|
1594
|
-
|
1595
|
-
// Accessor: returns parser->last_line_length, the length of the most
// recently processed line without its trailing "\n" / "\r".
int cmark_parser_get_last_line_length(cmark_parser *parser) {
  return parser->last_line_length;
}
|
1598
|
-
|
1599
|
-
cmark_node *cmark_parser_add_child(cmark_parser *parser,
|
1600
|
-
cmark_node *parent,
|
1601
|
-
cmark_node_type block_type,
|
1602
|
-
int start_column) {
|
1603
|
-
return add_child(parser, parent, block_type, start_column);
|
1604
|
-
}
|
1605
|
-
|
1606
|
-
// Public wrapper around S_advance_offset() for callers that hold the line
// as a C string: `input` is wrapped in a chunk via cmark_chunk_literal() and
// the parser is advanced by `count` columns (when `columns` is non-zero) or
// `count` bytes (when it is zero).
// NOTE(review): presumably `input` must be NUL-terminated for
// cmark_chunk_literal() -- confirm against its definition.
void cmark_parser_advance_offset(cmark_parser *parser,
                                 const char *input,
                                 int count,
                                 int columns) {
  const bool by_columns = columns != 0;
  cmark_chunk line = cmark_chunk_literal(input);

  S_advance_offset(parser, &line, count, by_columns);
}
|
1614
|
-
|
1615
|
-
void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser,
|
1616
|
-
cmark_ispunct_func func) {
|
1617
|
-
parser->backslash_ispunct = func;
|
1618
|
-
}
|
1619
|
-
|
1620
|
-
// Accessor: returns parser->syntax_extensions, the list of syntax extensions
// attached to the parser. The list itself is returned, not a copy.
cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser) {
  return parser->syntax_extensions;
}
|