commonmarker 0.23.10 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +1156 -0
  3. data/Cargo.toml +7 -0
  4. data/README.md +237 -172
  5. data/ext/commonmarker/Cargo.toml +20 -0
  6. data/ext/commonmarker/extconf.rb +3 -6
  7. data/ext/commonmarker/src/lib.rs +103 -0
  8. data/ext/commonmarker/src/node.rs +1221 -0
  9. data/ext/commonmarker/src/options.rs +220 -0
  10. data/ext/commonmarker/src/plugins/syntax_highlighting.rs +166 -0
  11. data/ext/commonmarker/src/plugins.rs +6 -0
  12. data/ext/commonmarker/src/utils.rs +8 -0
  13. data/lib/commonmarker/config.rb +92 -40
  14. data/lib/commonmarker/constants.rb +7 -0
  15. data/lib/commonmarker/extension.rb +14 -0
  16. data/lib/commonmarker/node/ast.rb +8 -0
  17. data/lib/commonmarker/node/inspect.rb +14 -4
  18. data/lib/commonmarker/node.rb +29 -47
  19. data/lib/commonmarker/renderer.rb +1 -127
  20. data/lib/commonmarker/utils.rb +22 -0
  21. data/lib/commonmarker/version.rb +2 -2
  22. data/lib/commonmarker.rb +27 -25
  23. metadata +38 -191
  24. data/Rakefile +0 -109
  25. data/bin/commonmarker +0 -118
  26. data/commonmarker.gemspec +0 -38
  27. data/ext/commonmarker/arena.c +0 -104
  28. data/ext/commonmarker/autolink.c +0 -508
  29. data/ext/commonmarker/autolink.h +0 -8
  30. data/ext/commonmarker/blocks.c +0 -1622
  31. data/ext/commonmarker/buffer.c +0 -278
  32. data/ext/commonmarker/buffer.h +0 -116
  33. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  34. data/ext/commonmarker/chunk.h +0 -135
  35. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  36. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
  37. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  38. data/ext/commonmarker/cmark-gfm.h +0 -833
  39. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  40. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  41. data/ext/commonmarker/cmark.c +0 -55
  42. data/ext/commonmarker/cmark_ctype.c +0 -44
  43. data/ext/commonmarker/cmark_ctype.h +0 -33
  44. data/ext/commonmarker/commonmark.c +0 -514
  45. data/ext/commonmarker/commonmarker.c +0 -1308
  46. data/ext/commonmarker/commonmarker.h +0 -16
  47. data/ext/commonmarker/config.h +0 -76
  48. data/ext/commonmarker/core-extensions.c +0 -27
  49. data/ext/commonmarker/entities.inc +0 -2138
  50. data/ext/commonmarker/ext_scanners.c +0 -879
  51. data/ext/commonmarker/ext_scanners.h +0 -24
  52. data/ext/commonmarker/footnotes.c +0 -63
  53. data/ext/commonmarker/footnotes.h +0 -27
  54. data/ext/commonmarker/houdini.h +0 -57
  55. data/ext/commonmarker/houdini_href_e.c +0 -100
  56. data/ext/commonmarker/houdini_html_e.c +0 -66
  57. data/ext/commonmarker/houdini_html_u.c +0 -149
  58. data/ext/commonmarker/html.c +0 -502
  59. data/ext/commonmarker/html.h +0 -27
  60. data/ext/commonmarker/inlines.c +0 -1788
  61. data/ext/commonmarker/inlines.h +0 -29
  62. data/ext/commonmarker/iterator.c +0 -159
  63. data/ext/commonmarker/iterator.h +0 -26
  64. data/ext/commonmarker/latex.c +0 -468
  65. data/ext/commonmarker/linked_list.c +0 -37
  66. data/ext/commonmarker/man.c +0 -274
  67. data/ext/commonmarker/map.c +0 -129
  68. data/ext/commonmarker/map.h +0 -44
  69. data/ext/commonmarker/node.c +0 -1045
  70. data/ext/commonmarker/node.h +0 -167
  71. data/ext/commonmarker/parser.h +0 -59
  72. data/ext/commonmarker/plaintext.c +0 -218
  73. data/ext/commonmarker/plugin.c +0 -36
  74. data/ext/commonmarker/plugin.h +0 -34
  75. data/ext/commonmarker/references.c +0 -43
  76. data/ext/commonmarker/references.h +0 -26
  77. data/ext/commonmarker/registry.c +0 -63
  78. data/ext/commonmarker/registry.h +0 -24
  79. data/ext/commonmarker/render.c +0 -213
  80. data/ext/commonmarker/render.h +0 -62
  81. data/ext/commonmarker/scanners.c +0 -14056
  82. data/ext/commonmarker/scanners.h +0 -70
  83. data/ext/commonmarker/scanners.re +0 -341
  84. data/ext/commonmarker/strikethrough.c +0 -167
  85. data/ext/commonmarker/strikethrough.h +0 -9
  86. data/ext/commonmarker/syntax_extension.c +0 -149
  87. data/ext/commonmarker/syntax_extension.h +0 -34
  88. data/ext/commonmarker/table.c +0 -917
  89. data/ext/commonmarker/table.h +0 -12
  90. data/ext/commonmarker/tagfilter.c +0 -60
  91. data/ext/commonmarker/tagfilter.h +0 -8
  92. data/ext/commonmarker/tasklist.c +0 -156
  93. data/ext/commonmarker/tasklist.h +0 -8
  94. data/ext/commonmarker/utf8.c +0 -317
  95. data/ext/commonmarker/utf8.h +0 -35
  96. data/ext/commonmarker/xml.c +0 -182
  97. data/lib/commonmarker/renderer/html_renderer.rb +0 -256
@@ -1,1788 +0,0 @@
1
- #include <stdlib.h>
2
- #include <string.h>
3
- #include <stdio.h>
4
-
5
- #include "cmark_ctype.h"
6
- #include "config.h"
7
- #include "node.h"
8
- #include "parser.h"
9
- #include "references.h"
10
- #include "cmark-gfm.h"
11
- #include "houdini.h"
12
- #include "utf8.h"
13
- #include "scanners.h"
14
- #include "inlines.h"
15
- #include "syntax_extension.h"
16
-
17
// UTF-8 byte sequences of the typographic characters that "smart" punctuation
// substitutes for ASCII input. Neither the bytes nor the pointers are ever
// modified, so both are declared const.
static const char *const EMDASH = "\xE2\x80\x94";           // U+2014
static const char *const ENDASH = "\xE2\x80\x93";           // U+2013
static const char *const ELLIPSES = "\xE2\x80\xA6";         // U+2026
static const char *const LEFTDOUBLEQUOTE = "\xE2\x80\x9C";  // U+201C
static const char *const RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; // U+201D
static const char *const LEFTSINGLEQUOTE = "\xE2\x80\x98";  // U+2018
static const char *const RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // U+2019
24
-
25
// Shorthand constructors for the simple inline node kinds.
//
// The literal-carrying variants take the subject, a start and end column
// (relative to the subject input) and the chunk to store; they forward to
// make_literal() with the matching node type.
#define make_str(subj, sc, ec, s)                                              \
  make_literal(subj, CMARK_NODE_TEXT, sc, ec, s)
#define make_code(subj, sc, ec, s)                                             \
  make_literal(subj, CMARK_NODE_CODE, sc, ec, s)
#define make_raw_html(subj, sc, ec, s)                                         \
  make_literal(subj, CMARK_NODE_HTML_INLINE, sc, ec, s)

// The value-less variants only need an allocator; they forward to
// make_simple() with the matching node type.
#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
#define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG)

// Longest backtick run whose position is remembered in subject.backticks;
// longer opening runs are never matched as code spans.
#define MAXBACKTICKS 80
35
-
36
- typedef struct bracket {
37
- struct bracket *previous;
38
- cmark_node *inl_text;
39
- bufsize_t position;
40
- bool image;
41
- bool active;
42
- bool bracket_after;
43
- bool in_bracket_image0;
44
- bool in_bracket_image1;
45
- } bracket;
46
-
47
// Bit values for subject.flags.
// NOTE(review): the code that sets and tests these bits is outside this part
// of the file; judging by the names they mark HTML constructs whose scan can
// be skipped -- confirm against the users of subject.flags.
#define FLAG_SKIP_HTML_CDATA 0x1u
#define FLAG_SKIP_HTML_DECLARATION 0x2u
#define FLAG_SKIP_HTML_PI 0x4u
#define FLAG_SKIP_HTML_COMMENT 0x8u
51
-
52
- typedef struct subject{
53
- cmark_mem *mem;
54
- cmark_chunk input;
55
- unsigned flags;
56
- int line;
57
- bufsize_t pos;
58
- int block_offset;
59
- int column_offset;
60
- cmark_map *refmap;
61
- delimiter *last_delim;
62
- bracket *last_bracket;
63
- bufsize_t backticks[MAXBACKTICKS + 1];
64
- bool scanned_for_backticks;
65
- bool no_link_openers;
66
- } subject;
67
-
68
// Per-byte table consulted by scan_delims(): bytes with a non-zero entry are
// stepped over when looking for the character before or after a delimiter
// run. All entries start at zero; extensions may populate the table.
static int8_t SKIP_CHARS[256] = {0};
70
-
71
- static CMARK_INLINE bool S_is_line_end_char(char c) {
72
- return (c == '\n' || c == '\r');
73
- }
74
-
75
- static delimiter *S_insert_emph(subject *subj, delimiter *opener,
76
- delimiter *closer);
77
-
78
- static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options);
79
-
80
- static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
81
- cmark_chunk *buffer, cmark_map *refmap);
82
- static bufsize_t subject_find_special_char(subject *subj, int options);
83
-
84
- // Create an inline with a literal string value.
85
- static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
86
- int start_column, int end_column,
87
- cmark_chunk s) {
88
- cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e));
89
- cmark_strbuf_init(subj->mem, &e->content, 0);
90
- e->type = (uint16_t)t;
91
- e->as.literal = s;
92
- e->start_line = e->end_line = subj->line;
93
- // columns are 1 based.
94
- e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
95
- e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
96
- return e;
97
- }
98
-
99
- // Create an inline with no value.
100
- static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
101
- cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
102
- cmark_strbuf_init(mem, &e->content, 0);
103
- e->type = (uint16_t)t;
104
- return e;
105
- }
106
-
107
- // Like make_str, but parses entities.
108
- static cmark_node *make_str_with_entities(subject *subj,
109
- int start_column, int end_column,
110
- cmark_chunk *content) {
111
- cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);
112
-
113
- if (houdini_unescape_html(&unescaped, content->data, content->len)) {
114
- return make_str(subj, start_column, end_column, cmark_chunk_buf_detach(&unescaped));
115
- } else {
116
- return make_str(subj, start_column, end_column, *content);
117
- }
118
- }
119
-
120
- // Like cmark_node_append_child but without costly sanity checks.
121
- // Assumes that child was newly created.
122
- static void append_child(cmark_node *node, cmark_node *child) {
123
- cmark_node *old_last_child = node->last_child;
124
-
125
- child->next = NULL;
126
- child->prev = old_last_child;
127
- child->parent = node;
128
- node->last_child = child;
129
-
130
- if (old_last_child) {
131
- old_last_child->next = child;
132
- } else {
133
- // Also set first_child if node previously had no children.
134
- node->first_child = child;
135
- }
136
- }
137
-
138
- // Duplicate a chunk by creating a copy of the buffer not by reusing the
139
- // buffer like cmark_chunk_dup does.
140
- static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) {
141
- cmark_chunk c;
142
- bufsize_t len = src->len;
143
-
144
- c.len = len;
145
- c.data = (unsigned char *)mem->calloc(len + 1, 1);
146
- c.alloc = 1;
147
- if (len)
148
- memcpy(c.data, src->data, len);
149
- c.data[len] = '\0';
150
-
151
- return c;
152
- }
153
-
154
- static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
155
- int is_email) {
156
- cmark_strbuf buf = CMARK_BUF_INIT(mem);
157
-
158
- cmark_chunk_trim(url);
159
-
160
- if (url->len == 0) {
161
- cmark_chunk result = CMARK_CHUNK_EMPTY;
162
- return result;
163
- }
164
-
165
- if (is_email)
166
- cmark_strbuf_puts(&buf, "mailto:");
167
-
168
- houdini_unescape_html_f(&buf, url->data, url->len);
169
- return cmark_chunk_buf_detach(&buf);
170
- }
171
-
172
- static CMARK_INLINE cmark_node *make_autolink(subject *subj,
173
- int start_column, int end_column,
174
- cmark_chunk url, int is_email) {
175
- cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
176
- link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
177
- link->as.link.title = cmark_chunk_literal("");
178
- link->start_line = link->end_line = subj->line;
179
- link->start_column = start_column + 1;
180
- link->end_column = end_column + 1;
181
- append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
182
- return link;
183
- }
184
-
185
- static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
186
- cmark_chunk *chunk, cmark_map *refmap) {
187
- int i;
188
- e->mem = mem;
189
- e->input = *chunk;
190
- e->flags = 0;
191
- e->line = line_number;
192
- e->pos = 0;
193
- e->block_offset = block_offset;
194
- e->column_offset = 0;
195
- e->refmap = refmap;
196
- e->last_delim = NULL;
197
- e->last_bracket = NULL;
198
- for (i = 0; i <= MAXBACKTICKS; i++) {
199
- e->backticks[i] = 0;
200
- }
201
- e->scanned_for_backticks = false;
202
- e->no_link_openers = true;
203
- }
204
-
205
- static CMARK_INLINE int isbacktick(int c) { return (c == '`'); }
206
-
207
- static CMARK_INLINE unsigned char peek_char_n(subject *subj, bufsize_t n) {
208
- // NULL bytes should have been stripped out by now. If they're
209
- // present, it's a programming error:
210
- assert(!(subj->pos + n < subj->input.len && subj->input.data[subj->pos + n] == 0));
211
- return (subj->pos + n < subj->input.len) ? subj->input.data[subj->pos + n] : 0;
212
- }
213
-
214
- static CMARK_INLINE unsigned char peek_char(subject *subj) {
215
- return peek_char_n(subj, 0);
216
- }
217
-
218
- static CMARK_INLINE unsigned char peek_at(subject *subj, bufsize_t pos) {
219
- return subj->input.data[pos];
220
- }
221
-
222
- // Return true if there are more characters in the subject.
223
- static CMARK_INLINE int is_eof(subject *subj) {
224
- return (subj->pos >= subj->input.len);
225
- }
226
-
227
// Moves the subject's cursor forward by one byte. Does not check for end of
// input. The expansion is fully parenthesized so that it behaves as a single
// expression wherever it is used.
#define advance(subj) ((subj)->pos += 1)
229
-
230
- static CMARK_INLINE bool skip_spaces(subject *subj) {
231
- bool skipped = false;
232
- while (peek_char(subj) == ' ' || peek_char(subj) == '\t') {
233
- advance(subj);
234
- skipped = true;
235
- }
236
- return skipped;
237
- }
238
-
239
- static CMARK_INLINE bool skip_line_end(subject *subj) {
240
- bool seen_line_end_char = false;
241
- if (peek_char(subj) == '\r') {
242
- advance(subj);
243
- seen_line_end_char = true;
244
- }
245
- if (peek_char(subj) == '\n') {
246
- advance(subj);
247
- seen_line_end_char = true;
248
- }
249
- return seen_line_end_char || is_eof(subj);
250
- }
251
-
252
- // Take characters while a predicate holds, and return a string.
253
- static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
254
- unsigned char c;
255
- bufsize_t startpos = subj->pos;
256
- bufsize_t len = 0;
257
-
258
- while ((c = peek_char(subj)) && (*f)(c)) {
259
- advance(subj);
260
- len++;
261
- }
262
-
263
- return cmark_chunk_dup(&subj->input, startpos, len);
264
- }
265
-
266
- // Return the number of newlines in a given span of text in a subject. If
267
- // the number is greater than zero, also return the number of characters
268
- // between the last newline and the end of the span in `since_newline`.
269
- static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
270
- int nls = 0;
271
- int since_nl = 0;
272
-
273
- while (len--) {
274
- if (subj->input.data[from++] == '\n') {
275
- ++nls;
276
- since_nl = 0;
277
- } else {
278
- ++since_nl;
279
- }
280
- }
281
-
282
- if (!nls)
283
- return 0;
284
-
285
- *since_newline = since_nl;
286
- return nls;
287
- }
288
-
289
- // Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and
290
- // `column_offset` according to the number of newlines in a just-matched span
291
- // of text in `subj`.
292
- static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) {
293
- if (!(options & CMARK_OPT_SOURCEPOS)) {
294
- return;
295
- }
296
-
297
- int since_newline;
298
- int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
299
- if (newlines) {
300
- subj->line += newlines;
301
- node->end_line += newlines;
302
- node->end_column = since_newline;
303
- subj->column_offset = -subj->pos + since_newline + extra;
304
- }
305
- }
306
-
307
- // Try to process a backtick code span that began with a
308
- // span of ticks of length openticklength length (already
309
- // parsed). Return 0 if you don't find matching closing
310
- // backticks, otherwise return the position in the subject
311
- // after the closing backticks.
312
- static bufsize_t scan_to_closing_backticks(subject *subj,
313
- bufsize_t openticklength) {
314
-
315
- bool found = false;
316
- if (openticklength > MAXBACKTICKS) {
317
- // we limit backtick string length because of the array subj->backticks:
318
- return 0;
319
- }
320
- if (subj->scanned_for_backticks &&
321
- subj->backticks[openticklength] <= subj->pos) {
322
- // return if we already know there's no closer
323
- return 0;
324
- }
325
- while (!found) {
326
- // read non backticks
327
- unsigned char c;
328
- while ((c = peek_char(subj)) && c != '`') {
329
- advance(subj);
330
- }
331
- if (is_eof(subj)) {
332
- break;
333
- }
334
- bufsize_t numticks = 0;
335
- while (peek_char(subj) == '`') {
336
- advance(subj);
337
- numticks++;
338
- }
339
- // store position of ender
340
- if (numticks <= MAXBACKTICKS) {
341
- subj->backticks[numticks] = subj->pos - numticks;
342
- }
343
- if (numticks == openticklength) {
344
- return (subj->pos);
345
- }
346
- }
347
- // got through whole input without finding closer
348
- subj->scanned_for_backticks = true;
349
- return 0;
350
- }
351
-
352
- // Destructively modify string, converting newlines to
353
- // spaces, then removing a single leading + trailing space,
354
- // unless the code span consists entirely of space characters.
355
- static void S_normalize_code(cmark_strbuf *s) {
356
- bufsize_t r, w;
357
- bool contains_nonspace = false;
358
-
359
- for (r = 0, w = 0; r < s->size; ++r) {
360
- switch (s->ptr[r]) {
361
- case '\r':
362
- if (s->ptr[r + 1] != '\n') {
363
- s->ptr[w++] = ' ';
364
- }
365
- break;
366
- case '\n':
367
- s->ptr[w++] = ' ';
368
- break;
369
- default:
370
- s->ptr[w++] = s->ptr[r];
371
- }
372
- if (s->ptr[r] != ' ') {
373
- contains_nonspace = true;
374
- }
375
- }
376
-
377
- // begins and ends with space?
378
- if (contains_nonspace &&
379
- s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
380
- cmark_strbuf_drop(s, 1);
381
- cmark_strbuf_truncate(s, w - 2);
382
- } else {
383
- cmark_strbuf_truncate(s, w);
384
- }
385
-
386
- }
387
-
388
-
389
- // Parse backtick code section or raw backticks, return an inline.
390
- // Assumes that the subject has a backtick at the current position.
391
- static cmark_node *handle_backticks(subject *subj, int options) {
392
- cmark_chunk openticks = take_while(subj, isbacktick);
393
- bufsize_t startpos = subj->pos;
394
- bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
395
-
396
- if (endpos == 0) { // not found
397
- subj->pos = startpos; // rewind
398
- return make_str(subj, subj->pos, subj->pos, openticks);
399
- } else {
400
- cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
401
-
402
- cmark_strbuf_set(&buf, subj->input.data + startpos,
403
- endpos - startpos - openticks.len);
404
- S_normalize_code(&buf);
405
-
406
- cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
407
- adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
408
- return node;
409
- }
410
- }
411
-
412
-
413
- // Scan ***, **, or * and return number scanned, or 0.
414
- // Advances position.
415
- static int scan_delims(subject *subj, unsigned char c, bool *can_open,
416
- bool *can_close) {
417
- int numdelims = 0;
418
- bufsize_t before_char_pos, after_char_pos;
419
- int32_t after_char = 0;
420
- int32_t before_char = 0;
421
- int len;
422
- bool left_flanking, right_flanking;
423
-
424
- if (subj->pos == 0) {
425
- before_char = 10;
426
- } else {
427
- before_char_pos = subj->pos - 1;
428
- // walk back to the beginning of the UTF_8 sequence:
429
- while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) {
430
- before_char_pos -= 1;
431
- }
432
- len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
433
- subj->pos - before_char_pos, &before_char);
434
- if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) {
435
- before_char = 10;
436
- }
437
- }
438
-
439
- if (c == '\'' || c == '"') {
440
- numdelims++;
441
- advance(subj); // limit to 1 delim for quotes
442
- } else {
443
- while (peek_char(subj) == c) {
444
- numdelims++;
445
- advance(subj);
446
- }
447
- }
448
-
449
- if (subj->pos == subj->input.len) {
450
- after_char = 10;
451
- } else {
452
- after_char_pos = subj->pos;
453
- while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) {
454
- after_char_pos += 1;
455
- }
456
- len = cmark_utf8proc_iterate(subj->input.data + after_char_pos,
457
- subj->input.len - after_char_pos, &after_char);
458
- if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) {
459
- after_char = 10;
460
- }
461
- }
462
-
463
- left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
464
- (!cmark_utf8proc_is_punctuation(after_char) ||
465
- cmark_utf8proc_is_space(before_char) ||
466
- cmark_utf8proc_is_punctuation(before_char));
467
- right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
468
- (!cmark_utf8proc_is_punctuation(before_char) ||
469
- cmark_utf8proc_is_space(after_char) ||
470
- cmark_utf8proc_is_punctuation(after_char));
471
- if (c == '_') {
472
- *can_open = left_flanking &&
473
- (!right_flanking || cmark_utf8proc_is_punctuation(before_char));
474
- *can_close = right_flanking &&
475
- (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
476
- } else if (c == '\'' || c == '"') {
477
- *can_open = left_flanking && !right_flanking &&
478
- before_char != ']' && before_char != ')';
479
- *can_close = right_flanking;
480
- } else {
481
- *can_open = left_flanking;
482
- *can_close = right_flanking;
483
- }
484
- return numdelims;
485
- }
486
-
487
- /*
488
- static void print_delimiters(subject *subj)
489
- {
490
- delimiter *delim;
491
- delim = subj->last_delim;
492
- while (delim != NULL) {
493
- printf("Item at stack pos %p: %d %d %d next(%p) prev(%p)\n",
494
- (void*)delim, delim->delim_char,
495
- delim->can_open, delim->can_close,
496
- (void*)delim->next, (void*)delim->previous);
497
- delim = delim->previous;
498
- }
499
- }
500
- */
501
-
502
- static void remove_delimiter(subject *subj, delimiter *delim) {
503
- if (delim == NULL)
504
- return;
505
- if (delim->next == NULL) {
506
- // end of list:
507
- assert(delim == subj->last_delim);
508
- subj->last_delim = delim->previous;
509
- } else {
510
- delim->next->previous = delim->previous;
511
- }
512
- if (delim->previous != NULL) {
513
- delim->previous->next = delim->next;
514
- }
515
- subj->mem->free(delim);
516
- }
517
-
518
- static void pop_bracket(subject *subj) {
519
- bracket *b;
520
- if (subj->last_bracket == NULL)
521
- return;
522
- b = subj->last_bracket;
523
- subj->last_bracket = subj->last_bracket->previous;
524
- subj->mem->free(b);
525
- }
526
-
527
- static void push_delimiter(subject *subj, unsigned char c, bool can_open,
528
- bool can_close, cmark_node *inl_text) {
529
- delimiter *delim = (delimiter *)subj->mem->calloc(1, sizeof(delimiter));
530
- delim->delim_char = c;
531
- delim->can_open = can_open;
532
- delim->can_close = can_close;
533
- delim->inl_text = inl_text;
534
- delim->position = subj->pos;
535
- delim->length = inl_text->as.literal.len;
536
- delim->previous = subj->last_delim;
537
- delim->next = NULL;
538
- if (delim->previous != NULL) {
539
- delim->previous->next = delim;
540
- }
541
- subj->last_delim = delim;
542
- }
543
-
544
- static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
545
- bracket *b = (bracket *)subj->mem->calloc(1, sizeof(bracket));
546
- if (subj->last_bracket != NULL) {
547
- subj->last_bracket->bracket_after = true;
548
- b->in_bracket_image0 = subj->last_bracket->in_bracket_image0;
549
- b->in_bracket_image1 = subj->last_bracket->in_bracket_image1;
550
- }
551
- b->image = image;
552
- b->active = true;
553
- b->inl_text = inl_text;
554
- b->previous = subj->last_bracket;
555
- b->position = subj->pos;
556
- b->bracket_after = false;
557
- if (image) {
558
- b->in_bracket_image1 = true;
559
- } else {
560
- b->in_bracket_image0 = true;
561
- }
562
- subj->last_bracket = b;
563
- if (!image) {
564
- subj->no_link_openers = false;
565
- }
566
- }
567
-
568
- // Assumes the subject has a c at the current position.
569
- static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
570
- bufsize_t numdelims;
571
- cmark_node *inl_text;
572
- bool can_open, can_close;
573
- cmark_chunk contents;
574
-
575
- numdelims = scan_delims(subj, c, &can_open, &can_close);
576
-
577
- if (c == '\'' && smart) {
578
- contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
579
- } else if (c == '"' && smart) {
580
- contents =
581
- cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
582
- } else {
583
- contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
584
- }
585
-
586
- inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);
587
-
588
- if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
589
- push_delimiter(subj, c, can_open, can_close, inl_text);
590
- }
591
-
592
- return inl_text;
593
- }
594
-
595
- // Assumes we have a hyphen at the current position.
596
- static cmark_node *handle_hyphen(subject *subj, bool smart) {
597
- int startpos = subj->pos;
598
-
599
- advance(subj);
600
-
601
- if (!smart || peek_char(subj) != '-') {
602
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
603
- }
604
-
605
- while (smart && peek_char(subj) == '-') {
606
- advance(subj);
607
- }
608
-
609
- int numhyphens = subj->pos - startpos;
610
- int en_count = 0;
611
- int em_count = 0;
612
- int i;
613
- cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
614
-
615
- if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
616
- em_count = numhyphens / 3;
617
- } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
618
- en_count = numhyphens / 2;
619
- } else if (numhyphens % 3 == 2) { // use one en dash at end
620
- en_count = 1;
621
- em_count = (numhyphens - 2) / 3;
622
- } else { // use two en dashes at the end
623
- en_count = 2;
624
- em_count = (numhyphens - 4) / 3;
625
- }
626
-
627
- for (i = em_count; i > 0; i--) {
628
- cmark_strbuf_puts(&buf, EMDASH);
629
- }
630
-
631
- for (i = en_count; i > 0; i--) {
632
- cmark_strbuf_puts(&buf, ENDASH);
633
- }
634
-
635
- return make_str(subj, startpos, subj->pos - 1, cmark_chunk_buf_detach(&buf));
636
- }
637
-
638
- // Assumes we have a period at the current position.
639
- static cmark_node *handle_period(subject *subj, bool smart) {
640
- advance(subj);
641
- if (smart && peek_char(subj) == '.') {
642
- advance(subj);
643
- if (peek_char(subj) == '.') {
644
- advance(subj);
645
- return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
646
- } else {
647
- return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
648
- }
649
- } else {
650
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
651
- }
652
- }
653
-
654
- static cmark_syntax_extension *get_extension_for_special_char(cmark_parser *parser, unsigned char c) {
655
- cmark_llist *tmp_ext;
656
-
657
- for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) {
658
- cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data;
659
- cmark_llist *tmp_char;
660
- for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
661
- unsigned char tmp_c = (unsigned char)(size_t)tmp_char->data;
662
-
663
- if (tmp_c == c) {
664
- return ext;
665
- }
666
- }
667
- }
668
-
669
- return NULL;
670
- }
671
-
672
- static void process_emphasis(cmark_parser *parser, subject *subj, bufsize_t stack_bottom) {
673
- delimiter *candidate;
674
- delimiter *closer = NULL;
675
- delimiter *opener;
676
- delimiter *old_closer;
677
- bool opener_found;
678
- bufsize_t openers_bottom[3][128];
679
- int i;
680
-
681
- // initialize openers_bottom:
682
- memset(&openers_bottom, 0, sizeof(openers_bottom));
683
- for (i=0; i < 3; i++) {
684
- openers_bottom[i]['*'] = stack_bottom;
685
- openers_bottom[i]['_'] = stack_bottom;
686
- openers_bottom[i]['\''] = stack_bottom;
687
- openers_bottom[i]['"'] = stack_bottom;
688
- }
689
-
690
- // move back to first relevant delim.
691
- candidate = subj->last_delim;
692
- while (candidate != NULL && candidate->position >= stack_bottom) {
693
- closer = candidate;
694
- candidate = candidate->previous;
695
- }
696
-
697
- // now move forward, looking for closers, and handling each
698
- while (closer != NULL) {
699
- cmark_syntax_extension *extension = get_extension_for_special_char(parser, closer->delim_char);
700
- if (closer->can_close) {
701
- // Now look backwards for first matching opener:
702
- opener = closer->previous;
703
- opener_found = false;
704
- while (opener != NULL && opener->position >= stack_bottom &&
705
- opener->position >= openers_bottom[closer->length % 3][closer->delim_char]) {
706
- if (opener->can_open && opener->delim_char == closer->delim_char) {
707
- // interior closer of size 2 can't match opener of size 1
708
- // or of size 1 can't match 2
709
- if (!(closer->can_open || opener->can_close) ||
710
- closer->length % 3 == 0 ||
711
- (opener->length + closer->length) % 3 != 0) {
712
- opener_found = true;
713
- break;
714
- }
715
- }
716
- opener = opener->previous;
717
- }
718
- old_closer = closer;
719
-
720
- if (extension) {
721
- if (opener_found)
722
- closer = extension->insert_inline_from_delim(extension, parser, subj, opener, closer);
723
- else
724
- closer = closer->next;
725
- } else if (closer->delim_char == '*' || closer->delim_char == '_') {
726
- if (opener_found) {
727
- closer = S_insert_emph(subj, opener, closer);
728
- } else {
729
- closer = closer->next;
730
- }
731
- } else if (closer->delim_char == '\'' || closer->delim_char == '"') {
732
- cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
733
- if (closer->delim_char == '\'') {
734
- closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
735
- } else {
736
- closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
737
- }
738
- closer = closer->next;
739
- if (opener_found) {
740
- cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
741
- if (old_closer->delim_char == '\'') {
742
- opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
743
- } else {
744
- opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
745
- }
746
- remove_delimiter(subj, opener);
747
- remove_delimiter(subj, old_closer);
748
- }
749
- }
750
- if (!opener_found) {
751
- // set lower bound for future searches for openers
752
- openers_bottom[old_closer->length % 3][old_closer->delim_char] =
753
- old_closer->position;
754
- if (!old_closer->can_open) {
755
- // we can remove a closer that can't be an
756
- // opener, once we've seen there's no
757
- // matching opener:
758
- remove_delimiter(subj, old_closer);
759
- }
760
- }
761
- } else {
762
- closer = closer->next;
763
- }
764
- }
765
- // free all delimiters in list until stack_bottom:
766
- while (subj->last_delim != NULL &&
767
- subj->last_delim->position >= stack_bottom) {
768
- remove_delimiter(subj, subj->last_delim);
769
- }
770
- }
771
-
772
- static delimiter *S_insert_emph(subject *subj, delimiter *opener,
773
- delimiter *closer) {
774
- delimiter *delim, *tmp_delim;
775
- bufsize_t use_delims;
776
- cmark_node *opener_inl = opener->inl_text;
777
- cmark_node *closer_inl = closer->inl_text;
778
- bufsize_t opener_num_chars = opener_inl->as.literal.len;
779
- bufsize_t closer_num_chars = closer_inl->as.literal.len;
780
- cmark_node *tmp, *tmpnext, *emph;
781
-
782
- // calculate the actual number of characters used from this closer
783
- use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;
784
-
785
- // remove used characters from associated inlines.
786
- opener_num_chars -= use_delims;
787
- closer_num_chars -= use_delims;
788
- opener_inl->as.literal.len = opener_num_chars;
789
- closer_inl->as.literal.len = closer_num_chars;
790
-
791
- // free delimiters between opener and closer
792
- delim = closer->previous;
793
- while (delim != NULL && delim != opener) {
794
- tmp_delim = delim->previous;
795
- remove_delimiter(subj, delim);
796
- delim = tmp_delim;
797
- }
798
-
799
- // create new emph or strong, and splice it in to our inlines
800
- // between the opener and closer
801
- emph = use_delims == 1 ? make_emph(subj->mem) : make_strong(subj->mem);
802
-
803
- tmp = opener_inl->next;
804
- while (tmp && tmp != closer_inl) {
805
- tmpnext = tmp->next;
806
- cmark_node_unlink(tmp);
807
- append_child(emph, tmp);
808
- tmp = tmpnext;
809
- }
810
- cmark_node_insert_after(opener_inl, emph);
811
-
812
- emph->start_line = opener_inl->start_line;
813
- emph->end_line = closer_inl->end_line;
814
- emph->start_column = opener_inl->start_column;
815
- emph->end_column = closer_inl->end_column;
816
-
817
- // if opener has 0 characters, remove it and its associated inline
818
- if (opener_num_chars == 0) {
819
- cmark_node_free(opener_inl);
820
- remove_delimiter(subj, opener);
821
- }
822
-
823
- // if closer has 0 characters, remove it and its associated inline
824
- if (closer_num_chars == 0) {
825
- // remove empty closer inline
826
- cmark_node_free(closer_inl);
827
- // remove closer from list
828
- tmp_delim = closer->next;
829
- remove_delimiter(subj, closer);
830
- closer = tmp_delim;
831
- }
832
-
833
- return closer;
834
- }
835
-
836
- // Parse backslash-escape or just a backslash, returning an inline.
837
- static cmark_node *handle_backslash(cmark_parser *parser, subject *subj) {
838
- advance(subj);
839
- unsigned char nextchar = peek_char(subj);
840
- if ((parser->backslash_ispunct ? parser->backslash_ispunct : cmark_ispunct)(nextchar)) {
841
- // only ascii symbols and newline can be escaped
842
- advance(subj);
843
- return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
844
- } else if (!is_eof(subj) && skip_line_end(subj)) {
845
- return make_linebreak(subj->mem);
846
- } else {
847
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
848
- }
849
- }
850
-
851
- // Parse an entity or a regular "&" string.
852
- // Assumes the subject has an '&' character at the current position.
853
- static cmark_node *handle_entity(subject *subj) {
854
- cmark_strbuf ent = CMARK_BUF_INIT(subj->mem);
855
- bufsize_t len;
856
-
857
- advance(subj);
858
-
859
- len = houdini_unescape_ent(&ent, subj->input.data + subj->pos,
860
- subj->input.len - subj->pos);
861
-
862
- if (len == 0)
863
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&"));
864
-
865
- subj->pos += len;
866
- return make_str(subj, subj->pos - 1 - len, subj->pos - 1, cmark_chunk_buf_detach(&ent));
867
- }
868
-
869
- // Clean a URL: remove surrounding whitespace, and remove \ that escape
870
- // punctuation.
871
- cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
872
- cmark_strbuf buf = CMARK_BUF_INIT(mem);
873
-
874
- cmark_chunk_trim(url);
875
-
876
- if (url->len == 0) {
877
- cmark_chunk result = CMARK_CHUNK_EMPTY;
878
- return result;
879
- }
880
-
881
- houdini_unescape_html_f(&buf, url->data, url->len);
882
-
883
- cmark_strbuf_unescape(&buf);
884
- return cmark_chunk_buf_detach(&buf);
885
- }
886
-
887
- cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
888
- cmark_strbuf buf = CMARK_BUF_INIT(mem);
889
- unsigned char first, last;
890
-
891
- if (title->len == 0) {
892
- cmark_chunk result = CMARK_CHUNK_EMPTY;
893
- return result;
894
- }
895
-
896
- first = title->data[0];
897
- last = title->data[title->len - 1];
898
-
899
- // remove surrounding quotes if any:
900
- if ((first == '\'' && last == '\'') || (first == '(' && last == ')') ||
901
- (first == '"' && last == '"')) {
902
- houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
903
- } else {
904
- houdini_unescape_html_f(&buf, title->data, title->len);
905
- }
906
-
907
- cmark_strbuf_unescape(&buf);
908
- return cmark_chunk_buf_detach(&buf);
909
- }
910
-
911
- // Parse an autolink or HTML tag.
912
- // Assumes the subject has a '<' character at the current position.
913
- static cmark_node *handle_pointy_brace(subject *subj, int options) {
914
- bufsize_t matchlen = 0;
915
- cmark_chunk contents;
916
-
917
- advance(subj); // advance past first <
918
-
919
- // first try to match a URL autolink
920
- matchlen = scan_autolink_uri(&subj->input, subj->pos);
921
- if (matchlen > 0) {
922
- contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
923
- subj->pos += matchlen;
924
-
925
- return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);
926
- }
927
-
928
- // next try to match an email autolink
929
- matchlen = scan_autolink_email(&subj->input, subj->pos);
930
- if (matchlen > 0) {
931
- contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
932
- subj->pos += matchlen;
933
-
934
- return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);
935
- }
936
-
937
- // finally, try to match an html tag
938
- if (subj->pos + 2 <= subj->input.len) {
939
- int c = subj->input.data[subj->pos];
940
- if (c == '!' && (subj->flags & FLAG_SKIP_HTML_COMMENT) == 0) {
941
- c = subj->input.data[subj->pos+1];
942
- if (c == '-' && subj->input.data[subj->pos+2] == '-') {
943
- if (subj->input.data[subj->pos+3] == '>') {
944
- matchlen = 4;
945
- } else if (subj->input.data[subj->pos+3] == '-' &&
946
- subj->input.data[subj->pos+4] == '>') {
947
- matchlen = 5;
948
- } else {
949
- matchlen = scan_html_comment(&subj->input, subj->pos + 1);
950
- if (matchlen > 0) {
951
- matchlen += 1; // prefix "<"
952
- } else { // no match through end of input: set a flag so
953
- // we don't reparse looking for -->:
954
- subj->flags |= FLAG_SKIP_HTML_COMMENT;
955
- }
956
- }
957
- } else if (c == '[') {
958
- if ((subj->flags & FLAG_SKIP_HTML_CDATA) == 0) {
959
- matchlen = scan_html_cdata(&subj->input, subj->pos + 2);
960
- if (matchlen > 0) {
961
- // The regex doesn't require the final "]]>". But if we're not at
962
- // the end of input, it must come after the match. Otherwise,
963
- // disable subsequent scans to avoid quadratic behavior.
964
- matchlen += 5; // prefix "![", suffix "]]>"
965
- if (subj->pos + matchlen > subj->input.len) {
966
- subj->flags |= FLAG_SKIP_HTML_CDATA;
967
- matchlen = 0;
968
- }
969
- }
970
- }
971
- } else if ((subj->flags & FLAG_SKIP_HTML_DECLARATION) == 0) {
972
- matchlen = scan_html_declaration(&subj->input, subj->pos + 1);
973
- if (matchlen > 0) {
974
- matchlen += 2; // prefix "!", suffix ">"
975
- if (subj->pos + matchlen > subj->input.len) {
976
- subj->flags |= FLAG_SKIP_HTML_DECLARATION;
977
- matchlen = 0;
978
- }
979
- }
980
- }
981
- } else if (c == '?') {
982
- if ((subj->flags & FLAG_SKIP_HTML_PI) == 0) {
983
- // Note that we allow an empty match.
984
- matchlen = scan_html_pi(&subj->input, subj->pos + 1);
985
- matchlen += 3; // prefix "?", suffix "?>"
986
- if (subj->pos + matchlen > subj->input.len) {
987
- subj->flags |= FLAG_SKIP_HTML_PI;
988
- matchlen = 0;
989
- }
990
- }
991
- } else {
992
- matchlen = scan_html_tag(&subj->input, subj->pos);
993
- }
994
- }
995
- if (matchlen > 0) {
996
- contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
997
- subj->pos += matchlen;
998
- cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
999
- adjust_subj_node_newlines(subj, node, matchlen, 1, options);
1000
- return node;
1001
- }
1002
-
1003
- if (options & CMARK_OPT_LIBERAL_HTML_TAG) {
1004
- matchlen = scan_liberal_html_tag(&subj->input, subj->pos);
1005
- if (matchlen > 0) {
1006
- contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
1007
- subj->pos += matchlen;
1008
- cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
1009
- adjust_subj_node_newlines(subj, node, matchlen, 1, options);
1010
- return node;
1011
- }
1012
- }
1013
-
1014
- // if nothing matches, just return the opening <:
1015
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));
1016
- }
1017
-
1018
- // Parse a link label. Returns 1 if successful.
1019
- // Note: unescaped brackets are not allowed in labels.
1020
- // The label begins with `[` and ends with the first `]` character
1021
- // encountered. Backticks in labels do not start code spans.
1022
- static int link_label(subject *subj, cmark_chunk *raw_label) {
1023
- bufsize_t startpos = subj->pos;
1024
- int length = 0;
1025
- unsigned char c;
1026
-
1027
- // advance past [
1028
- if (peek_char(subj) == '[') {
1029
- advance(subj);
1030
- } else {
1031
- return 0;
1032
- }
1033
-
1034
- while ((c = peek_char(subj)) && c != '[' && c != ']') {
1035
- if (c == '\\') {
1036
- advance(subj);
1037
- length++;
1038
- if (cmark_ispunct(peek_char(subj))) {
1039
- advance(subj);
1040
- length++;
1041
- }
1042
- } else {
1043
- advance(subj);
1044
- length++;
1045
- }
1046
- if (length > MAX_LINK_LABEL_LENGTH) {
1047
- goto noMatch;
1048
- }
1049
- }
1050
-
1051
- if (c == ']') { // match found
1052
- *raw_label =
1053
- cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
1054
- cmark_chunk_trim(raw_label);
1055
- advance(subj); // advance past ]
1056
- return 1;
1057
- }
1058
-
1059
- noMatch:
1060
- subj->pos = startpos; // rewind
1061
- return 0;
1062
- }
1063
-
1064
- static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
1065
- cmark_chunk *output) {
1066
- bufsize_t i = offset;
1067
- size_t nb_p = 0;
1068
-
1069
- while (i < input->len) {
1070
- if (input->data[i] == '\\' &&
1071
- i + 1 < input-> len &&
1072
- cmark_ispunct(input->data[i+1]))
1073
- i += 2;
1074
- else if (input->data[i] == '(') {
1075
- ++nb_p;
1076
- ++i;
1077
- if (nb_p > 32)
1078
- return -1;
1079
- } else if (input->data[i] == ')') {
1080
- if (nb_p == 0)
1081
- break;
1082
- --nb_p;
1083
- ++i;
1084
- } else if (cmark_isspace(input->data[i])) {
1085
- if (i == offset) {
1086
- return -1;
1087
- }
1088
- break;
1089
- } else {
1090
- ++i;
1091
- }
1092
- }
1093
-
1094
- if (i >= input->len)
1095
- return -1;
1096
-
1097
- {
1098
- cmark_chunk result = {input->data + offset, i - offset, 0};
1099
- *output = result;
1100
- }
1101
- return i - offset;
1102
- }
1103
-
1104
- static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
1105
- cmark_chunk *output) {
1106
- bufsize_t i = offset;
1107
-
1108
- if (i < input->len && input->data[i] == '<') {
1109
- ++i;
1110
- while (i < input->len) {
1111
- if (input->data[i] == '>') {
1112
- ++i;
1113
- break;
1114
- } else if (input->data[i] == '\\')
1115
- i += 2;
1116
- else if (input->data[i] == '\n' || input->data[i] == '<')
1117
- return -1;
1118
- else
1119
- ++i;
1120
- }
1121
- } else {
1122
- return manual_scan_link_url_2(input, offset, output);
1123
- }
1124
-
1125
- if (i >= input->len)
1126
- return -1;
1127
-
1128
- {
1129
- cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0};
1130
- *output = result;
1131
- }
1132
- return i - offset;
1133
- }
1134
-
1135
- // Return a link, an image, or a literal close bracket.
1136
- static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
1137
- bufsize_t initial_pos, after_link_text_pos;
1138
- bufsize_t endurl, starttitle, endtitle, endall;
1139
- bufsize_t sps, n;
1140
- cmark_reference *ref = NULL;
1141
- cmark_chunk url_chunk, title_chunk;
1142
- cmark_chunk url, title;
1143
- bracket *opener;
1144
- cmark_node *inl;
1145
- cmark_chunk raw_label;
1146
- int found_label;
1147
- cmark_node *tmp, *tmpnext;
1148
- bool is_image;
1149
-
1150
- advance(subj); // advance past ]
1151
- initial_pos = subj->pos;
1152
-
1153
- // get last [ or ![
1154
- opener = subj->last_bracket;
1155
-
1156
- if (opener == NULL) {
1157
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1158
- }
1159
-
1160
- // If we got here, we matched a potential link/image text.
1161
- // Now we check to see if it's a link/image.
1162
- is_image = opener->image;
1163
-
1164
- if (!is_image && subj->no_link_openers) {
1165
- // take delimiter off stack
1166
- pop_bracket(subj);
1167
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1168
- }
1169
-
1170
- after_link_text_pos = subj->pos;
1171
-
1172
- // First, look for an inline link.
1173
- if (peek_char(subj) == '(' &&
1174
- ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
1175
- ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
1176
- &url_chunk)) > -1)) {
1177
-
1178
- // try to parse an explicit link:
1179
- endurl = subj->pos + 1 + sps + n;
1180
- starttitle = endurl + scan_spacechars(&subj->input, endurl);
1181
-
1182
- // ensure there are spaces btw url and title
1183
- endtitle = (starttitle == endurl)
1184
- ? starttitle
1185
- : starttitle + scan_link_title(&subj->input, starttitle);
1186
-
1187
- endall = endtitle + scan_spacechars(&subj->input, endtitle);
1188
-
1189
- if (peek_at(subj, endall) == ')') {
1190
- subj->pos = endall + 1;
1191
-
1192
- title_chunk =
1193
- cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
1194
- url = cmark_clean_url(subj->mem, &url_chunk);
1195
- title = cmark_clean_title(subj->mem, &title_chunk);
1196
- cmark_chunk_free(subj->mem, &url_chunk);
1197
- cmark_chunk_free(subj->mem, &title_chunk);
1198
- goto match;
1199
-
1200
- } else {
1201
- // it could still be a shortcut reference link
1202
- subj->pos = after_link_text_pos;
1203
- }
1204
- }
1205
-
1206
- // Next, look for a following [link label] that matches in refmap.
1207
- // skip spaces
1208
- raw_label = cmark_chunk_literal("");
1209
- found_label = link_label(subj, &raw_label);
1210
- if (!found_label) {
1211
- // If we have a shortcut reference link, back up
1212
- // to before the spacse we skipped.
1213
- subj->pos = initial_pos;
1214
- }
1215
-
1216
- if ((!found_label || raw_label.len == 0) && !opener->bracket_after) {
1217
- cmark_chunk_free(subj->mem, &raw_label);
1218
- raw_label = cmark_chunk_dup(&subj->input, opener->position,
1219
- initial_pos - opener->position - 1);
1220
- found_label = true;
1221
- }
1222
-
1223
- if (found_label) {
1224
- ref = (cmark_reference *)cmark_map_lookup(subj->refmap, &raw_label);
1225
- cmark_chunk_free(subj->mem, &raw_label);
1226
- }
1227
-
1228
- if (ref != NULL) { // found
1229
- url = chunk_clone(subj->mem, &ref->url);
1230
- title = chunk_clone(subj->mem, &ref->title);
1231
- goto match;
1232
- } else {
1233
- goto noMatch;
1234
- }
1235
-
1236
- noMatch:
1237
- // If we fall through to here, it means we didn't match a link.
1238
- // What if we're a footnote link?
1239
- if (parser->options & CMARK_OPT_FOOTNOTES &&
1240
- opener->inl_text->next &&
1241
- opener->inl_text->next->type == CMARK_NODE_TEXT) {
1242
-
1243
- cmark_chunk *literal = &opener->inl_text->next->as.literal;
1244
-
1245
- // look back to the opening '[', and skip ahead to the next character
1246
- // if we're looking at a '[^' sequence, and there is other text or nodes
1247
- // after the ^, let's call it a footnote reference.
1248
- if ((literal->len > 0 && literal->data[0] == '^') && (literal->len > 1 || opener->inl_text->next->next)) {
1249
-
1250
- // Before we got this far, the `handle_close_bracket` function may have
1251
- // advanced the current state beyond our footnote's actual closing
1252
- // bracket, ie if it went looking for a `link_label`.
1253
- // Let's just rewind the subject's position:
1254
- subj->pos = initial_pos;
1255
-
1256
- cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
1257
-
1258
- // the start and end of the footnote ref is the opening and closing brace
1259
- // i.e. the subject's current position, and the opener's start_column
1260
- int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset;
1261
- int fnref_start_column = opener->inl_text->start_column;
1262
-
1263
- // any given node delineates a substring of the line being processed,
1264
- // with the remainder of the line being pointed to thru its 'literal'
1265
- // struct member.
1266
- // here, we copy the literal's pointer, moving it past the '^' character
1267
- // for a length equal to the size of footnote reference text.
1268
- // i.e. end_col minus start_col, minus the [ and the ^ characters
1269
- //
1270
- // this copies the footnote reference string, even if between the
1271
- // `opener` and the subject's current position there are other nodes
1272
- //
1273
- // (first, check for underflows)
1274
- if ((fnref_start_column + 2) <= fnref_end_column) {
1275
- fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2);
1276
- } else {
1277
- fnref->as.literal = cmark_chunk_dup(literal, 1, 0);
1278
- }
1279
-
1280
- fnref->start_line = fnref->end_line = subj->line;
1281
- fnref->start_column = fnref_start_column;
1282
- fnref->end_column = fnref_end_column;
1283
-
1284
- // we then replace the opener with this new fnref node, the net effect
1285
- // being replacing the opening '[' text node with a `^footnote-ref]` node.
1286
- cmark_node_insert_before(opener->inl_text, fnref);
1287
-
1288
- process_emphasis(parser, subj, opener->position);
1289
- // sometimes, the footnote reference text gets parsed into multiple nodes
1290
- // i.e. '[^example]' parsed into '[', '^exam', 'ple]'.
1291
- // this happens for ex with the autolink extension. when the autolinker
1292
- // finds the 'w' character, it will split the text into multiple nodes
1293
- // in hopes of being able to match a 'www.' substring.
1294
- //
1295
- // because this function is called one character at a time via the
1296
- // `parse_inlines` function, and the current subj->pos is pointing at the
1297
- // closing ] brace, and because we copy all the text between the [ ]
1298
- // braces, we should be able to safely ignore and delete any nodes after
1299
- // the opener->inl_text->next.
1300
- //
1301
- // therefore, here we walk thru the list and free them all up
1302
- cmark_node *next_node;
1303
- cmark_node *current_node = opener->inl_text->next;
1304
- while(current_node) {
1305
- next_node = current_node->next;
1306
- cmark_node_free(current_node);
1307
- current_node = next_node;
1308
- }
1309
-
1310
- cmark_node_free(opener->inl_text);
1311
-
1312
- pop_bracket(subj);
1313
- return NULL;
1314
- }
1315
- }
1316
-
1317
- pop_bracket(subj); // remove this opener from delimiter list
1318
- subj->pos = initial_pos;
1319
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1320
-
1321
- match:
1322
- inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
1323
- inl->as.link.url = url;
1324
- inl->as.link.title = title;
1325
- inl->start_line = inl->end_line = subj->line;
1326
- inl->start_column = opener->inl_text->start_column;
1327
- inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
1328
- cmark_node_insert_before(opener->inl_text, inl);
1329
- // Add link text:
1330
- tmp = opener->inl_text->next;
1331
- while (tmp) {
1332
- tmpnext = tmp->next;
1333
- cmark_node_unlink(tmp);
1334
- append_child(inl, tmp);
1335
- tmp = tmpnext;
1336
- }
1337
-
1338
- // Free the bracket [:
1339
- cmark_node_free(opener->inl_text);
1340
-
1341
- process_emphasis(parser, subj, opener->position);
1342
- pop_bracket(subj);
1343
-
1344
- // Now, if we have a link, we also want to deactivate links until
1345
- // we get a new opener. (This code can be removed if we decide to allow links
1346
- // inside links.)
1347
- if (!is_image) {
1348
- subj->no_link_openers = true;
1349
- }
1350
-
1351
- return NULL;
1352
- }
1353
-
1354
- // Parse a hard or soft linebreak, returning an inline.
1355
- // Assumes the subject has a cr or newline at the current position.
1356
- static cmark_node *handle_newline(subject *subj) {
1357
- bufsize_t nlpos = subj->pos;
1358
- // skip over cr, crlf, or lf:
1359
- if (peek_at(subj, subj->pos) == '\r') {
1360
- advance(subj);
1361
- }
1362
- if (peek_at(subj, subj->pos) == '\n') {
1363
- advance(subj);
1364
- }
1365
- ++subj->line;
1366
- subj->column_offset = -subj->pos;
1367
- // skip spaces at beginning of line
1368
- skip_spaces(subj);
1369
- if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
1370
- peek_at(subj, nlpos - 2) == ' ') {
1371
- return make_linebreak(subj->mem);
1372
- } else {
1373
- return make_softbreak(subj->mem);
1374
- }
1375
- }
1376
-
1377
// Lookup table indexed by byte value: non-zero for bytes that stop a run of
// plain text. Initially the set is "\r\n\\`&_*[]<!". The table is mutable:
// cmark_inlines_add_special_character and
// cmark_inlines_remove_special_character change entries at run time.
static int8_t SPECIAL_CHARS[256] = {
    ['\n'] = 1, ['\r'] = 1, ['!'] = 1,  ['&'] = 1, ['*'] = 1, ['<'] = 1,
    ['['] = 1,  ['\\'] = 1, [']'] = 1,  ['_'] = 1, ['`'] = 1,
};
1390
-
1391
// Lookup table indexed by byte value: non-zero for the bytes " ' . -
// which additionally stop a run of plain text when CMARK_OPT_SMART is set.
static char SMART_PUNCT_CHARS[256] = {
    ['"'] = 1, ['\''] = 1, ['-'] = 1, ['.'] = 1,
};
1405
-
1406
- static bufsize_t subject_find_special_char(subject *subj, int options) {
1407
- bufsize_t n = subj->pos + 1;
1408
-
1409
- while (n < subj->input.len) {
1410
- if (SPECIAL_CHARS[subj->input.data[n]])
1411
- return n;
1412
- if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]])
1413
- return n;
1414
- n++;
1415
- }
1416
-
1417
- return subj->input.len;
1418
- }
1419
-
1420
- void cmark_inlines_add_special_character(unsigned char c, bool emphasis) {
1421
- SPECIAL_CHARS[c] = 1;
1422
- if (emphasis)
1423
- SKIP_CHARS[c] = 1;
1424
- }
1425
-
1426
- void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) {
1427
- SPECIAL_CHARS[c] = 0;
1428
- if (emphasis)
1429
- SKIP_CHARS[c] = 0;
1430
- }
1431
-
1432
- static cmark_node *try_extensions(cmark_parser *parser,
1433
- cmark_node *parent,
1434
- unsigned char c,
1435
- subject *subj) {
1436
- cmark_node *res = NULL;
1437
- cmark_llist *tmp;
1438
-
1439
- for (tmp = parser->inline_syntax_extensions; tmp; tmp = tmp->next) {
1440
- cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data;
1441
- res = ext->match_inline(ext, parser, parent, c, subj);
1442
-
1443
- if (res)
1444
- break;
1445
- }
1446
-
1447
- return res;
1448
- }
1449
-
1450
- // Parse an inline, advancing subject, and add it as a child of parent.
1451
- // Return 0 if no inline can be parsed, 1 otherwise.
1452
- static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options) {
1453
- cmark_node *new_inl = NULL;
1454
- cmark_chunk contents;
1455
- unsigned char c;
1456
- bufsize_t startpos, endpos;
1457
- c = peek_char(subj);
1458
- if (c == 0) {
1459
- return 0;
1460
- }
1461
- switch (c) {
1462
- case '\r':
1463
- case '\n':
1464
- new_inl = handle_newline(subj);
1465
- break;
1466
- case '`':
1467
- new_inl = handle_backticks(subj, options);
1468
- break;
1469
- case '\\':
1470
- new_inl = handle_backslash(parser, subj);
1471
- break;
1472
- case '&':
1473
- new_inl = handle_entity(subj);
1474
- break;
1475
- case '<':
1476
- new_inl = handle_pointy_brace(subj, options);
1477
- break;
1478
- case '*':
1479
- case '_':
1480
- case '\'':
1481
- case '"':
1482
- new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0);
1483
- break;
1484
- case '-':
1485
- new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0);
1486
- break;
1487
- case '.':
1488
- new_inl = handle_period(subj, (options & CMARK_OPT_SMART) != 0);
1489
- break;
1490
- case '[':
1491
- advance(subj);
1492
- new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));
1493
- push_bracket(subj, false, new_inl);
1494
- break;
1495
- case ']':
1496
- new_inl = handle_close_bracket(parser, subj);
1497
- break;
1498
- case '!':
1499
- advance(subj);
1500
- if (peek_char(subj) == '[' && peek_char_n(subj, 1) != '^') {
1501
- advance(subj);
1502
- new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
1503
- push_bracket(subj, true, new_inl);
1504
- } else {
1505
- new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
1506
- }
1507
- break;
1508
- default:
1509
- new_inl = try_extensions(parser, parent, c, subj);
1510
- if (new_inl != NULL)
1511
- break;
1512
-
1513
- endpos = subject_find_special_char(subj, options);
1514
- contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
1515
- startpos = subj->pos;
1516
- subj->pos = endpos;
1517
-
1518
- // if we're at a newline, strip trailing spaces.
1519
- if (S_is_line_end_char(peek_char(subj))) {
1520
- cmark_chunk_rtrim(&contents);
1521
- }
1522
-
1523
- new_inl = make_str(subj, startpos, endpos - 1, contents);
1524
- }
1525
- if (new_inl != NULL) {
1526
- append_child(parent, new_inl);
1527
- }
1528
-
1529
- return 1;
1530
- }
1531
-
1532
- // Parse inlines from parent's string_content, adding as children of parent.
1533
- void cmark_parse_inlines(cmark_parser *parser,
1534
- cmark_node *parent,
1535
- cmark_map *refmap,
1536
- int options) {
1537
- subject subj;
1538
- cmark_chunk content = {parent->content.ptr, parent->content.size, 0};
1539
- subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap);
1540
- cmark_chunk_rtrim(&subj.input);
1541
-
1542
- while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options))
1543
- ;
1544
-
1545
- process_emphasis(parser, &subj, 0);
1546
- // free bracket and delim stack
1547
- while (subj.last_delim) {
1548
- remove_delimiter(&subj, subj.last_delim);
1549
- }
1550
- while (subj.last_bracket) {
1551
- pop_bracket(&subj);
1552
- }
1553
- }
1554
-
1555
- // Parse zero or more space characters, including at most one newline.
1556
- static void spnl(subject *subj) {
1557
- skip_spaces(subj);
1558
- if (skip_line_end(subj)) {
1559
- skip_spaces(subj);
1560
- }
1561
- }
1562
-
1563
- // Parse reference. Assumes string begins with '[' character.
1564
- // Modify refmap if a reference is encountered.
1565
- // Return 0 if no reference found, otherwise position of subject
1566
- // after reference is parsed.
1567
- bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
1568
- cmark_map *refmap) {
1569
- subject subj;
1570
-
1571
- cmark_chunk lab;
1572
- cmark_chunk url;
1573
- cmark_chunk title;
1574
-
1575
- bufsize_t matchlen = 0;
1576
- bufsize_t beforetitle;
1577
-
1578
- subject_from_buf(mem, -1, 0, &subj, input, NULL);
1579
-
1580
- // parse label:
1581
- if (!link_label(&subj, &lab) || lab.len == 0)
1582
- return 0;
1583
-
1584
- // colon:
1585
- if (peek_char(&subj) == ':') {
1586
- advance(&subj);
1587
- } else {
1588
- return 0;
1589
- }
1590
-
1591
- // parse link url:
1592
- spnl(&subj);
1593
- if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) {
1594
- subj.pos += matchlen;
1595
- } else {
1596
- return 0;
1597
- }
1598
-
1599
- // parse optional link_title
1600
- beforetitle = subj.pos;
1601
- spnl(&subj);
1602
- matchlen = subj.pos == beforetitle ? 0 : scan_link_title(&subj.input, subj.pos);
1603
- if (matchlen) {
1604
- title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1605
- subj.pos += matchlen;
1606
- } else {
1607
- subj.pos = beforetitle;
1608
- title = cmark_chunk_literal("");
1609
- }
1610
-
1611
- // parse final spaces and newline:
1612
- skip_spaces(&subj);
1613
- if (!skip_line_end(&subj)) {
1614
- if (matchlen) { // try rewinding before title
1615
- subj.pos = beforetitle;
1616
- skip_spaces(&subj);
1617
- if (!skip_line_end(&subj)) {
1618
- return 0;
1619
- }
1620
- } else {
1621
- return 0;
1622
- }
1623
- }
1624
- // insert reference into refmap
1625
- cmark_reference_create(refmap, &lab, &url, &title);
1626
- return subj.pos;
1627
- }
1628
-
1629
- unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser) {
1630
- return peek_char(parser);
1631
- }
1632
-
1633
- unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, bufsize_t pos) {
1634
- return peek_at(parser, pos);
1635
- }
1636
-
1637
- int cmark_inline_parser_is_eof(cmark_inline_parser *parser) {
1638
- return is_eof(parser);
1639
- }
1640
-
1641
// Return a freshly malloc'ed, NUL-terminated copy of at most `n` bytes of
// `s`, stopping early at a NUL byte if one occurs within those `n` bytes.
// Never reads beyond s[n - 1], so `s` does not have to be NUL-terminated.
// Returns NULL if allocation fails; the caller owns (and frees) the result.
static char *
my_strndup (const char *s, size_t n)
{
  // bounded scan: strlen() would keep reading past n on unterminated input
  const char *nul = memchr (s, '\0', n);
  size_t len = nul ? (size_t) (nul - s) : n;
  char *result = malloc (len + 1);

  if (!result)
    return NULL;

  memcpy (result, s, len);
  result[len] = '\0';
  return result;
}
1657
-
1658
- char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred) {
1659
- unsigned char c;
1660
- bufsize_t startpos = parser->pos;
1661
- bufsize_t len = 0;
1662
-
1663
- while ((c = peek_char(parser)) && (*pred)(c)) {
1664
- advance(parser);
1665
- len++;
1666
- }
1667
-
1668
- return my_strndup((const char *) parser->input.data + startpos, len);
1669
- }
1670
-
1671
- void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser,
1672
- unsigned char c,
1673
- int can_open,
1674
- int can_close,
1675
- cmark_node *inl_text) {
1676
- push_delimiter(parser, c, can_open != 0, can_close != 0, inl_text);
1677
- }
1678
-
1679
- void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim) {
1680
- remove_delimiter(parser, delim);
1681
- }
1682
-
1683
- int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser,
1684
- int max_delims,
1685
- unsigned char c,
1686
- int *left_flanking,
1687
- int *right_flanking,
1688
- int *punct_before,
1689
- int *punct_after) {
1690
- int numdelims = 0;
1691
- bufsize_t before_char_pos;
1692
- int32_t after_char = 0;
1693
- int32_t before_char = 0;
1694
- int len;
1695
- bool space_before, space_after;
1696
-
1697
- if (parser->pos == 0) {
1698
- before_char = 10;
1699
- } else {
1700
- before_char_pos = parser->pos - 1;
1701
- // walk back to the beginning of the UTF_8 sequence:
1702
- while (peek_at(parser, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
1703
- before_char_pos -= 1;
1704
- }
1705
- len = cmark_utf8proc_iterate(parser->input.data + before_char_pos,
1706
- parser->pos - before_char_pos, &before_char);
1707
- if (len == -1) {
1708
- before_char = 10;
1709
- }
1710
- }
1711
-
1712
- while (peek_char(parser) == c && numdelims < max_delims) {
1713
- numdelims++;
1714
- advance(parser);
1715
- }
1716
-
1717
- len = cmark_utf8proc_iterate(parser->input.data + parser->pos,
1718
- parser->input.len - parser->pos, &after_char);
1719
- if (len == -1) {
1720
- after_char = 10;
1721
- }
1722
-
1723
- *punct_before = cmark_utf8proc_is_punctuation(before_char);
1724
- *punct_after = cmark_utf8proc_is_punctuation(after_char);
1725
- space_before = cmark_utf8proc_is_space(before_char) != 0;
1726
- space_after = cmark_utf8proc_is_space(after_char) != 0;
1727
-
1728
- *left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
1729
- !(*punct_after && !space_before && !*punct_before);
1730
- *right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
1731
- !(*punct_before && !space_after && !*punct_after);
1732
-
1733
- return numdelims;
1734
- }
1735
-
1736
- void cmark_inline_parser_advance_offset(cmark_inline_parser *parser) {
1737
- advance(parser);
1738
- }
1739
-
1740
- int cmark_inline_parser_get_offset(cmark_inline_parser *parser) {
1741
- return parser->pos;
1742
- }
1743
-
1744
- void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset) {
1745
- parser->pos = offset;
1746
- }
1747
-
1748
- int cmark_inline_parser_get_column(cmark_inline_parser *parser) {
1749
- return parser->pos + 1 + parser->column_offset + parser->block_offset;
1750
- }
1751
-
1752
- cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) {
1753
- return &parser->input;
1754
- }
1755
-
1756
- int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image) {
1757
- bracket *b = parser->last_bracket;
1758
- if (!b) {
1759
- return 0;
1760
- }
1761
- if (image != 0) {
1762
- return b->in_bracket_image1;
1763
- } else {
1764
- return b->in_bracket_image0;
1765
- }
1766
- }
1767
-
1768
- void cmark_node_unput(cmark_node *node, int n) {
1769
- node = node->last_child;
1770
- while (n > 0 && node && node->type == CMARK_NODE_TEXT) {
1771
- if (node->as.literal.len < n) {
1772
- n -= node->as.literal.len;
1773
- node->as.literal.len = 0;
1774
- } else {
1775
- node->as.literal.len -= n;
1776
- n = 0;
1777
- }
1778
- node = node->prev;
1779
- }
1780
- }
1781
-
1782
- delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser) {
1783
- return parser->last_delim;
1784
- }
1785
-
1786
- int cmark_inline_parser_get_line(cmark_inline_parser *parser) {
1787
- return parser->line;
1788
- }