commonmarker 0.23.10 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +1221 -0
  3. data/Cargo.toml +7 -0
  4. data/README.md +233 -172
  5. data/ext/commonmarker/Cargo.toml +20 -0
  6. data/ext/commonmarker/extconf.rb +3 -6
  7. data/ext/commonmarker/src/lib.rs +103 -0
  8. data/ext/commonmarker/src/node.rs +1160 -0
  9. data/ext/commonmarker/src/options.rs +216 -0
  10. data/ext/commonmarker/src/plugins/syntax_highlighting.rs +166 -0
  11. data/ext/commonmarker/src/plugins.rs +6 -0
  12. data/ext/commonmarker/src/utils.rs +8 -0
  13. data/lib/commonmarker/config.rb +91 -40
  14. data/lib/commonmarker/constants.rb +7 -0
  15. data/lib/commonmarker/extension.rb +14 -0
  16. data/lib/commonmarker/node/ast.rb +8 -0
  17. data/lib/commonmarker/node/inspect.rb +14 -4
  18. data/lib/commonmarker/node.rb +29 -47
  19. data/lib/commonmarker/renderer.rb +1 -127
  20. data/lib/commonmarker/utils.rb +22 -0
  21. data/lib/commonmarker/version.rb +2 -2
  22. data/lib/commonmarker.rb +27 -25
  23. metadata +38 -186
  24. data/Rakefile +0 -109
  25. data/bin/commonmarker +0 -118
  26. data/commonmarker.gemspec +0 -38
  27. data/ext/commonmarker/arena.c +0 -104
  28. data/ext/commonmarker/autolink.c +0 -508
  29. data/ext/commonmarker/autolink.h +0 -8
  30. data/ext/commonmarker/blocks.c +0 -1622
  31. data/ext/commonmarker/buffer.c +0 -278
  32. data/ext/commonmarker/buffer.h +0 -116
  33. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  34. data/ext/commonmarker/chunk.h +0 -135
  35. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  36. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -737
  37. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  38. data/ext/commonmarker/cmark-gfm.h +0 -833
  39. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  40. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  41. data/ext/commonmarker/cmark.c +0 -55
  42. data/ext/commonmarker/cmark_ctype.c +0 -44
  43. data/ext/commonmarker/cmark_ctype.h +0 -33
  44. data/ext/commonmarker/commonmark.c +0 -514
  45. data/ext/commonmarker/commonmarker.c +0 -1308
  46. data/ext/commonmarker/commonmarker.h +0 -16
  47. data/ext/commonmarker/config.h +0 -76
  48. data/ext/commonmarker/core-extensions.c +0 -27
  49. data/ext/commonmarker/entities.inc +0 -2138
  50. data/ext/commonmarker/ext_scanners.c +0 -879
  51. data/ext/commonmarker/ext_scanners.h +0 -24
  52. data/ext/commonmarker/footnotes.c +0 -63
  53. data/ext/commonmarker/footnotes.h +0 -27
  54. data/ext/commonmarker/houdini.h +0 -57
  55. data/ext/commonmarker/houdini_href_e.c +0 -100
  56. data/ext/commonmarker/houdini_html_e.c +0 -66
  57. data/ext/commonmarker/houdini_html_u.c +0 -149
  58. data/ext/commonmarker/html.c +0 -502
  59. data/ext/commonmarker/html.h +0 -27
  60. data/ext/commonmarker/inlines.c +0 -1788
  61. data/ext/commonmarker/inlines.h +0 -29
  62. data/ext/commonmarker/iterator.c +0 -159
  63. data/ext/commonmarker/iterator.h +0 -26
  64. data/ext/commonmarker/latex.c +0 -468
  65. data/ext/commonmarker/linked_list.c +0 -37
  66. data/ext/commonmarker/man.c +0 -274
  67. data/ext/commonmarker/map.c +0 -129
  68. data/ext/commonmarker/map.h +0 -44
  69. data/ext/commonmarker/node.c +0 -1045
  70. data/ext/commonmarker/node.h +0 -167
  71. data/ext/commonmarker/parser.h +0 -59
  72. data/ext/commonmarker/plaintext.c +0 -218
  73. data/ext/commonmarker/plugin.c +0 -36
  74. data/ext/commonmarker/plugin.h +0 -34
  75. data/ext/commonmarker/references.c +0 -43
  76. data/ext/commonmarker/references.h +0 -26
  77. data/ext/commonmarker/registry.c +0 -63
  78. data/ext/commonmarker/registry.h +0 -24
  79. data/ext/commonmarker/render.c +0 -213
  80. data/ext/commonmarker/render.h +0 -62
  81. data/ext/commonmarker/scanners.c +0 -14056
  82. data/ext/commonmarker/scanners.h +0 -70
  83. data/ext/commonmarker/scanners.re +0 -341
  84. data/ext/commonmarker/strikethrough.c +0 -167
  85. data/ext/commonmarker/strikethrough.h +0 -9
  86. data/ext/commonmarker/syntax_extension.c +0 -149
  87. data/ext/commonmarker/syntax_extension.h +0 -34
  88. data/ext/commonmarker/table.c +0 -917
  89. data/ext/commonmarker/table.h +0 -12
  90. data/ext/commonmarker/tagfilter.c +0 -60
  91. data/ext/commonmarker/tagfilter.h +0 -8
  92. data/ext/commonmarker/tasklist.c +0 -156
  93. data/ext/commonmarker/tasklist.h +0 -8
  94. data/ext/commonmarker/utf8.c +0 -317
  95. data/ext/commonmarker/utf8.h +0 -35
  96. data/ext/commonmarker/xml.c +0 -182
  97. data/lib/commonmarker/renderer/html_renderer.rb +0 -256
@@ -1,1788 +0,0 @@
1
- #include <stdlib.h>
2
- #include <string.h>
3
- #include <stdio.h>
4
-
5
- #include "cmark_ctype.h"
6
- #include "config.h"
7
- #include "node.h"
8
- #include "parser.h"
9
- #include "references.h"
10
- #include "cmark-gfm.h"
11
- #include "houdini.h"
12
- #include "utf8.h"
13
- #include "scanners.h"
14
- #include "inlines.h"
15
- #include "syntax_extension.h"
16
-
17
// UTF-8 byte sequences for the typographic characters substituted by the
// "smart" punctuation handlers below.
static const char *EMDASH = "\xE2\x80\x94";           // U+2014
static const char *ENDASH = "\xE2\x80\x93";           // U+2013
static const char *ELLIPSES = "\xE2\x80\xA6";         // U+2026
static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C";  // U+201C
static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; // U+201D
static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";  // U+2018
static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // U+2019
24
-
25
// Macros for creating the various kinds of simple inline nodes.
// The literal-carrying ones forward to make_literal(); the rest forward to
// make_simple().
#define make_str(subj, start_col, end_col, chunk)                              \
  make_literal(subj, CMARK_NODE_TEXT, start_col, end_col, chunk)
#define make_code(subj, start_col, end_col, chunk)                             \
  make_literal(subj, CMARK_NODE_CODE, start_col, end_col, chunk)
#define make_raw_html(subj, start_col, end_col, chunk)                         \
  make_literal(subj, CMARK_NODE_HTML_INLINE, start_col, end_col, chunk)
#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
#define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG)

// Longest backtick run whose closer position is remembered in
// subject.backticks; longer opening runs are never matched.
#define MAXBACKTICKS 80
35
-
36
- typedef struct bracket {
37
- struct bracket *previous;
38
- cmark_node *inl_text;
39
- bufsize_t position;
40
- bool image;
41
- bool active;
42
- bool bracket_after;
43
- bool in_bracket_image0;
44
- bool in_bracket_image1;
45
- } bracket;
46
-
47
- #define FLAG_SKIP_HTML_CDATA (1u << 0)
48
- #define FLAG_SKIP_HTML_DECLARATION (1u << 1)
49
- #define FLAG_SKIP_HTML_PI (1u << 2)
50
- #define FLAG_SKIP_HTML_COMMENT (1u << 3)
51
-
52
- typedef struct subject{
53
- cmark_mem *mem;
54
- cmark_chunk input;
55
- unsigned flags;
56
- int line;
57
- bufsize_t pos;
58
- int block_offset;
59
- int column_offset;
60
- cmark_map *refmap;
61
- delimiter *last_delim;
62
- bracket *last_bracket;
63
- bufsize_t backticks[MAXBACKTICKS + 1];
64
- bool scanned_for_backticks;
65
- bool no_link_openers;
66
- } subject;
67
-
68
- // Extensions may populate this.
69
- static int8_t SKIP_CHARS[256];
70
-
71
- static CMARK_INLINE bool S_is_line_end_char(char c) {
72
- return (c == '\n' || c == '\r');
73
- }
74
-
75
- static delimiter *S_insert_emph(subject *subj, delimiter *opener,
76
- delimiter *closer);
77
-
78
- static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options);
79
-
80
- static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
81
- cmark_chunk *buffer, cmark_map *refmap);
82
- static bufsize_t subject_find_special_char(subject *subj, int options);
83
-
84
- // Create an inline with a literal string value.
85
- static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
86
- int start_column, int end_column,
87
- cmark_chunk s) {
88
- cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e));
89
- cmark_strbuf_init(subj->mem, &e->content, 0);
90
- e->type = (uint16_t)t;
91
- e->as.literal = s;
92
- e->start_line = e->end_line = subj->line;
93
- // columns are 1 based.
94
- e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
95
- e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
96
- return e;
97
- }
98
-
99
- // Create an inline with no value.
100
- static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
101
- cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
102
- cmark_strbuf_init(mem, &e->content, 0);
103
- e->type = (uint16_t)t;
104
- return e;
105
- }
106
-
107
- // Like make_str, but parses entities.
108
- static cmark_node *make_str_with_entities(subject *subj,
109
- int start_column, int end_column,
110
- cmark_chunk *content) {
111
- cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);
112
-
113
- if (houdini_unescape_html(&unescaped, content->data, content->len)) {
114
- return make_str(subj, start_column, end_column, cmark_chunk_buf_detach(&unescaped));
115
- } else {
116
- return make_str(subj, start_column, end_column, *content);
117
- }
118
- }
119
-
120
- // Like cmark_node_append_child but without costly sanity checks.
121
- // Assumes that child was newly created.
122
- static void append_child(cmark_node *node, cmark_node *child) {
123
- cmark_node *old_last_child = node->last_child;
124
-
125
- child->next = NULL;
126
- child->prev = old_last_child;
127
- child->parent = node;
128
- node->last_child = child;
129
-
130
- if (old_last_child) {
131
- old_last_child->next = child;
132
- } else {
133
- // Also set first_child if node previously had no children.
134
- node->first_child = child;
135
- }
136
- }
137
-
138
- // Duplicate a chunk by creating a copy of the buffer not by reusing the
139
- // buffer like cmark_chunk_dup does.
140
- static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) {
141
- cmark_chunk c;
142
- bufsize_t len = src->len;
143
-
144
- c.len = len;
145
- c.data = (unsigned char *)mem->calloc(len + 1, 1);
146
- c.alloc = 1;
147
- if (len)
148
- memcpy(c.data, src->data, len);
149
- c.data[len] = '\0';
150
-
151
- return c;
152
- }
153
-
154
- static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
155
- int is_email) {
156
- cmark_strbuf buf = CMARK_BUF_INIT(mem);
157
-
158
- cmark_chunk_trim(url);
159
-
160
- if (url->len == 0) {
161
- cmark_chunk result = CMARK_CHUNK_EMPTY;
162
- return result;
163
- }
164
-
165
- if (is_email)
166
- cmark_strbuf_puts(&buf, "mailto:");
167
-
168
- houdini_unescape_html_f(&buf, url->data, url->len);
169
- return cmark_chunk_buf_detach(&buf);
170
- }
171
-
172
- static CMARK_INLINE cmark_node *make_autolink(subject *subj,
173
- int start_column, int end_column,
174
- cmark_chunk url, int is_email) {
175
- cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
176
- link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
177
- link->as.link.title = cmark_chunk_literal("");
178
- link->start_line = link->end_line = subj->line;
179
- link->start_column = start_column + 1;
180
- link->end_column = end_column + 1;
181
- append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
182
- return link;
183
- }
184
-
185
- static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
186
- cmark_chunk *chunk, cmark_map *refmap) {
187
- int i;
188
- e->mem = mem;
189
- e->input = *chunk;
190
- e->flags = 0;
191
- e->line = line_number;
192
- e->pos = 0;
193
- e->block_offset = block_offset;
194
- e->column_offset = 0;
195
- e->refmap = refmap;
196
- e->last_delim = NULL;
197
- e->last_bracket = NULL;
198
- for (i = 0; i <= MAXBACKTICKS; i++) {
199
- e->backticks[i] = 0;
200
- }
201
- e->scanned_for_backticks = false;
202
- e->no_link_openers = true;
203
- }
204
-
205
- static CMARK_INLINE int isbacktick(int c) { return (c == '`'); }
206
-
207
- static CMARK_INLINE unsigned char peek_char_n(subject *subj, bufsize_t n) {
208
- // NULL bytes should have been stripped out by now. If they're
209
- // present, it's a programming error:
210
- assert(!(subj->pos + n < subj->input.len && subj->input.data[subj->pos + n] == 0));
211
- return (subj->pos + n < subj->input.len) ? subj->input.data[subj->pos + n] : 0;
212
- }
213
-
214
- static CMARK_INLINE unsigned char peek_char(subject *subj) {
215
- return peek_char_n(subj, 0);
216
- }
217
-
218
- static CMARK_INLINE unsigned char peek_at(subject *subj, bufsize_t pos) {
219
- return subj->input.data[pos];
220
- }
221
-
222
- // Return true if there are more characters in the subject.
223
- static CMARK_INLINE int is_eof(subject *subj) {
224
- return (subj->pos >= subj->input.len);
225
- }
226
-
227
// Advance the subject by one byte. Doesn't check for eof.
// The expansion is fully parenthesised so that it behaves as a single
// expression wherever the macro is used.
#define advance(subj) ((subj)->pos += 1)
229
-
230
- static CMARK_INLINE bool skip_spaces(subject *subj) {
231
- bool skipped = false;
232
- while (peek_char(subj) == ' ' || peek_char(subj) == '\t') {
233
- advance(subj);
234
- skipped = true;
235
- }
236
- return skipped;
237
- }
238
-
239
- static CMARK_INLINE bool skip_line_end(subject *subj) {
240
- bool seen_line_end_char = false;
241
- if (peek_char(subj) == '\r') {
242
- advance(subj);
243
- seen_line_end_char = true;
244
- }
245
- if (peek_char(subj) == '\n') {
246
- advance(subj);
247
- seen_line_end_char = true;
248
- }
249
- return seen_line_end_char || is_eof(subj);
250
- }
251
-
252
- // Take characters while a predicate holds, and return a string.
253
- static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
254
- unsigned char c;
255
- bufsize_t startpos = subj->pos;
256
- bufsize_t len = 0;
257
-
258
- while ((c = peek_char(subj)) && (*f)(c)) {
259
- advance(subj);
260
- len++;
261
- }
262
-
263
- return cmark_chunk_dup(&subj->input, startpos, len);
264
- }
265
-
266
- // Return the number of newlines in a given span of text in a subject. If
267
- // the number is greater than zero, also return the number of characters
268
- // between the last newline and the end of the span in `since_newline`.
269
- static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
270
- int nls = 0;
271
- int since_nl = 0;
272
-
273
- while (len--) {
274
- if (subj->input.data[from++] == '\n') {
275
- ++nls;
276
- since_nl = 0;
277
- } else {
278
- ++since_nl;
279
- }
280
- }
281
-
282
- if (!nls)
283
- return 0;
284
-
285
- *since_newline = since_nl;
286
- return nls;
287
- }
288
-
289
- // Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and
290
- // `column_offset` according to the number of newlines in a just-matched span
291
- // of text in `subj`.
292
- static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) {
293
- if (!(options & CMARK_OPT_SOURCEPOS)) {
294
- return;
295
- }
296
-
297
- int since_newline;
298
- int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
299
- if (newlines) {
300
- subj->line += newlines;
301
- node->end_line += newlines;
302
- node->end_column = since_newline;
303
- subj->column_offset = -subj->pos + since_newline + extra;
304
- }
305
- }
306
-
307
- // Try to process a backtick code span that began with a
308
- // span of ticks of length openticklength length (already
309
- // parsed). Return 0 if you don't find matching closing
310
- // backticks, otherwise return the position in the subject
311
- // after the closing backticks.
312
- static bufsize_t scan_to_closing_backticks(subject *subj,
313
- bufsize_t openticklength) {
314
-
315
- bool found = false;
316
- if (openticklength > MAXBACKTICKS) {
317
- // we limit backtick string length because of the array subj->backticks:
318
- return 0;
319
- }
320
- if (subj->scanned_for_backticks &&
321
- subj->backticks[openticklength] <= subj->pos) {
322
- // return if we already know there's no closer
323
- return 0;
324
- }
325
- while (!found) {
326
- // read non backticks
327
- unsigned char c;
328
- while ((c = peek_char(subj)) && c != '`') {
329
- advance(subj);
330
- }
331
- if (is_eof(subj)) {
332
- break;
333
- }
334
- bufsize_t numticks = 0;
335
- while (peek_char(subj) == '`') {
336
- advance(subj);
337
- numticks++;
338
- }
339
- // store position of ender
340
- if (numticks <= MAXBACKTICKS) {
341
- subj->backticks[numticks] = subj->pos - numticks;
342
- }
343
- if (numticks == openticklength) {
344
- return (subj->pos);
345
- }
346
- }
347
- // got through whole input without finding closer
348
- subj->scanned_for_backticks = true;
349
- return 0;
350
- }
351
-
352
- // Destructively modify string, converting newlines to
353
- // spaces, then removing a single leading + trailing space,
354
- // unless the code span consists entirely of space characters.
355
- static void S_normalize_code(cmark_strbuf *s) {
356
- bufsize_t r, w;
357
- bool contains_nonspace = false;
358
-
359
- for (r = 0, w = 0; r < s->size; ++r) {
360
- switch (s->ptr[r]) {
361
- case '\r':
362
- if (s->ptr[r + 1] != '\n') {
363
- s->ptr[w++] = ' ';
364
- }
365
- break;
366
- case '\n':
367
- s->ptr[w++] = ' ';
368
- break;
369
- default:
370
- s->ptr[w++] = s->ptr[r];
371
- }
372
- if (s->ptr[r] != ' ') {
373
- contains_nonspace = true;
374
- }
375
- }
376
-
377
- // begins and ends with space?
378
- if (contains_nonspace &&
379
- s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
380
- cmark_strbuf_drop(s, 1);
381
- cmark_strbuf_truncate(s, w - 2);
382
- } else {
383
- cmark_strbuf_truncate(s, w);
384
- }
385
-
386
- }
387
-
388
-
389
- // Parse backtick code section or raw backticks, return an inline.
390
- // Assumes that the subject has a backtick at the current position.
391
- static cmark_node *handle_backticks(subject *subj, int options) {
392
- cmark_chunk openticks = take_while(subj, isbacktick);
393
- bufsize_t startpos = subj->pos;
394
- bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
395
-
396
- if (endpos == 0) { // not found
397
- subj->pos = startpos; // rewind
398
- return make_str(subj, subj->pos, subj->pos, openticks);
399
- } else {
400
- cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
401
-
402
- cmark_strbuf_set(&buf, subj->input.data + startpos,
403
- endpos - startpos - openticks.len);
404
- S_normalize_code(&buf);
405
-
406
- cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
407
- adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
408
- return node;
409
- }
410
- }
411
-
412
-
413
- // Scan ***, **, or * and return number scanned, or 0.
414
- // Advances position.
415
- static int scan_delims(subject *subj, unsigned char c, bool *can_open,
416
- bool *can_close) {
417
- int numdelims = 0;
418
- bufsize_t before_char_pos, after_char_pos;
419
- int32_t after_char = 0;
420
- int32_t before_char = 0;
421
- int len;
422
- bool left_flanking, right_flanking;
423
-
424
- if (subj->pos == 0) {
425
- before_char = 10;
426
- } else {
427
- before_char_pos = subj->pos - 1;
428
- // walk back to the beginning of the UTF_8 sequence:
429
- while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) {
430
- before_char_pos -= 1;
431
- }
432
- len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
433
- subj->pos - before_char_pos, &before_char);
434
- if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) {
435
- before_char = 10;
436
- }
437
- }
438
-
439
- if (c == '\'' || c == '"') {
440
- numdelims++;
441
- advance(subj); // limit to 1 delim for quotes
442
- } else {
443
- while (peek_char(subj) == c) {
444
- numdelims++;
445
- advance(subj);
446
- }
447
- }
448
-
449
- if (subj->pos == subj->input.len) {
450
- after_char = 10;
451
- } else {
452
- after_char_pos = subj->pos;
453
- while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) {
454
- after_char_pos += 1;
455
- }
456
- len = cmark_utf8proc_iterate(subj->input.data + after_char_pos,
457
- subj->input.len - after_char_pos, &after_char);
458
- if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) {
459
- after_char = 10;
460
- }
461
- }
462
-
463
- left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
464
- (!cmark_utf8proc_is_punctuation(after_char) ||
465
- cmark_utf8proc_is_space(before_char) ||
466
- cmark_utf8proc_is_punctuation(before_char));
467
- right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
468
- (!cmark_utf8proc_is_punctuation(before_char) ||
469
- cmark_utf8proc_is_space(after_char) ||
470
- cmark_utf8proc_is_punctuation(after_char));
471
- if (c == '_') {
472
- *can_open = left_flanking &&
473
- (!right_flanking || cmark_utf8proc_is_punctuation(before_char));
474
- *can_close = right_flanking &&
475
- (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
476
- } else if (c == '\'' || c == '"') {
477
- *can_open = left_flanking && !right_flanking &&
478
- before_char != ']' && before_char != ')';
479
- *can_close = right_flanking;
480
- } else {
481
- *can_open = left_flanking;
482
- *can_close = right_flanking;
483
- }
484
- return numdelims;
485
- }
486
-
487
- /*
488
- static void print_delimiters(subject *subj)
489
- {
490
- delimiter *delim;
491
- delim = subj->last_delim;
492
- while (delim != NULL) {
493
- printf("Item at stack pos %p: %d %d %d next(%p) prev(%p)\n",
494
- (void*)delim, delim->delim_char,
495
- delim->can_open, delim->can_close,
496
- (void*)delim->next, (void*)delim->previous);
497
- delim = delim->previous;
498
- }
499
- }
500
- */
501
-
502
- static void remove_delimiter(subject *subj, delimiter *delim) {
503
- if (delim == NULL)
504
- return;
505
- if (delim->next == NULL) {
506
- // end of list:
507
- assert(delim == subj->last_delim);
508
- subj->last_delim = delim->previous;
509
- } else {
510
- delim->next->previous = delim->previous;
511
- }
512
- if (delim->previous != NULL) {
513
- delim->previous->next = delim->next;
514
- }
515
- subj->mem->free(delim);
516
- }
517
-
518
- static void pop_bracket(subject *subj) {
519
- bracket *b;
520
- if (subj->last_bracket == NULL)
521
- return;
522
- b = subj->last_bracket;
523
- subj->last_bracket = subj->last_bracket->previous;
524
- subj->mem->free(b);
525
- }
526
-
527
- static void push_delimiter(subject *subj, unsigned char c, bool can_open,
528
- bool can_close, cmark_node *inl_text) {
529
- delimiter *delim = (delimiter *)subj->mem->calloc(1, sizeof(delimiter));
530
- delim->delim_char = c;
531
- delim->can_open = can_open;
532
- delim->can_close = can_close;
533
- delim->inl_text = inl_text;
534
- delim->position = subj->pos;
535
- delim->length = inl_text->as.literal.len;
536
- delim->previous = subj->last_delim;
537
- delim->next = NULL;
538
- if (delim->previous != NULL) {
539
- delim->previous->next = delim;
540
- }
541
- subj->last_delim = delim;
542
- }
543
-
544
- static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
545
- bracket *b = (bracket *)subj->mem->calloc(1, sizeof(bracket));
546
- if (subj->last_bracket != NULL) {
547
- subj->last_bracket->bracket_after = true;
548
- b->in_bracket_image0 = subj->last_bracket->in_bracket_image0;
549
- b->in_bracket_image1 = subj->last_bracket->in_bracket_image1;
550
- }
551
- b->image = image;
552
- b->active = true;
553
- b->inl_text = inl_text;
554
- b->previous = subj->last_bracket;
555
- b->position = subj->pos;
556
- b->bracket_after = false;
557
- if (image) {
558
- b->in_bracket_image1 = true;
559
- } else {
560
- b->in_bracket_image0 = true;
561
- }
562
- subj->last_bracket = b;
563
- if (!image) {
564
- subj->no_link_openers = false;
565
- }
566
- }
567
-
568
- // Assumes the subject has a c at the current position.
569
- static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
570
- bufsize_t numdelims;
571
- cmark_node *inl_text;
572
- bool can_open, can_close;
573
- cmark_chunk contents;
574
-
575
- numdelims = scan_delims(subj, c, &can_open, &can_close);
576
-
577
- if (c == '\'' && smart) {
578
- contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
579
- } else if (c == '"' && smart) {
580
- contents =
581
- cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
582
- } else {
583
- contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
584
- }
585
-
586
- inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);
587
-
588
- if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
589
- push_delimiter(subj, c, can_open, can_close, inl_text);
590
- }
591
-
592
- return inl_text;
593
- }
594
-
595
- // Assumes we have a hyphen at the current position.
596
- static cmark_node *handle_hyphen(subject *subj, bool smart) {
597
- int startpos = subj->pos;
598
-
599
- advance(subj);
600
-
601
- if (!smart || peek_char(subj) != '-') {
602
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
603
- }
604
-
605
- while (smart && peek_char(subj) == '-') {
606
- advance(subj);
607
- }
608
-
609
- int numhyphens = subj->pos - startpos;
610
- int en_count = 0;
611
- int em_count = 0;
612
- int i;
613
- cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
614
-
615
- if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
616
- em_count = numhyphens / 3;
617
- } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
618
- en_count = numhyphens / 2;
619
- } else if (numhyphens % 3 == 2) { // use one en dash at end
620
- en_count = 1;
621
- em_count = (numhyphens - 2) / 3;
622
- } else { // use two en dashes at the end
623
- en_count = 2;
624
- em_count = (numhyphens - 4) / 3;
625
- }
626
-
627
- for (i = em_count; i > 0; i--) {
628
- cmark_strbuf_puts(&buf, EMDASH);
629
- }
630
-
631
- for (i = en_count; i > 0; i--) {
632
- cmark_strbuf_puts(&buf, ENDASH);
633
- }
634
-
635
- return make_str(subj, startpos, subj->pos - 1, cmark_chunk_buf_detach(&buf));
636
- }
637
-
638
- // Assumes we have a period at the current position.
639
- static cmark_node *handle_period(subject *subj, bool smart) {
640
- advance(subj);
641
- if (smart && peek_char(subj) == '.') {
642
- advance(subj);
643
- if (peek_char(subj) == '.') {
644
- advance(subj);
645
- return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
646
- } else {
647
- return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
648
- }
649
- } else {
650
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
651
- }
652
- }
653
-
654
- static cmark_syntax_extension *get_extension_for_special_char(cmark_parser *parser, unsigned char c) {
655
- cmark_llist *tmp_ext;
656
-
657
- for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) {
658
- cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data;
659
- cmark_llist *tmp_char;
660
- for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
661
- unsigned char tmp_c = (unsigned char)(size_t)tmp_char->data;
662
-
663
- if (tmp_c == c) {
664
- return ext;
665
- }
666
- }
667
- }
668
-
669
- return NULL;
670
- }
671
-
672
- static void process_emphasis(cmark_parser *parser, subject *subj, bufsize_t stack_bottom) {
673
- delimiter *candidate;
674
- delimiter *closer = NULL;
675
- delimiter *opener;
676
- delimiter *old_closer;
677
- bool opener_found;
678
- bufsize_t openers_bottom[3][128];
679
- int i;
680
-
681
- // initialize openers_bottom:
682
- memset(&openers_bottom, 0, sizeof(openers_bottom));
683
- for (i=0; i < 3; i++) {
684
- openers_bottom[i]['*'] = stack_bottom;
685
- openers_bottom[i]['_'] = stack_bottom;
686
- openers_bottom[i]['\''] = stack_bottom;
687
- openers_bottom[i]['"'] = stack_bottom;
688
- }
689
-
690
- // move back to first relevant delim.
691
- candidate = subj->last_delim;
692
- while (candidate != NULL && candidate->position >= stack_bottom) {
693
- closer = candidate;
694
- candidate = candidate->previous;
695
- }
696
-
697
- // now move forward, looking for closers, and handling each
698
- while (closer != NULL) {
699
- cmark_syntax_extension *extension = get_extension_for_special_char(parser, closer->delim_char);
700
- if (closer->can_close) {
701
- // Now look backwards for first matching opener:
702
- opener = closer->previous;
703
- opener_found = false;
704
- while (opener != NULL && opener->position >= stack_bottom &&
705
- opener->position >= openers_bottom[closer->length % 3][closer->delim_char]) {
706
- if (opener->can_open && opener->delim_char == closer->delim_char) {
707
- // interior closer of size 2 can't match opener of size 1
708
- // or of size 1 can't match 2
709
- if (!(closer->can_open || opener->can_close) ||
710
- closer->length % 3 == 0 ||
711
- (opener->length + closer->length) % 3 != 0) {
712
- opener_found = true;
713
- break;
714
- }
715
- }
716
- opener = opener->previous;
717
- }
718
- old_closer = closer;
719
-
720
- if (extension) {
721
- if (opener_found)
722
- closer = extension->insert_inline_from_delim(extension, parser, subj, opener, closer);
723
- else
724
- closer = closer->next;
725
- } else if (closer->delim_char == '*' || closer->delim_char == '_') {
726
- if (opener_found) {
727
- closer = S_insert_emph(subj, opener, closer);
728
- } else {
729
- closer = closer->next;
730
- }
731
- } else if (closer->delim_char == '\'' || closer->delim_char == '"') {
732
- cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
733
- if (closer->delim_char == '\'') {
734
- closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
735
- } else {
736
- closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
737
- }
738
- closer = closer->next;
739
- if (opener_found) {
740
- cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
741
- if (old_closer->delim_char == '\'') {
742
- opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
743
- } else {
744
- opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
745
- }
746
- remove_delimiter(subj, opener);
747
- remove_delimiter(subj, old_closer);
748
- }
749
- }
750
- if (!opener_found) {
751
- // set lower bound for future searches for openers
752
- openers_bottom[old_closer->length % 3][old_closer->delim_char] =
753
- old_closer->position;
754
- if (!old_closer->can_open) {
755
- // we can remove a closer that can't be an
756
- // opener, once we've seen there's no
757
- // matching opener:
758
- remove_delimiter(subj, old_closer);
759
- }
760
- }
761
- } else {
762
- closer = closer->next;
763
- }
764
- }
765
- // free all delimiters in list until stack_bottom:
766
- while (subj->last_delim != NULL &&
767
- subj->last_delim->position >= stack_bottom) {
768
- remove_delimiter(subj, subj->last_delim);
769
- }
770
- }
771
-
772
- static delimiter *S_insert_emph(subject *subj, delimiter *opener,
773
- delimiter *closer) {
774
- delimiter *delim, *tmp_delim;
775
- bufsize_t use_delims;
776
- cmark_node *opener_inl = opener->inl_text;
777
- cmark_node *closer_inl = closer->inl_text;
778
- bufsize_t opener_num_chars = opener_inl->as.literal.len;
779
- bufsize_t closer_num_chars = closer_inl->as.literal.len;
780
- cmark_node *tmp, *tmpnext, *emph;
781
-
782
- // calculate the actual number of characters used from this closer
783
- use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;
784
-
785
- // remove used characters from associated inlines.
786
- opener_num_chars -= use_delims;
787
- closer_num_chars -= use_delims;
788
- opener_inl->as.literal.len = opener_num_chars;
789
- closer_inl->as.literal.len = closer_num_chars;
790
-
791
- // free delimiters between opener and closer
792
- delim = closer->previous;
793
- while (delim != NULL && delim != opener) {
794
- tmp_delim = delim->previous;
795
- remove_delimiter(subj, delim);
796
- delim = tmp_delim;
797
- }
798
-
799
- // create new emph or strong, and splice it in to our inlines
800
- // between the opener and closer
801
- emph = use_delims == 1 ? make_emph(subj->mem) : make_strong(subj->mem);
802
-
803
- tmp = opener_inl->next;
804
- while (tmp && tmp != closer_inl) {
805
- tmpnext = tmp->next;
806
- cmark_node_unlink(tmp);
807
- append_child(emph, tmp);
808
- tmp = tmpnext;
809
- }
810
- cmark_node_insert_after(opener_inl, emph);
811
-
812
- emph->start_line = opener_inl->start_line;
813
- emph->end_line = closer_inl->end_line;
814
- emph->start_column = opener_inl->start_column;
815
- emph->end_column = closer_inl->end_column;
816
-
817
- // if opener has 0 characters, remove it and its associated inline
818
- if (opener_num_chars == 0) {
819
- cmark_node_free(opener_inl);
820
- remove_delimiter(subj, opener);
821
- }
822
-
823
- // if closer has 0 characters, remove it and its associated inline
824
- if (closer_num_chars == 0) {
825
- // remove empty closer inline
826
- cmark_node_free(closer_inl);
827
- // remove closer from list
828
- tmp_delim = closer->next;
829
- remove_delimiter(subj, closer);
830
- closer = tmp_delim;
831
- }
832
-
833
- return closer;
834
- }
835
-
836
- // Parse backslash-escape or just a backslash, returning an inline.
837
- static cmark_node *handle_backslash(cmark_parser *parser, subject *subj) {
838
- advance(subj);
839
- unsigned char nextchar = peek_char(subj);
840
- if ((parser->backslash_ispunct ? parser->backslash_ispunct : cmark_ispunct)(nextchar)) {
841
- // only ascii symbols and newline can be escaped
842
- advance(subj);
843
- return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
844
- } else if (!is_eof(subj) && skip_line_end(subj)) {
845
- return make_linebreak(subj->mem);
846
- } else {
847
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
848
- }
849
- }
850
-
851
- // Parse an entity or a regular "&" string.
852
- // Assumes the subject has an '&' character at the current position.
853
- static cmark_node *handle_entity(subject *subj) {
854
- cmark_strbuf ent = CMARK_BUF_INIT(subj->mem);
855
- bufsize_t len;
856
-
857
- advance(subj);
858
-
859
- len = houdini_unescape_ent(&ent, subj->input.data + subj->pos,
860
- subj->input.len - subj->pos);
861
-
862
- if (len == 0)
863
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&"));
864
-
865
- subj->pos += len;
866
- return make_str(subj, subj->pos - 1 - len, subj->pos - 1, cmark_chunk_buf_detach(&ent));
867
- }
868
-
869
- // Clean a URL: remove surrounding whitespace, and remove \ that escape
870
- // punctuation.
871
- cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
872
- cmark_strbuf buf = CMARK_BUF_INIT(mem);
873
-
874
- cmark_chunk_trim(url);
875
-
876
- if (url->len == 0) {
877
- cmark_chunk result = CMARK_CHUNK_EMPTY;
878
- return result;
879
- }
880
-
881
- houdini_unescape_html_f(&buf, url->data, url->len);
882
-
883
- cmark_strbuf_unescape(&buf);
884
- return cmark_chunk_buf_detach(&buf);
885
- }
886
-
887
- cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
888
- cmark_strbuf buf = CMARK_BUF_INIT(mem);
889
- unsigned char first, last;
890
-
891
- if (title->len == 0) {
892
- cmark_chunk result = CMARK_CHUNK_EMPTY;
893
- return result;
894
- }
895
-
896
- first = title->data[0];
897
- last = title->data[title->len - 1];
898
-
899
- // remove surrounding quotes if any:
900
- if ((first == '\'' && last == '\'') || (first == '(' && last == ')') ||
901
- (first == '"' && last == '"')) {
902
- houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
903
- } else {
904
- houdini_unescape_html_f(&buf, title->data, title->len);
905
- }
906
-
907
- cmark_strbuf_unescape(&buf);
908
- return cmark_chunk_buf_detach(&buf);
909
- }
910
-
911
- // Parse an autolink or HTML tag.
912
- // Assumes the subject has a '<' character at the current position.
913
- static cmark_node *handle_pointy_brace(subject *subj, int options) {
914
- bufsize_t matchlen = 0;
915
- cmark_chunk contents;
916
-
917
- advance(subj); // advance past first <
918
-
919
- // first try to match a URL autolink
920
- matchlen = scan_autolink_uri(&subj->input, subj->pos);
921
- if (matchlen > 0) {
922
- contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
923
- subj->pos += matchlen;
924
-
925
- return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);
926
- }
927
-
928
- // next try to match an email autolink
929
- matchlen = scan_autolink_email(&subj->input, subj->pos);
930
- if (matchlen > 0) {
931
- contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
932
- subj->pos += matchlen;
933
-
934
- return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);
935
- }
936
-
937
- // finally, try to match an html tag
938
- if (subj->pos + 2 <= subj->input.len) {
939
- int c = subj->input.data[subj->pos];
940
- if (c == '!' && (subj->flags & FLAG_SKIP_HTML_COMMENT) == 0) {
941
- c = subj->input.data[subj->pos+1];
942
- if (c == '-' && subj->input.data[subj->pos+2] == '-') {
943
- if (subj->input.data[subj->pos+3] == '>') {
944
- matchlen = 4;
945
- } else if (subj->input.data[subj->pos+3] == '-' &&
946
- subj->input.data[subj->pos+4] == '>') {
947
- matchlen = 5;
948
- } else {
949
- matchlen = scan_html_comment(&subj->input, subj->pos + 1);
950
- if (matchlen > 0) {
951
- matchlen += 1; // prefix "<"
952
- } else { // no match through end of input: set a flag so
953
- // we don't reparse looking for -->:
954
- subj->flags |= FLAG_SKIP_HTML_COMMENT;
955
- }
956
- }
957
- } else if (c == '[') {
958
- if ((subj->flags & FLAG_SKIP_HTML_CDATA) == 0) {
959
- matchlen = scan_html_cdata(&subj->input, subj->pos + 2);
960
- if (matchlen > 0) {
961
- // The regex doesn't require the final "]]>". But if we're not at
962
- // the end of input, it must come after the match. Otherwise,
963
- // disable subsequent scans to avoid quadratic behavior.
964
- matchlen += 5; // prefix "![", suffix "]]>"
965
- if (subj->pos + matchlen > subj->input.len) {
966
- subj->flags |= FLAG_SKIP_HTML_CDATA;
967
- matchlen = 0;
968
- }
969
- }
970
- }
971
- } else if ((subj->flags & FLAG_SKIP_HTML_DECLARATION) == 0) {
972
- matchlen = scan_html_declaration(&subj->input, subj->pos + 1);
973
- if (matchlen > 0) {
974
- matchlen += 2; // prefix "!", suffix ">"
975
- if (subj->pos + matchlen > subj->input.len) {
976
- subj->flags |= FLAG_SKIP_HTML_DECLARATION;
977
- matchlen = 0;
978
- }
979
- }
980
- }
981
- } else if (c == '?') {
982
- if ((subj->flags & FLAG_SKIP_HTML_PI) == 0) {
983
- // Note that we allow an empty match.
984
- matchlen = scan_html_pi(&subj->input, subj->pos + 1);
985
- matchlen += 3; // prefix "?", suffix "?>"
986
- if (subj->pos + matchlen > subj->input.len) {
987
- subj->flags |= FLAG_SKIP_HTML_PI;
988
- matchlen = 0;
989
- }
990
- }
991
- } else {
992
- matchlen = scan_html_tag(&subj->input, subj->pos);
993
- }
994
- }
995
- if (matchlen > 0) {
996
- contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
997
- subj->pos += matchlen;
998
- cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
999
- adjust_subj_node_newlines(subj, node, matchlen, 1, options);
1000
- return node;
1001
- }
1002
-
1003
- if (options & CMARK_OPT_LIBERAL_HTML_TAG) {
1004
- matchlen = scan_liberal_html_tag(&subj->input, subj->pos);
1005
- if (matchlen > 0) {
1006
- contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
1007
- subj->pos += matchlen;
1008
- cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
1009
- adjust_subj_node_newlines(subj, node, matchlen, 1, options);
1010
- return node;
1011
- }
1012
- }
1013
-
1014
- // if nothing matches, just return the opening <:
1015
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));
1016
- }
1017
-
1018
- // Parse a link label. Returns 1 if successful.
1019
- // Note: unescaped brackets are not allowed in labels.
1020
- // The label begins with `[` and ends with the first `]` character
1021
- // encountered. Backticks in labels do not start code spans.
1022
- static int link_label(subject *subj, cmark_chunk *raw_label) {
1023
- bufsize_t startpos = subj->pos;
1024
- int length = 0;
1025
- unsigned char c;
1026
-
1027
- // advance past [
1028
- if (peek_char(subj) == '[') {
1029
- advance(subj);
1030
- } else {
1031
- return 0;
1032
- }
1033
-
1034
- while ((c = peek_char(subj)) && c != '[' && c != ']') {
1035
- if (c == '\\') {
1036
- advance(subj);
1037
- length++;
1038
- if (cmark_ispunct(peek_char(subj))) {
1039
- advance(subj);
1040
- length++;
1041
- }
1042
- } else {
1043
- advance(subj);
1044
- length++;
1045
- }
1046
- if (length > MAX_LINK_LABEL_LENGTH) {
1047
- goto noMatch;
1048
- }
1049
- }
1050
-
1051
- if (c == ']') { // match found
1052
- *raw_label =
1053
- cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
1054
- cmark_chunk_trim(raw_label);
1055
- advance(subj); // advance past ]
1056
- return 1;
1057
- }
1058
-
1059
- noMatch:
1060
- subj->pos = startpos; // rewind
1061
- return 0;
1062
- }
1063
-
1064
- static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
1065
- cmark_chunk *output) {
1066
- bufsize_t i = offset;
1067
- size_t nb_p = 0;
1068
-
1069
- while (i < input->len) {
1070
- if (input->data[i] == '\\' &&
1071
- i + 1 < input-> len &&
1072
- cmark_ispunct(input->data[i+1]))
1073
- i += 2;
1074
- else if (input->data[i] == '(') {
1075
- ++nb_p;
1076
- ++i;
1077
- if (nb_p > 32)
1078
- return -1;
1079
- } else if (input->data[i] == ')') {
1080
- if (nb_p == 0)
1081
- break;
1082
- --nb_p;
1083
- ++i;
1084
- } else if (cmark_isspace(input->data[i])) {
1085
- if (i == offset) {
1086
- return -1;
1087
- }
1088
- break;
1089
- } else {
1090
- ++i;
1091
- }
1092
- }
1093
-
1094
- if (i >= input->len)
1095
- return -1;
1096
-
1097
- {
1098
- cmark_chunk result = {input->data + offset, i - offset, 0};
1099
- *output = result;
1100
- }
1101
- return i - offset;
1102
- }
1103
-
1104
- static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
1105
- cmark_chunk *output) {
1106
- bufsize_t i = offset;
1107
-
1108
- if (i < input->len && input->data[i] == '<') {
1109
- ++i;
1110
- while (i < input->len) {
1111
- if (input->data[i] == '>') {
1112
- ++i;
1113
- break;
1114
- } else if (input->data[i] == '\\')
1115
- i += 2;
1116
- else if (input->data[i] == '\n' || input->data[i] == '<')
1117
- return -1;
1118
- else
1119
- ++i;
1120
- }
1121
- } else {
1122
- return manual_scan_link_url_2(input, offset, output);
1123
- }
1124
-
1125
- if (i >= input->len)
1126
- return -1;
1127
-
1128
- {
1129
- cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0};
1130
- *output = result;
1131
- }
1132
- return i - offset;
1133
- }
1134
-
1135
- // Return a link, an image, or a literal close bracket.
1136
- static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
1137
- bufsize_t initial_pos, after_link_text_pos;
1138
- bufsize_t endurl, starttitle, endtitle, endall;
1139
- bufsize_t sps, n;
1140
- cmark_reference *ref = NULL;
1141
- cmark_chunk url_chunk, title_chunk;
1142
- cmark_chunk url, title;
1143
- bracket *opener;
1144
- cmark_node *inl;
1145
- cmark_chunk raw_label;
1146
- int found_label;
1147
- cmark_node *tmp, *tmpnext;
1148
- bool is_image;
1149
-
1150
- advance(subj); // advance past ]
1151
- initial_pos = subj->pos;
1152
-
1153
- // get last [ or ![
1154
- opener = subj->last_bracket;
1155
-
1156
- if (opener == NULL) {
1157
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1158
- }
1159
-
1160
- // If we got here, we matched a potential link/image text.
1161
- // Now we check to see if it's a link/image.
1162
- is_image = opener->image;
1163
-
1164
- if (!is_image && subj->no_link_openers) {
1165
- // take delimiter off stack
1166
- pop_bracket(subj);
1167
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1168
- }
1169
-
1170
- after_link_text_pos = subj->pos;
1171
-
1172
- // First, look for an inline link.
1173
- if (peek_char(subj) == '(' &&
1174
- ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
1175
- ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
1176
- &url_chunk)) > -1)) {
1177
-
1178
- // try to parse an explicit link:
1179
- endurl = subj->pos + 1 + sps + n;
1180
- starttitle = endurl + scan_spacechars(&subj->input, endurl);
1181
-
1182
- // ensure there are spaces btw url and title
1183
- endtitle = (starttitle == endurl)
1184
- ? starttitle
1185
- : starttitle + scan_link_title(&subj->input, starttitle);
1186
-
1187
- endall = endtitle + scan_spacechars(&subj->input, endtitle);
1188
-
1189
- if (peek_at(subj, endall) == ')') {
1190
- subj->pos = endall + 1;
1191
-
1192
- title_chunk =
1193
- cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
1194
- url = cmark_clean_url(subj->mem, &url_chunk);
1195
- title = cmark_clean_title(subj->mem, &title_chunk);
1196
- cmark_chunk_free(subj->mem, &url_chunk);
1197
- cmark_chunk_free(subj->mem, &title_chunk);
1198
- goto match;
1199
-
1200
- } else {
1201
- // it could still be a shortcut reference link
1202
- subj->pos = after_link_text_pos;
1203
- }
1204
- }
1205
-
1206
- // Next, look for a following [link label] that matches in refmap.
1207
- // skip spaces
1208
- raw_label = cmark_chunk_literal("");
1209
- found_label = link_label(subj, &raw_label);
1210
- if (!found_label) {
1211
- // If we have a shortcut reference link, back up
1212
- // to before the spacse we skipped.
1213
- subj->pos = initial_pos;
1214
- }
1215
-
1216
- if ((!found_label || raw_label.len == 0) && !opener->bracket_after) {
1217
- cmark_chunk_free(subj->mem, &raw_label);
1218
- raw_label = cmark_chunk_dup(&subj->input, opener->position,
1219
- initial_pos - opener->position - 1);
1220
- found_label = true;
1221
- }
1222
-
1223
- if (found_label) {
1224
- ref = (cmark_reference *)cmark_map_lookup(subj->refmap, &raw_label);
1225
- cmark_chunk_free(subj->mem, &raw_label);
1226
- }
1227
-
1228
- if (ref != NULL) { // found
1229
- url = chunk_clone(subj->mem, &ref->url);
1230
- title = chunk_clone(subj->mem, &ref->title);
1231
- goto match;
1232
- } else {
1233
- goto noMatch;
1234
- }
1235
-
1236
- noMatch:
1237
- // If we fall through to here, it means we didn't match a link.
1238
- // What if we're a footnote link?
1239
- if (parser->options & CMARK_OPT_FOOTNOTES &&
1240
- opener->inl_text->next &&
1241
- opener->inl_text->next->type == CMARK_NODE_TEXT) {
1242
-
1243
- cmark_chunk *literal = &opener->inl_text->next->as.literal;
1244
-
1245
- // look back to the opening '[', and skip ahead to the next character
1246
- // if we're looking at a '[^' sequence, and there is other text or nodes
1247
- // after the ^, let's call it a footnote reference.
1248
- if ((literal->len > 0 && literal->data[0] == '^') && (literal->len > 1 || opener->inl_text->next->next)) {
1249
-
1250
- // Before we got this far, the `handle_close_bracket` function may have
1251
- // advanced the current state beyond our footnote's actual closing
1252
- // bracket, ie if it went looking for a `link_label`.
1253
- // Let's just rewind the subject's position:
1254
- subj->pos = initial_pos;
1255
-
1256
- cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
1257
-
1258
- // the start and end of the footnote ref is the opening and closing brace
1259
- // i.e. the subject's current position, and the opener's start_column
1260
- int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset;
1261
- int fnref_start_column = opener->inl_text->start_column;
1262
-
1263
- // any given node delineates a substring of the line being processed,
1264
- // with the remainder of the line being pointed to thru its 'literal'
1265
- // struct member.
1266
- // here, we copy the literal's pointer, moving it past the '^' character
1267
- // for a length equal to the size of footnote reference text.
1268
- // i.e. end_col minus start_col, minus the [ and the ^ characters
1269
- //
1270
- // this copies the footnote reference string, even if between the
1271
- // `opener` and the subject's current position there are other nodes
1272
- //
1273
- // (first, check for underflows)
1274
- if ((fnref_start_column + 2) <= fnref_end_column) {
1275
- fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2);
1276
- } else {
1277
- fnref->as.literal = cmark_chunk_dup(literal, 1, 0);
1278
- }
1279
-
1280
- fnref->start_line = fnref->end_line = subj->line;
1281
- fnref->start_column = fnref_start_column;
1282
- fnref->end_column = fnref_end_column;
1283
-
1284
- // we then replace the opener with this new fnref node, the net effect
1285
- // being replacing the opening '[' text node with a `^footnote-ref]` node.
1286
- cmark_node_insert_before(opener->inl_text, fnref);
1287
-
1288
- process_emphasis(parser, subj, opener->position);
1289
- // sometimes, the footnote reference text gets parsed into multiple nodes
1290
- // i.e. '[^example]' parsed into '[', '^exam', 'ple]'.
1291
- // this happens for ex with the autolink extension. when the autolinker
1292
- // finds the 'w' character, it will split the text into multiple nodes
1293
- // in hopes of being able to match a 'www.' substring.
1294
- //
1295
- // because this function is called one character at a time via the
1296
- // `parse_inlines` function, and the current subj->pos is pointing at the
1297
- // closing ] brace, and because we copy all the text between the [ ]
1298
- // braces, we should be able to safely ignore and delete any nodes after
1299
- // the opener->inl_text->next.
1300
- //
1301
- // therefore, here we walk thru the list and free them all up
1302
- cmark_node *next_node;
1303
- cmark_node *current_node = opener->inl_text->next;
1304
- while(current_node) {
1305
- next_node = current_node->next;
1306
- cmark_node_free(current_node);
1307
- current_node = next_node;
1308
- }
1309
-
1310
- cmark_node_free(opener->inl_text);
1311
-
1312
- pop_bracket(subj);
1313
- return NULL;
1314
- }
1315
- }
1316
-
1317
- pop_bracket(subj); // remove this opener from delimiter list
1318
- subj->pos = initial_pos;
1319
- return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1320
-
1321
- match:
1322
- inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
1323
- inl->as.link.url = url;
1324
- inl->as.link.title = title;
1325
- inl->start_line = inl->end_line = subj->line;
1326
- inl->start_column = opener->inl_text->start_column;
1327
- inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
1328
- cmark_node_insert_before(opener->inl_text, inl);
1329
- // Add link text:
1330
- tmp = opener->inl_text->next;
1331
- while (tmp) {
1332
- tmpnext = tmp->next;
1333
- cmark_node_unlink(tmp);
1334
- append_child(inl, tmp);
1335
- tmp = tmpnext;
1336
- }
1337
-
1338
- // Free the bracket [:
1339
- cmark_node_free(opener->inl_text);
1340
-
1341
- process_emphasis(parser, subj, opener->position);
1342
- pop_bracket(subj);
1343
-
1344
- // Now, if we have a link, we also want to deactivate links until
1345
- // we get a new opener. (This code can be removed if we decide to allow links
1346
- // inside links.)
1347
- if (!is_image) {
1348
- subj->no_link_openers = true;
1349
- }
1350
-
1351
- return NULL;
1352
- }
1353
-
1354
- // Parse a hard or soft linebreak, returning an inline.
1355
- // Assumes the subject has a cr or newline at the current position.
1356
- static cmark_node *handle_newline(subject *subj) {
1357
- bufsize_t nlpos = subj->pos;
1358
- // skip over cr, crlf, or lf:
1359
- if (peek_at(subj, subj->pos) == '\r') {
1360
- advance(subj);
1361
- }
1362
- if (peek_at(subj, subj->pos) == '\n') {
1363
- advance(subj);
1364
- }
1365
- ++subj->line;
1366
- subj->column_offset = -subj->pos;
1367
- // skip spaces at beginning of line
1368
- skip_spaces(subj);
1369
- if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
1370
- peek_at(subj, nlpos - 2) == ' ') {
1371
- return make_linebreak(subj->mem);
1372
- } else {
1373
- return make_softbreak(subj->mem);
1374
- }
1375
- }
1376
-
1377
// Lookup table indexed by byte value: non-zero for bytes that end a plain
// text run. Initially these are "\r\n\\`&_*[]<!"; every other entry is 0.
static int8_t SPECIAL_CHARS[256] = {
    ['\n'] = 1, ['\r'] = 1, ['!'] = 1, ['&'] = 1,  ['*'] = 1, ['<'] = 1,
    ['['] = 1,  ['\\'] = 1, [']'] = 1, ['_'] = 1,  ['`'] = 1,
};
1390
-
1391
// Lookup table indexed by byte value: non-zero for the four smart
// punctuation characters  " ' . -  and 0 for every other byte.
// The size is spelled out so that all 256 byte values remain valid indices.
static char SMART_PUNCT_CHARS[256] = {
    ['"'] = 1,
    ['\''] = 1,
    ['-'] = 1,
    ['.'] = 1,
};
1405
-
1406
- static bufsize_t subject_find_special_char(subject *subj, int options) {
1407
- bufsize_t n = subj->pos + 1;
1408
-
1409
- while (n < subj->input.len) {
1410
- if (SPECIAL_CHARS[subj->input.data[n]])
1411
- return n;
1412
- if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]])
1413
- return n;
1414
- n++;
1415
- }
1416
-
1417
- return subj->input.len;
1418
- }
1419
-
1420
- void cmark_inlines_add_special_character(unsigned char c, bool emphasis) {
1421
- SPECIAL_CHARS[c] = 1;
1422
- if (emphasis)
1423
- SKIP_CHARS[c] = 1;
1424
- }
1425
-
1426
- void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) {
1427
- SPECIAL_CHARS[c] = 0;
1428
- if (emphasis)
1429
- SKIP_CHARS[c] = 0;
1430
- }
1431
-
1432
- static cmark_node *try_extensions(cmark_parser *parser,
1433
- cmark_node *parent,
1434
- unsigned char c,
1435
- subject *subj) {
1436
- cmark_node *res = NULL;
1437
- cmark_llist *tmp;
1438
-
1439
- for (tmp = parser->inline_syntax_extensions; tmp; tmp = tmp->next) {
1440
- cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data;
1441
- res = ext->match_inline(ext, parser, parent, c, subj);
1442
-
1443
- if (res)
1444
- break;
1445
- }
1446
-
1447
- return res;
1448
- }
1449
-
1450
- // Parse an inline, advancing subject, and add it as a child of parent.
1451
- // Return 0 if no inline can be parsed, 1 otherwise.
1452
- static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options) {
1453
- cmark_node *new_inl = NULL;
1454
- cmark_chunk contents;
1455
- unsigned char c;
1456
- bufsize_t startpos, endpos;
1457
- c = peek_char(subj);
1458
- if (c == 0) {
1459
- return 0;
1460
- }
1461
- switch (c) {
1462
- case '\r':
1463
- case '\n':
1464
- new_inl = handle_newline(subj);
1465
- break;
1466
- case '`':
1467
- new_inl = handle_backticks(subj, options);
1468
- break;
1469
- case '\\':
1470
- new_inl = handle_backslash(parser, subj);
1471
- break;
1472
- case '&':
1473
- new_inl = handle_entity(subj);
1474
- break;
1475
- case '<':
1476
- new_inl = handle_pointy_brace(subj, options);
1477
- break;
1478
- case '*':
1479
- case '_':
1480
- case '\'':
1481
- case '"':
1482
- new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0);
1483
- break;
1484
- case '-':
1485
- new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0);
1486
- break;
1487
- case '.':
1488
- new_inl = handle_period(subj, (options & CMARK_OPT_SMART) != 0);
1489
- break;
1490
- case '[':
1491
- advance(subj);
1492
- new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));
1493
- push_bracket(subj, false, new_inl);
1494
- break;
1495
- case ']':
1496
- new_inl = handle_close_bracket(parser, subj);
1497
- break;
1498
- case '!':
1499
- advance(subj);
1500
- if (peek_char(subj) == '[' && peek_char_n(subj, 1) != '^') {
1501
- advance(subj);
1502
- new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
1503
- push_bracket(subj, true, new_inl);
1504
- } else {
1505
- new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
1506
- }
1507
- break;
1508
- default:
1509
- new_inl = try_extensions(parser, parent, c, subj);
1510
- if (new_inl != NULL)
1511
- break;
1512
-
1513
- endpos = subject_find_special_char(subj, options);
1514
- contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
1515
- startpos = subj->pos;
1516
- subj->pos = endpos;
1517
-
1518
- // if we're at a newline, strip trailing spaces.
1519
- if (S_is_line_end_char(peek_char(subj))) {
1520
- cmark_chunk_rtrim(&contents);
1521
- }
1522
-
1523
- new_inl = make_str(subj, startpos, endpos - 1, contents);
1524
- }
1525
- if (new_inl != NULL) {
1526
- append_child(parent, new_inl);
1527
- }
1528
-
1529
- return 1;
1530
- }
1531
-
1532
- // Parse inlines from parent's string_content, adding as children of parent.
1533
- void cmark_parse_inlines(cmark_parser *parser,
1534
- cmark_node *parent,
1535
- cmark_map *refmap,
1536
- int options) {
1537
- subject subj;
1538
- cmark_chunk content = {parent->content.ptr, parent->content.size, 0};
1539
- subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap);
1540
- cmark_chunk_rtrim(&subj.input);
1541
-
1542
- while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options))
1543
- ;
1544
-
1545
- process_emphasis(parser, &subj, 0);
1546
- // free bracket and delim stack
1547
- while (subj.last_delim) {
1548
- remove_delimiter(&subj, subj.last_delim);
1549
- }
1550
- while (subj.last_bracket) {
1551
- pop_bracket(&subj);
1552
- }
1553
- }
1554
-
1555
- // Parse zero or more space characters, including at most one newline.
1556
- static void spnl(subject *subj) {
1557
- skip_spaces(subj);
1558
- if (skip_line_end(subj)) {
1559
- skip_spaces(subj);
1560
- }
1561
- }
1562
-
1563
- // Parse reference. Assumes string begins with '[' character.
1564
- // Modify refmap if a reference is encountered.
1565
- // Return 0 if no reference found, otherwise position of subject
1566
- // after reference is parsed.
1567
- bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
1568
- cmark_map *refmap) {
1569
- subject subj;
1570
-
1571
- cmark_chunk lab;
1572
- cmark_chunk url;
1573
- cmark_chunk title;
1574
-
1575
- bufsize_t matchlen = 0;
1576
- bufsize_t beforetitle;
1577
-
1578
- subject_from_buf(mem, -1, 0, &subj, input, NULL);
1579
-
1580
- // parse label:
1581
- if (!link_label(&subj, &lab) || lab.len == 0)
1582
- return 0;
1583
-
1584
- // colon:
1585
- if (peek_char(&subj) == ':') {
1586
- advance(&subj);
1587
- } else {
1588
- return 0;
1589
- }
1590
-
1591
- // parse link url:
1592
- spnl(&subj);
1593
- if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) {
1594
- subj.pos += matchlen;
1595
- } else {
1596
- return 0;
1597
- }
1598
-
1599
- // parse optional link_title
1600
- beforetitle = subj.pos;
1601
- spnl(&subj);
1602
- matchlen = subj.pos == beforetitle ? 0 : scan_link_title(&subj.input, subj.pos);
1603
- if (matchlen) {
1604
- title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1605
- subj.pos += matchlen;
1606
- } else {
1607
- subj.pos = beforetitle;
1608
- title = cmark_chunk_literal("");
1609
- }
1610
-
1611
- // parse final spaces and newline:
1612
- skip_spaces(&subj);
1613
- if (!skip_line_end(&subj)) {
1614
- if (matchlen) { // try rewinding before title
1615
- subj.pos = beforetitle;
1616
- skip_spaces(&subj);
1617
- if (!skip_line_end(&subj)) {
1618
- return 0;
1619
- }
1620
- } else {
1621
- return 0;
1622
- }
1623
- }
1624
- // insert reference into refmap
1625
- cmark_reference_create(refmap, &lab, &url, &title);
1626
- return subj.pos;
1627
- }
1628
-
1629
- unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser) {
1630
- return peek_char(parser);
1631
- }
1632
-
1633
- unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, bufsize_t pos) {
1634
- return peek_at(parser, pos);
1635
- }
1636
-
1637
- int cmark_inline_parser_is_eof(cmark_inline_parser *parser) {
1638
- return is_eof(parser);
1639
- }
1640
-
1641
// Duplicate at most `n` bytes of `s` into a newly malloc'ed, NUL-terminated
// string. Copying stops early at the first NUL byte found within those `n`
// bytes.
//
// Returns NULL if the allocation fails. The caller owns the result and must
// free() it.
static char *my_strndup(const char *s, size_t n) {
  // Look for a terminator within the first `n` bytes only. strlen() would
  // keep scanning past the requested bound and depends on `s` being
  // NUL-terminated, which a slice of a larger buffer need not be.
  const char *nul = memchr(s, '\0', n);
  size_t len = nul ? (size_t)(nul - s) : n;
  char *result = malloc(len + 1);

  if (!result) {
    return NULL;
  }

  memcpy(result, s, len);
  result[len] = '\0';
  return result;
}
1657
-
1658
- char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred) {
1659
- unsigned char c;
1660
- bufsize_t startpos = parser->pos;
1661
- bufsize_t len = 0;
1662
-
1663
- while ((c = peek_char(parser)) && (*pred)(c)) {
1664
- advance(parser);
1665
- len++;
1666
- }
1667
-
1668
- return my_strndup((const char *) parser->input.data + startpos, len);
1669
- }
1670
-
1671
- void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser,
1672
- unsigned char c,
1673
- int can_open,
1674
- int can_close,
1675
- cmark_node *inl_text) {
1676
- push_delimiter(parser, c, can_open != 0, can_close != 0, inl_text);
1677
- }
1678
-
1679
- void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim) {
1680
- remove_delimiter(parser, delim);
1681
- }
1682
-
1683
- int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser,
1684
- int max_delims,
1685
- unsigned char c,
1686
- int *left_flanking,
1687
- int *right_flanking,
1688
- int *punct_before,
1689
- int *punct_after) {
1690
- int numdelims = 0;
1691
- bufsize_t before_char_pos;
1692
- int32_t after_char = 0;
1693
- int32_t before_char = 0;
1694
- int len;
1695
- bool space_before, space_after;
1696
-
1697
- if (parser->pos == 0) {
1698
- before_char = 10;
1699
- } else {
1700
- before_char_pos = parser->pos - 1;
1701
- // walk back to the beginning of the UTF_8 sequence:
1702
- while (peek_at(parser, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
1703
- before_char_pos -= 1;
1704
- }
1705
- len = cmark_utf8proc_iterate(parser->input.data + before_char_pos,
1706
- parser->pos - before_char_pos, &before_char);
1707
- if (len == -1) {
1708
- before_char = 10;
1709
- }
1710
- }
1711
-
1712
- while (peek_char(parser) == c && numdelims < max_delims) {
1713
- numdelims++;
1714
- advance(parser);
1715
- }
1716
-
1717
- len = cmark_utf8proc_iterate(parser->input.data + parser->pos,
1718
- parser->input.len - parser->pos, &after_char);
1719
- if (len == -1) {
1720
- after_char = 10;
1721
- }
1722
-
1723
- *punct_before = cmark_utf8proc_is_punctuation(before_char);
1724
- *punct_after = cmark_utf8proc_is_punctuation(after_char);
1725
- space_before = cmark_utf8proc_is_space(before_char) != 0;
1726
- space_after = cmark_utf8proc_is_space(after_char) != 0;
1727
-
1728
- *left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
1729
- !(*punct_after && !space_before && !*punct_before);
1730
- *right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
1731
- !(*punct_before && !space_after && !*punct_after);
1732
-
1733
- return numdelims;
1734
- }
1735
-
1736
- void cmark_inline_parser_advance_offset(cmark_inline_parser *parser) {
1737
- advance(parser);
1738
- }
1739
-
1740
- int cmark_inline_parser_get_offset(cmark_inline_parser *parser) {
1741
- return parser->pos;
1742
- }
1743
-
1744
- void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset) {
1745
- parser->pos = offset;
1746
- }
1747
-
1748
- int cmark_inline_parser_get_column(cmark_inline_parser *parser) {
1749
- return parser->pos + 1 + parser->column_offset + parser->block_offset;
1750
- }
1751
-
1752
- cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) {
1753
- return &parser->input;
1754
- }
1755
-
1756
- int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image) {
1757
- bracket *b = parser->last_bracket;
1758
- if (!b) {
1759
- return 0;
1760
- }
1761
- if (image != 0) {
1762
- return b->in_bracket_image1;
1763
- } else {
1764
- return b->in_bracket_image0;
1765
- }
1766
- }
1767
-
1768
- void cmark_node_unput(cmark_node *node, int n) {
1769
- node = node->last_child;
1770
- while (n > 0 && node && node->type == CMARK_NODE_TEXT) {
1771
- if (node->as.literal.len < n) {
1772
- n -= node->as.literal.len;
1773
- node->as.literal.len = 0;
1774
- } else {
1775
- node->as.literal.len -= n;
1776
- n = 0;
1777
- }
1778
- node = node->prev;
1779
- }
1780
- }
1781
-
1782
- delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser) {
1783
- return parser->last_delim;
1784
- }
1785
-
1786
- int cmark_inline_parser_get_line(cmark_inline_parser *parser) {
1787
- return parser->line;
1788
- }