commonmarker 0.23.6 → 1.0.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +70 -212
  3. data/commonmarker.gemspec +34 -31
  4. data/ext/commonmarker/Cargo.toml +12 -0
  5. data/ext/commonmarker/_util.rb +102 -0
  6. data/ext/commonmarker/extconf.rb +4 -5
  7. data/ext/commonmarker/src/comrak_options.rs +107 -0
  8. data/ext/commonmarker/src/lib.rs +27 -0
  9. data/lib/commonmarker/config.rb +58 -37
  10. data/lib/commonmarker/extension.rb +14 -0
  11. data/lib/commonmarker/renderer.rb +1 -127
  12. data/lib/commonmarker/version.rb +2 -2
  13. data/lib/commonmarker.rb +19 -32
  14. metadata +33 -177
  15. data/Rakefile +0 -109
  16. data/bin/commonmarker +0 -118
  17. data/ext/commonmarker/arena.c +0 -103
  18. data/ext/commonmarker/autolink.c +0 -456
  19. data/ext/commonmarker/autolink.h +0 -8
  20. data/ext/commonmarker/blocks.c +0 -1596
  21. data/ext/commonmarker/buffer.c +0 -278
  22. data/ext/commonmarker/buffer.h +0 -116
  23. data/ext/commonmarker/case_fold_switch.inc +0 -4327
  24. data/ext/commonmarker/chunk.h +0 -135
  25. data/ext/commonmarker/cmark-gfm-core-extensions.h +0 -54
  26. data/ext/commonmarker/cmark-gfm-extension_api.h +0 -736
  27. data/ext/commonmarker/cmark-gfm-extensions_export.h +0 -42
  28. data/ext/commonmarker/cmark-gfm.h +0 -817
  29. data/ext/commonmarker/cmark-gfm_export.h +0 -42
  30. data/ext/commonmarker/cmark-gfm_version.h +0 -7
  31. data/ext/commonmarker/cmark.c +0 -55
  32. data/ext/commonmarker/cmark_ctype.c +0 -44
  33. data/ext/commonmarker/cmark_ctype.h +0 -33
  34. data/ext/commonmarker/commonmark.c +0 -529
  35. data/ext/commonmarker/commonmarker.c +0 -1307
  36. data/ext/commonmarker/commonmarker.h +0 -16
  37. data/ext/commonmarker/config.h +0 -76
  38. data/ext/commonmarker/core-extensions.c +0 -27
  39. data/ext/commonmarker/entities.inc +0 -2138
  40. data/ext/commonmarker/ext_scanners.c +0 -879
  41. data/ext/commonmarker/ext_scanners.h +0 -24
  42. data/ext/commonmarker/footnotes.c +0 -63
  43. data/ext/commonmarker/footnotes.h +0 -27
  44. data/ext/commonmarker/houdini.h +0 -57
  45. data/ext/commonmarker/houdini_href_e.c +0 -100
  46. data/ext/commonmarker/houdini_html_e.c +0 -66
  47. data/ext/commonmarker/houdini_html_u.c +0 -149
  48. data/ext/commonmarker/html.c +0 -486
  49. data/ext/commonmarker/html.h +0 -27
  50. data/ext/commonmarker/inlines.c +0 -1716
  51. data/ext/commonmarker/inlines.h +0 -29
  52. data/ext/commonmarker/iterator.c +0 -159
  53. data/ext/commonmarker/iterator.h +0 -26
  54. data/ext/commonmarker/latex.c +0 -466
  55. data/ext/commonmarker/linked_list.c +0 -37
  56. data/ext/commonmarker/man.c +0 -278
  57. data/ext/commonmarker/map.c +0 -122
  58. data/ext/commonmarker/map.h +0 -41
  59. data/ext/commonmarker/node.c +0 -979
  60. data/ext/commonmarker/node.h +0 -125
  61. data/ext/commonmarker/parser.h +0 -58
  62. data/ext/commonmarker/plaintext.c +0 -235
  63. data/ext/commonmarker/plugin.c +0 -36
  64. data/ext/commonmarker/plugin.h +0 -34
  65. data/ext/commonmarker/references.c +0 -42
  66. data/ext/commonmarker/references.h +0 -26
  67. data/ext/commonmarker/registry.c +0 -63
  68. data/ext/commonmarker/registry.h +0 -24
  69. data/ext/commonmarker/render.c +0 -205
  70. data/ext/commonmarker/render.h +0 -62
  71. data/ext/commonmarker/scanners.c +0 -10508
  72. data/ext/commonmarker/scanners.h +0 -62
  73. data/ext/commonmarker/scanners.re +0 -341
  74. data/ext/commonmarker/strikethrough.c +0 -167
  75. data/ext/commonmarker/strikethrough.h +0 -9
  76. data/ext/commonmarker/syntax_extension.c +0 -149
  77. data/ext/commonmarker/syntax_extension.h +0 -34
  78. data/ext/commonmarker/table.c +0 -848
  79. data/ext/commonmarker/table.h +0 -12
  80. data/ext/commonmarker/tagfilter.c +0 -60
  81. data/ext/commonmarker/tagfilter.h +0 -8
  82. data/ext/commonmarker/tasklist.c +0 -156
  83. data/ext/commonmarker/tasklist.h +0 -8
  84. data/ext/commonmarker/utf8.c +0 -317
  85. data/ext/commonmarker/utf8.h +0 -35
  86. data/ext/commonmarker/xml.c +0 -181
  87. data/lib/commonmarker/node/inspect.rb +0 -47
  88. data/lib/commonmarker/node.rb +0 -83
  89. data/lib/commonmarker/renderer/html_renderer.rb +0 -252
@@ -1,456 +0,0 @@
1
- #include "autolink.h"
2
- #include <parser.h>
3
- #include <string.h>
4
- #include <utf8.h>
5
-
6
- #if defined(_WIN32)
7
- #define strncasecmp _strnicmp
8
- #else
9
- #include <strings.h>
10
- #endif
11
-
12
- static int is_valid_hostchar(const uint8_t *link, size_t link_len) {
13
- int32_t ch;
14
- int r = cmark_utf8proc_iterate(link, (bufsize_t)link_len, &ch);
15
- if (r < 0)
16
- return 0;
17
- return !cmark_utf8proc_is_space(ch) && !cmark_utf8proc_is_punctuation(ch);
18
- }
19
-
20
/*
 * Check that `link` (of `link_len` bytes) starts with one of the schemes that
 * may be autolinked and that the scheme is followed by a valid host character.
 *
 * The scheme comparison is case-insensitive. Returns 1 on a match and 0
 * otherwise; a link that is no longer than the scheme itself never matches.
 */
static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
  static const char *const schemes[] = {"http://", "https://", "ftp://"};
  const size_t scheme_count = sizeof(schemes) / sizeof(schemes[0]);
  size_t idx;

  for (idx = 0; idx < scheme_count; ++idx) {
    const char *scheme = schemes[idx];
    const size_t scheme_len = strlen(scheme);

    /* There must be at least one byte after the scheme. */
    if (link_len <= scheme_len) {
      continue;
    }

    if (strncasecmp((const char *)link, scheme, scheme_len) != 0) {
      continue;
    }

    if (is_valid_hostchar(link + scheme_len, link_len - scheme_len)) {
      return 1;
    }
  }

  return 0;
}
36
-
37
/*
 * Trim characters from the end of a candidate link that should not be part
 * of it, and return the new link length.
 *
 * `data` points at the start of the candidate and `link_end` is its tentative
 * length. The link is first cut at the first '<'. Then, working backwards:
 *
 *   - trailing  ? ! . , : * _ ~ ' "  are dropped;
 *   - a trailing "&name;" (ampersand, letters, semicolon) is dropped as a
 *     whole, while a lone trailing ';' is dropped by itself;
 *   - a trailing ')' is dropped only while the link holds more ')' than '('.
 *
 * Examples for the bracket rule (input text => linked portion):
 *
 *   http://www.pokemon.com/Pikachu_(Electric)
 *     => http://www.pokemon.com/Pikachu_(Electric)
 *
 *   http://www.pokemon.com/Pikachu_((Electric)
 *     => http://www.pokemon.com/Pikachu_((Electric)
 *
 *   http://www.pokemon.com/Pikachu_(Electric))
 *     => http://www.pokemon.com/Pikachu_(Electric)
 *
 *   http://www.pokemon.com/Pikachu_((Electric))
 *     => http://www.pokemon.com/Pikachu_((Electric))
 *
 * The brackets are counted once, in the same pass that looks for '<', and the
 * count is adjusted as ')' characters are removed. Recounting the whole
 * prefix for every trailing ')' would make long runs of ')' quadratic.
 */
static size_t autolink_delim(uint8_t *data, size_t link_end) {
  size_t opening = 0;
  size_t closing = 0;
  size_t i;

  for (i = 0; i < link_end; ++i) {
    const uint8_t c = data[i];

    if (c == '<') {
      link_end = i;
      break;
    }

    if (c == '(')
      opening++;
    else if (c == ')')
      closing++;
  }

  while (link_end > 0) {
    const uint8_t last = data[link_end - 1];

    if (strchr("?!.,:*_~'\"", last) != NULL) {
      link_end--;
    } else if (last == ';') {
      size_t new_end;

      /* A single ';' cannot close an entity; drop it without computing
       * link_end - 2, which would wrap around. */
      if (link_end < 2) {
        link_end--;
        continue;
      }

      new_end = link_end - 2;

      while (new_end > 0 && cmark_isalpha(data[new_end]))
        new_end--;

      /* Only "&" followed by at least one letter counts as an entity. */
      if (new_end < link_end - 2 && data[new_end] == '&')
        link_end = new_end;
      else
        link_end--;
    } else if (last == ')') {
      if (closing <= opening)
        break;

      /* The removed byte is a ')', so keep the running count in sync. None
       * of the other trimmed characters are brackets. */
      closing--;
      link_end--;
    } else {
      break;
    }
  }

  return link_end;
}
115
-
116
/*
 * Scan a candidate domain name and return the number of bytes that look like
 * part of it, or 0 when the candidate is rejected.
 *
 * The scan starts at index 1 and stops before the last byte of `data`, or
 * earlier at the first byte that is not '_', '.', '-' or a valid host
 * character. Underscores are counted per dot-separated segment; the candidate
 * is rejected when the segment being scanned when the loop stops, or the one
 * before it, contains an underscore.
 *
 * With `allow_short` non-zero the scanned length is returned even when no dot
 * was seen; otherwise at least one dot is required.
 */
static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
  size_t i, np = 0, uscore1 = 0, uscore2 = 0;

  /* Nothing to scan. Without this guard `size - 1` would wrap around and the
   * loop would read past the buffer. */
  if (size == 0)
    return 0;

  for (i = 1; i + 1 < size; i++) {
    if (data[i] == '_')
      uscore2++;
    else if (data[i] == '.') {
      /* Segment boundary: remember the count for the segment just ended. */
      uscore1 = uscore2;
      uscore2 = 0;
      np++;
    } else if (!is_valid_hostchar(data + i, size - i) && data[i] != '-')
      break;
  }

  if (uscore1 > 0 || uscore2 > 0)
    return 0;

  if (allow_short) {
    /* We don't need a valid domain in the strict sense (with at
     * least one dot); so just make sure it's composed of valid
     * domain characters and return the length of the valid
     * sequence. */
    return i;
  } else {
    /* A valid domain needs to have at least one dot.
     * That's as far as we get. */
    return np ? i : 0;
  }
}
145
-
146
- static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
147
- cmark_inline_parser *inline_parser) {
148
- cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
149
- size_t max_rewind = cmark_inline_parser_get_offset(inline_parser);
150
- uint8_t *data = chunk->data + max_rewind;
151
- size_t size = chunk->len - max_rewind;
152
- int start = cmark_inline_parser_get_column(inline_parser);
153
-
154
- size_t link_end;
155
-
156
- if (max_rewind > 0 && strchr("*_~(", data[-1]) == NULL &&
157
- !cmark_isspace(data[-1]))
158
- return 0;
159
-
160
- if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
161
- return 0;
162
-
163
- link_end = check_domain(data, size, 0);
164
-
165
- if (link_end == 0)
166
- return NULL;
167
-
168
- while (link_end < size && !cmark_isspace(data[link_end]))
169
- link_end++;
170
-
171
- link_end = autolink_delim(data, link_end);
172
-
173
- if (link_end == 0)
174
- return NULL;
175
-
176
- cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
177
-
178
- cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
179
-
180
- cmark_strbuf buf;
181
- cmark_strbuf_init(parser->mem, &buf, 10);
182
- cmark_strbuf_puts(&buf, "http://");
183
- cmark_strbuf_put(&buf, data, (bufsize_t)link_end);
184
- node->as.link.url = cmark_chunk_buf_detach(&buf);
185
-
186
- cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
187
- text->as.literal =
188
- cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end);
189
- cmark_node_append_child(node, text);
190
-
191
- node->start_line = text->start_line =
192
- node->end_line = text->end_line =
193
- cmark_inline_parser_get_line(inline_parser);
194
-
195
- node->start_column = text->start_column = start - 1;
196
- node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
197
-
198
- return node;
199
- }
200
-
201
- static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
202
- cmark_inline_parser *inline_parser) {
203
- size_t link_end, domain_len;
204
- int rewind = 0;
205
-
206
- cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
207
- int max_rewind = cmark_inline_parser_get_offset(inline_parser);
208
- uint8_t *data = chunk->data + max_rewind;
209
- size_t size = chunk->len - max_rewind;
210
-
211
- if (size < 4 || data[1] != '/' || data[2] != '/')
212
- return 0;
213
-
214
- while (rewind < max_rewind && cmark_isalpha(data[-rewind - 1]))
215
- rewind++;
216
-
217
- if (!sd_autolink_issafe(data - rewind, size + rewind))
218
- return 0;
219
-
220
- link_end = strlen("://");
221
-
222
- domain_len = check_domain(data + link_end, size - link_end, 1);
223
-
224
- if (domain_len == 0)
225
- return 0;
226
-
227
- link_end += domain_len;
228
- while (link_end < size && !cmark_isspace(data[link_end]))
229
- link_end++;
230
-
231
- link_end = autolink_delim(data, link_end);
232
-
233
- if (link_end == 0)
234
- return NULL;
235
-
236
- cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
237
- cmark_node_unput(parent, rewind);
238
-
239
- cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
240
-
241
- cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind,
242
- (bufsize_t)(link_end + rewind));
243
- node->as.link.url = url;
244
-
245
- cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
246
- text->as.literal = url;
247
- cmark_node_append_child(node, text);
248
-
249
- return node;
250
- }
251
-
252
- static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
253
- cmark_node *parent, unsigned char c,
254
- cmark_inline_parser *inline_parser) {
255
- if (cmark_inline_parser_in_bracket(inline_parser, false) ||
256
- cmark_inline_parser_in_bracket(inline_parser, true))
257
- return NULL;
258
-
259
- if (c == ':')
260
- return url_match(parser, parent, inline_parser);
261
-
262
- if (c == 'w')
263
- return www_match(parser, parent, inline_parser);
264
-
265
- return NULL;
266
-
267
- // note that we could end up re-consuming something already a
268
- // part of an inline, because we don't track when the last
269
- // inline was finished in inlines.c.
270
- }
271
-
272
- static bool validate_protocol(char protocol[], uint8_t *data, int rewind) {
273
- size_t len = strlen(protocol);
274
-
275
- // Check that the protocol matches
276
- for (int i = 1; i <= len; i++) {
277
- if (data[-rewind - i] != protocol[len - i]) {
278
- return false;
279
- }
280
- }
281
-
282
- char prev_char = data[-rewind - len - 1];
283
-
284
- // Make sure the character before the protocol is non-alphanumeric
285
- return !cmark_isalnum(prev_char);
286
- }
287
-
288
- static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset, int depth) {
289
- // postprocess_text can recurse very deeply if there is a very long line of
290
- // '@' only. Stop at a reasonable depth to ensure it cannot crash.
291
- if (depth > 1000) return;
292
-
293
- size_t link_end;
294
- uint8_t *data = text->as.literal.data,
295
- *at;
296
- size_t size = text->as.literal.len;
297
- bool auto_mailto = true;
298
- bool is_xmpp = false;
299
- int rewind, max_rewind,
300
- nb = 0, np = 0, ns = 0;
301
-
302
- if (offset < 0 || (size_t)offset >= size)
303
- return;
304
-
305
- data += offset;
306
- size -= offset;
307
-
308
- at = (uint8_t *)memchr(data, '@', size);
309
- if (!at)
310
- return;
311
-
312
- max_rewind = (int)(at - data);
313
- data += max_rewind;
314
- size -= max_rewind;
315
-
316
- for (rewind = 0; rewind < max_rewind; ++rewind) {
317
- uint8_t c = data[-rewind - 1];
318
-
319
- if (cmark_isalnum(c))
320
- continue;
321
-
322
- if (strchr(".+-_", c) != NULL)
323
- continue;
324
-
325
- if (strchr(":", c) != NULL) {
326
- if (validate_protocol("mailto:", data, rewind)) {
327
- auto_mailto = false;
328
- continue;
329
- }
330
-
331
- if (validate_protocol("xmpp:", data, rewind)) {
332
- auto_mailto = false;
333
- is_xmpp = true;
334
- continue;
335
- }
336
- }
337
-
338
- break;
339
- }
340
-
341
- if (rewind == 0 || ns > 0) {
342
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
343
- return;
344
- }
345
-
346
- for (link_end = 0; link_end < size; ++link_end) {
347
- uint8_t c = data[link_end];
348
-
349
- if (cmark_isalnum(c))
350
- continue;
351
-
352
- if (c == '@')
353
- nb++;
354
- else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
355
- np++;
356
- else if (c == '/' && is_xmpp)
357
- continue;
358
- else if (c != '-' && c != '_')
359
- break;
360
- }
361
-
362
- if (link_end < 2 || nb != 1 || np == 0 ||
363
- (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) {
364
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
365
- return;
366
- }
367
-
368
- link_end = autolink_delim(data, link_end);
369
-
370
- if (link_end == 0) {
371
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
372
- return;
373
- }
374
-
375
- cmark_chunk_to_cstr(parser->mem, &text->as.literal);
376
-
377
- cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
378
- cmark_strbuf buf;
379
- cmark_strbuf_init(parser->mem, &buf, 10);
380
- if (auto_mailto)
381
- cmark_strbuf_puts(&buf, "mailto:");
382
- cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind));
383
- link_node->as.link.url = cmark_chunk_buf_detach(&buf);
384
-
385
- cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
386
- cmark_chunk email = cmark_chunk_dup(
387
- &text->as.literal,
388
- offset + max_rewind - rewind,
389
- (bufsize_t)(link_end + rewind));
390
- cmark_chunk_to_cstr(parser->mem, &email);
391
- link_text->as.literal = email;
392
- cmark_node_append_child(link_node, link_text);
393
-
394
- cmark_node_insert_after(text, link_node);
395
-
396
- cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
397
- post->as.literal = cmark_chunk_dup(&text->as.literal,
398
- (bufsize_t)(offset + max_rewind + link_end),
399
- (bufsize_t)(size - link_end));
400
- cmark_chunk_to_cstr(parser->mem, &post->as.literal);
401
-
402
- cmark_node_insert_after(link_node, post);
403
-
404
- text->as.literal.len = offset + max_rewind - rewind;
405
- text->as.literal.data[text->as.literal.len] = 0;
406
-
407
- postprocess_text(parser, post, 0, depth + 1);
408
- }
409
-
410
- static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
411
- cmark_iter *iter;
412
- cmark_event_type ev;
413
- cmark_node *node;
414
- bool in_link = false;
415
-
416
- cmark_consolidate_text_nodes(root);
417
- iter = cmark_iter_new(root);
418
-
419
- while ((ev = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
420
- node = cmark_iter_get_node(iter);
421
- if (in_link) {
422
- if (ev == CMARK_EVENT_EXIT && node->type == CMARK_NODE_LINK) {
423
- in_link = false;
424
- }
425
- continue;
426
- }
427
-
428
- if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_LINK) {
429
- in_link = true;
430
- continue;
431
- }
432
-
433
- if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) {
434
- postprocess_text(parser, node, 0, /*depth*/0);
435
- }
436
- }
437
-
438
- cmark_iter_free(iter);
439
-
440
- return root;
441
- }
442
-
443
- cmark_syntax_extension *create_autolink_extension(void) {
444
- cmark_syntax_extension *ext = cmark_syntax_extension_new("autolink");
445
- cmark_llist *special_chars = NULL;
446
-
447
- cmark_syntax_extension_set_match_inline_func(ext, match);
448
- cmark_syntax_extension_set_postprocess_func(ext, postprocess);
449
-
450
- cmark_mem *mem = cmark_get_default_mem_allocator();
451
- special_chars = cmark_llist_append(mem, special_chars, (void *)':');
452
- special_chars = cmark_llist_append(mem, special_chars, (void *)'w');
453
- cmark_syntax_extension_set_special_inline_chars(ext, special_chars);
454
-
455
- return ext;
456
- }
@@ -1,8 +0,0 @@
1
- #ifndef CMARK_GFM_AUTOLINK_H
2
- #define CMARK_GFM_AUTOLINK_H
3
-
4
- #include "cmark-gfm-core-extensions.h"
5
-
6
- cmark_syntax_extension *create_autolink_extension(void);
7
-
8
- #endif