markly 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/bin/markly +94 -0
  3. data/ext/markly/arena.c +103 -0
  4. data/ext/markly/autolink.c +425 -0
  5. data/ext/markly/autolink.h +8 -0
  6. data/ext/markly/blocks.c +1585 -0
  7. data/ext/markly/buffer.c +278 -0
  8. data/ext/markly/buffer.h +116 -0
  9. data/ext/markly/case_fold_switch.inc +4327 -0
  10. data/ext/markly/chunk.h +135 -0
  11. data/ext/markly/cmark-gfm-core-extensions.h +54 -0
  12. data/ext/markly/cmark-gfm-extension_api.h +736 -0
  13. data/ext/markly/cmark-gfm-extensions_export.h +42 -0
  14. data/ext/markly/cmark-gfm.h +817 -0
  15. data/ext/markly/cmark-gfm_export.h +42 -0
  16. data/ext/markly/cmark-gfm_version.h +7 -0
  17. data/ext/markly/cmark.c +55 -0
  18. data/ext/markly/cmark_ctype.c +44 -0
  19. data/ext/markly/cmark_ctype.h +33 -0
  20. data/ext/markly/commonmark.c +519 -0
  21. data/ext/markly/config.h +76 -0
  22. data/ext/markly/core-extensions.c +27 -0
  23. data/ext/markly/entities.inc +2138 -0
  24. data/ext/markly/ext_scanners.c +1159 -0
  25. data/ext/markly/ext_scanners.h +24 -0
  26. data/ext/markly/extconf.rb +7 -0
  27. data/ext/markly/footnotes.c +40 -0
  28. data/ext/markly/footnotes.h +25 -0
  29. data/ext/markly/houdini.h +57 -0
  30. data/ext/markly/houdini_href_e.c +100 -0
  31. data/ext/markly/houdini_html_e.c +66 -0
  32. data/ext/markly/houdini_html_u.c +149 -0
  33. data/ext/markly/html.c +465 -0
  34. data/ext/markly/html.h +27 -0
  35. data/ext/markly/inlines.c +1633 -0
  36. data/ext/markly/inlines.h +29 -0
  37. data/ext/markly/iterator.c +159 -0
  38. data/ext/markly/iterator.h +26 -0
  39. data/ext/markly/latex.c +466 -0
  40. data/ext/markly/linked_list.c +37 -0
  41. data/ext/markly/man.c +278 -0
  42. data/ext/markly/map.c +122 -0
  43. data/ext/markly/map.h +41 -0
  44. data/ext/markly/markly.c +1226 -0
  45. data/ext/markly/markly.h +16 -0
  46. data/ext/markly/node.c +979 -0
  47. data/ext/markly/node.h +118 -0
  48. data/ext/markly/parser.h +58 -0
  49. data/ext/markly/plaintext.c +235 -0
  50. data/ext/markly/plugin.c +36 -0
  51. data/ext/markly/plugin.h +34 -0
  52. data/ext/markly/references.c +42 -0
  53. data/ext/markly/references.h +26 -0
  54. data/ext/markly/registry.c +63 -0
  55. data/ext/markly/registry.h +24 -0
  56. data/ext/markly/render.c +205 -0
  57. data/ext/markly/render.h +62 -0
  58. data/ext/markly/scanners.c +20382 -0
  59. data/ext/markly/scanners.h +62 -0
  60. data/ext/markly/scanners.re +326 -0
  61. data/ext/markly/strikethrough.c +167 -0
  62. data/ext/markly/strikethrough.h +9 -0
  63. data/ext/markly/syntax_extension.c +149 -0
  64. data/ext/markly/syntax_extension.h +34 -0
  65. data/ext/markly/table.c +803 -0
  66. data/ext/markly/table.h +12 -0
  67. data/ext/markly/tagfilter.c +60 -0
  68. data/ext/markly/tagfilter.h +8 -0
  69. data/ext/markly/tasklist.c +156 -0
  70. data/ext/markly/tasklist.h +8 -0
  71. data/ext/markly/utf8.c +317 -0
  72. data/ext/markly/utf8.h +35 -0
  73. data/ext/markly/xml.c +181 -0
  74. data/lib/markly.rb +43 -0
  75. data/lib/markly/flags.rb +37 -0
  76. data/lib/markly/markly.so +0 -0
  77. data/lib/markly/node.rb +70 -0
  78. data/lib/markly/node/inspect.rb +59 -0
  79. data/lib/markly/renderer.rb +133 -0
  80. data/lib/markly/renderer/html_renderer.rb +252 -0
  81. data/lib/markly/version.rb +5 -0
  82. metadata +211 -0
@@ -0,0 +1,465 @@
1
+ #include <stdlib.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+ #include <assert.h>
5
+ #include "cmark_ctype.h"
6
+ #include "config.h"
7
+ #include "cmark-gfm.h"
8
+ #include "houdini.h"
9
+ #include "scanners.h"
10
+ #include "syntax_extension.h"
11
+ #include "html.h"
12
+ #include "render.h"
13
+
14
+ // Functions to convert cmark_nodes to HTML strings.
15
+
16
+ static void escape_html(cmark_strbuf *dest, const unsigned char *source,
17
+ bufsize_t length) {
18
+ houdini_escape_html0(dest, source, length, 0);
19
+ }
20
+
21
+ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size_t len) {
22
+ cmark_strbuf *html = renderer->html;
23
+ cmark_llist *it;
24
+ cmark_syntax_extension *ext;
25
+ bool filtered;
26
+ uint8_t *match;
27
+
28
+ while (len) {
29
+ match = (uint8_t *) memchr(data, '<', len);
30
+ if (!match)
31
+ break;
32
+
33
+ if (match != data) {
34
+ cmark_strbuf_put(html, data, (bufsize_t)(match - data));
35
+ len -= (match - data);
36
+ data = match;
37
+ }
38
+
39
+ filtered = false;
40
+ for (it = renderer->filter_extensions; it; it = it->next) {
41
+ ext = ((cmark_syntax_extension *) it->data);
42
+ if (!ext->html_filter_func(ext, data, len)) {
43
+ filtered = true;
44
+ break;
45
+ }
46
+ }
47
+
48
+ if (!filtered) {
49
+ cmark_strbuf_putc(html, '<');
50
+ } else {
51
+ cmark_strbuf_puts(html, "&lt;");
52
+ }
53
+
54
+ ++data;
55
+ --len;
56
+ }
57
+
58
+ if (len)
59
+ cmark_strbuf_put(html, data, (bufsize_t)len);
60
+ }
61
+
62
+ static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) {
63
+ if (renderer->written_footnote_ix >= renderer->footnote_ix)
64
+ return false;
65
+ renderer->written_footnote_ix = renderer->footnote_ix;
66
+
67
+ cmark_strbuf_puts(html, "<a href=\"#fnref");
68
+ char n[32];
69
+ snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
70
+ cmark_strbuf_puts(html, n);
71
+ cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩</a>");
72
+
73
+ return true;
74
+ }
75
+
76
+ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
77
+ cmark_event_type ev_type, int options) {
78
+ cmark_node *parent;
79
+ cmark_node *grandparent;
80
+ cmark_strbuf *html = renderer->html;
81
+ cmark_llist *it;
82
+ cmark_syntax_extension *ext;
83
+ char start_heading[] = "<h0";
84
+ char end_heading[] = "</h0";
85
+ bool tight;
86
+ bool filtered;
87
+ char buffer[BUFFER_SIZE];
88
+
89
+ bool entering = (ev_type == CMARK_EVENT_ENTER);
90
+
91
+ if (renderer->plain == node) { // back at original node
92
+ renderer->plain = NULL;
93
+ }
94
+
95
+ if (renderer->plain != NULL) {
96
+ switch (node->type) {
97
+ case CMARK_NODE_TEXT:
98
+ case CMARK_NODE_CODE:
99
+ case CMARK_NODE_HTML_INLINE:
100
+ escape_html(html, node->as.literal.data, node->as.literal.len);
101
+ break;
102
+
103
+ case CMARK_NODE_LINEBREAK:
104
+ case CMARK_NODE_SOFTBREAK:
105
+ cmark_strbuf_putc(html, ' ');
106
+ break;
107
+
108
+ default:
109
+ break;
110
+ }
111
+ return 1;
112
+ }
113
+
114
+ if (node->extension && node->extension->html_render_func) {
115
+ node->extension->html_render_func(node->extension, renderer, node, ev_type, options);
116
+ return 1;
117
+ }
118
+
119
+ switch (node->type) {
120
+ case CMARK_NODE_DOCUMENT:
121
+ break;
122
+
123
+ case CMARK_NODE_BLOCK_QUOTE:
124
+ if (entering) {
125
+ cmark_html_render_cr(html);
126
+ cmark_strbuf_puts(html, "<blockquote");
127
+ cmark_html_render_sourcepos(node, html, options);
128
+ cmark_strbuf_puts(html, ">\n");
129
+ } else {
130
+ cmark_html_render_cr(html);
131
+ cmark_strbuf_puts(html, "</blockquote>\n");
132
+ }
133
+ break;
134
+
135
+ case CMARK_NODE_LIST: {
136
+ cmark_list_type list_type = node->as.list.list_type;
137
+ int start = node->as.list.start;
138
+
139
+ if (entering) {
140
+ cmark_html_render_cr(html);
141
+ if (list_type == CMARK_BULLET_LIST) {
142
+ cmark_strbuf_puts(html, "<ul");
143
+ cmark_html_render_sourcepos(node, html, options);
144
+ cmark_strbuf_puts(html, ">\n");
145
+ } else if (start == 1) {
146
+ cmark_strbuf_puts(html, "<ol");
147
+ cmark_html_render_sourcepos(node, html, options);
148
+ cmark_strbuf_puts(html, ">\n");
149
+ } else {
150
+ snprintf(buffer, BUFFER_SIZE, "<ol start=\"%d\"", start);
151
+ cmark_strbuf_puts(html, buffer);
152
+ cmark_html_render_sourcepos(node, html, options);
153
+ cmark_strbuf_puts(html, ">\n");
154
+ }
155
+ } else {
156
+ cmark_strbuf_puts(html,
157
+ list_type == CMARK_BULLET_LIST ? "</ul>\n" : "</ol>\n");
158
+ }
159
+ break;
160
+ }
161
+
162
+ case CMARK_NODE_ITEM:
163
+ if (entering) {
164
+ cmark_html_render_cr(html);
165
+ cmark_strbuf_puts(html, "<li");
166
+ cmark_html_render_sourcepos(node, html, options);
167
+ cmark_strbuf_putc(html, '>');
168
+ } else {
169
+ cmark_strbuf_puts(html, "</li>\n");
170
+ }
171
+ break;
172
+
173
+ case CMARK_NODE_HEADING:
174
+ if (entering) {
175
+ cmark_html_render_cr(html);
176
+ start_heading[2] = (char)('0' + node->as.heading.level);
177
+ cmark_strbuf_puts(html, start_heading);
178
+ cmark_html_render_sourcepos(node, html, options);
179
+ cmark_strbuf_putc(html, '>');
180
+ } else {
181
+ end_heading[3] = (char)('0' + node->as.heading.level);
182
+ cmark_strbuf_puts(html, end_heading);
183
+ cmark_strbuf_puts(html, ">\n");
184
+ }
185
+ break;
186
+
187
+ case CMARK_NODE_CODE_BLOCK:
188
+ cmark_html_render_cr(html);
189
+
190
+ if (node->as.code.info.len == 0) {
191
+ cmark_strbuf_puts(html, "<pre");
192
+ cmark_html_render_sourcepos(node, html, options);
193
+ cmark_strbuf_puts(html, "><code>");
194
+ } else {
195
+ bufsize_t first_tag = 0;
196
+ while (first_tag < node->as.code.info.len &&
197
+ !cmark_isspace(node->as.code.info.data[first_tag])) {
198
+ first_tag += 1;
199
+ }
200
+
201
+ if (options & CMARK_OPT_GITHUB_PRE_LANG) {
202
+ cmark_strbuf_puts(html, "<pre");
203
+ cmark_html_render_sourcepos(node, html, options);
204
+ cmark_strbuf_puts(html, " lang=\"");
205
+ escape_html(html, node->as.code.info.data, first_tag);
206
+ if (first_tag < node->as.code.info.len && (options & CMARK_OPT_FULL_INFO_STRING)) {
207
+ cmark_strbuf_puts(html, "\" data-meta=\"");
208
+ escape_html(html, node->as.code.info.data + first_tag + 1, node->as.code.info.len - first_tag - 1);
209
+ }
210
+ cmark_strbuf_puts(html, "\"><code>");
211
+ } else {
212
+ cmark_strbuf_puts(html, "<pre");
213
+ cmark_html_render_sourcepos(node, html, options);
214
+ cmark_strbuf_puts(html, "><code class=\"language-");
215
+ escape_html(html, node->as.code.info.data, first_tag);
216
+ if (first_tag < node->as.code.info.len && (options & CMARK_OPT_FULL_INFO_STRING)) {
217
+ cmark_strbuf_puts(html, "\" data-meta=\"");
218
+ escape_html(html, node->as.code.info.data + first_tag + 1, node->as.code.info.len - first_tag - 1);
219
+ }
220
+ cmark_strbuf_puts(html, "\">");
221
+ }
222
+ }
223
+
224
+ escape_html(html, node->as.code.literal.data, node->as.code.literal.len);
225
+ cmark_strbuf_puts(html, "</code></pre>\n");
226
+ break;
227
+
228
+ case CMARK_NODE_HTML_BLOCK:
229
+ cmark_html_render_cr(html);
230
+ if (!(options & CMARK_OPT_UNSAFE)) {
231
+ cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
232
+ } else if (renderer->filter_extensions) {
233
+ filter_html_block(renderer, node->as.literal.data, node->as.literal.len);
234
+ } else {
235
+ cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
236
+ }
237
+ cmark_html_render_cr(html);
238
+ break;
239
+
240
+ case CMARK_NODE_CUSTOM_BLOCK:
241
+ cmark_html_render_cr(html);
242
+ if (entering) {
243
+ cmark_strbuf_put(html, node->as.custom.on_enter.data,
244
+ node->as.custom.on_enter.len);
245
+ } else {
246
+ cmark_strbuf_put(html, node->as.custom.on_exit.data,
247
+ node->as.custom.on_exit.len);
248
+ }
249
+ cmark_html_render_cr(html);
250
+ break;
251
+
252
+ case CMARK_NODE_THEMATIC_BREAK:
253
+ cmark_html_render_cr(html);
254
+ cmark_strbuf_puts(html, "<hr");
255
+ cmark_html_render_sourcepos(node, html, options);
256
+ cmark_strbuf_puts(html, " />\n");
257
+ break;
258
+
259
+ case CMARK_NODE_PARAGRAPH:
260
+ parent = cmark_node_parent(node);
261
+ grandparent = cmark_node_parent(parent);
262
+ if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) {
263
+ tight = grandparent->as.list.tight;
264
+ } else {
265
+ tight = false;
266
+ }
267
+ if (!tight) {
268
+ if (entering) {
269
+ cmark_html_render_cr(html);
270
+ cmark_strbuf_puts(html, "<p");
271
+ cmark_html_render_sourcepos(node, html, options);
272
+ cmark_strbuf_putc(html, '>');
273
+ } else {
274
+ if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) {
275
+ cmark_strbuf_putc(html, ' ');
276
+ S_put_footnote_backref(renderer, html);
277
+ }
278
+ cmark_strbuf_puts(html, "</p>\n");
279
+ }
280
+ }
281
+ break;
282
+
283
+ case CMARK_NODE_TEXT:
284
+ escape_html(html, node->as.literal.data, node->as.literal.len);
285
+ break;
286
+
287
+ case CMARK_NODE_LINEBREAK:
288
+ cmark_strbuf_puts(html, "<br />\n");
289
+ break;
290
+
291
+ case CMARK_NODE_SOFTBREAK:
292
+ if (options & CMARK_OPT_HARDBREAKS) {
293
+ cmark_strbuf_puts(html, "<br />\n");
294
+ } else if (options & CMARK_OPT_NOBREAKS) {
295
+ cmark_strbuf_putc(html, ' ');
296
+ } else {
297
+ cmark_strbuf_putc(html, '\n');
298
+ }
299
+ break;
300
+
301
+ case CMARK_NODE_CODE:
302
+ cmark_strbuf_puts(html, "<code>");
303
+ escape_html(html, node->as.literal.data, node->as.literal.len);
304
+ cmark_strbuf_puts(html, "</code>");
305
+ break;
306
+
307
+ case CMARK_NODE_HTML_INLINE:
308
+ if (!(options & CMARK_OPT_UNSAFE)) {
309
+ cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
310
+ } else {
311
+ filtered = false;
312
+ for (it = renderer->filter_extensions; it; it = it->next) {
313
+ ext = (cmark_syntax_extension *) it->data;
314
+ if (!ext->html_filter_func(ext, node->as.literal.data, node->as.literal.len)) {
315
+ filtered = true;
316
+ break;
317
+ }
318
+ }
319
+ if (!filtered) {
320
+ cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
321
+ } else {
322
+ cmark_strbuf_puts(html, "&lt;");
323
+ cmark_strbuf_put(html, node->as.literal.data + 1, node->as.literal.len - 1);
324
+ }
325
+ }
326
+ break;
327
+
328
+ case CMARK_NODE_CUSTOM_INLINE:
329
+ if (entering) {
330
+ cmark_strbuf_put(html, node->as.custom.on_enter.data,
331
+ node->as.custom.on_enter.len);
332
+ } else {
333
+ cmark_strbuf_put(html, node->as.custom.on_exit.data,
334
+ node->as.custom.on_exit.len);
335
+ }
336
+ break;
337
+
338
+ case CMARK_NODE_STRONG:
339
+ if (entering) {
340
+ cmark_strbuf_puts(html, "<strong>");
341
+ } else {
342
+ cmark_strbuf_puts(html, "</strong>");
343
+ }
344
+ break;
345
+
346
+ case CMARK_NODE_EMPH:
347
+ if (entering) {
348
+ cmark_strbuf_puts(html, "<em>");
349
+ } else {
350
+ cmark_strbuf_puts(html, "</em>");
351
+ }
352
+ break;
353
+
354
+ case CMARK_NODE_LINK:
355
+ if (entering) {
356
+ cmark_strbuf_puts(html, "<a href=\"");
357
+ if ((options & CMARK_OPT_UNSAFE) ||
358
+ !(scan_dangerous_url(&node->as.link.url, 0))) {
359
+ houdini_escape_href(html, node->as.link.url.data,
360
+ node->as.link.url.len);
361
+ }
362
+ if (node->as.link.title.len) {
363
+ cmark_strbuf_puts(html, "\" title=\"");
364
+ escape_html(html, node->as.link.title.data, node->as.link.title.len);
365
+ }
366
+ cmark_strbuf_puts(html, "\">");
367
+ } else {
368
+ cmark_strbuf_puts(html, "</a>");
369
+ }
370
+ break;
371
+
372
+ case CMARK_NODE_IMAGE:
373
+ if (entering) {
374
+ cmark_strbuf_puts(html, "<img src=\"");
375
+ if ((options & CMARK_OPT_UNSAFE) ||
376
+ !(scan_dangerous_url(&node->as.link.url, 0))) {
377
+ houdini_escape_href(html, node->as.link.url.data,
378
+ node->as.link.url.len);
379
+ }
380
+ cmark_strbuf_puts(html, "\" alt=\"");
381
+ renderer->plain = node;
382
+ } else {
383
+ if (node->as.link.title.len) {
384
+ cmark_strbuf_puts(html, "\" title=\"");
385
+ escape_html(html, node->as.link.title.data, node->as.link.title.len);
386
+ }
387
+
388
+ cmark_strbuf_puts(html, "\" />");
389
+ }
390
+ break;
391
+
392
+ case CMARK_NODE_FOOTNOTE_DEFINITION:
393
+ if (entering) {
394
+ if (renderer->footnote_ix == 0) {
395
+ cmark_strbuf_puts(html, "<section class=\"footnotes\">\n<ol>\n");
396
+ }
397
+ ++renderer->footnote_ix;
398
+ cmark_strbuf_puts(html, "<li id=\"fn");
399
+ char n[32];
400
+ snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
401
+ cmark_strbuf_puts(html, n);
402
+ cmark_strbuf_puts(html, "\">\n");
403
+ } else {
404
+ if (S_put_footnote_backref(renderer, html)) {
405
+ cmark_strbuf_putc(html, '\n');
406
+ }
407
+ cmark_strbuf_puts(html, "</li>\n");
408
+ }
409
+ break;
410
+
411
+ case CMARK_NODE_FOOTNOTE_REFERENCE:
412
+ if (entering) {
413
+ cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn");
414
+ cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
415
+ cmark_strbuf_puts(html, "\" id=\"fnref");
416
+ cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
417
+ cmark_strbuf_puts(html, "\">");
418
+ cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
419
+ cmark_strbuf_puts(html, "</a></sup>");
420
+ }
421
+ break;
422
+
423
+ default:
424
+ assert(false);
425
+ break;
426
+ }
427
+
428
+ return 1;
429
+ }
430
+
431
+ char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions) {
432
+ return cmark_render_html_with_mem(root, options, extensions, cmark_node_mem(root));
433
+ }
434
+
435
+ char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem) {
436
+ char *result;
437
+ cmark_strbuf html = CMARK_BUF_INIT(mem);
438
+ cmark_event_type ev_type;
439
+ cmark_node *cur;
440
+ cmark_html_renderer renderer = {&html, NULL, NULL, 0, 0, NULL};
441
+ cmark_iter *iter = cmark_iter_new(root);
442
+
443
+ for (; extensions; extensions = extensions->next)
444
+ if (((cmark_syntax_extension *) extensions->data)->html_filter_func)
445
+ renderer.filter_extensions = cmark_llist_append(
446
+ mem,
447
+ renderer.filter_extensions,
448
+ (cmark_syntax_extension *) extensions->data);
449
+
450
+ while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
451
+ cur = cmark_iter_get_node(iter);
452
+ S_render_node(&renderer, cur, ev_type, options);
453
+ }
454
+
455
+ if (renderer.footnote_ix) {
456
+ cmark_strbuf_puts(&html, "</ol>\n</section>\n");
457
+ }
458
+
459
+ result = (char *)cmark_strbuf_detach(&html);
460
+
461
+ cmark_llist_free(mem, renderer.filter_extensions);
462
+
463
+ cmark_iter_free(iter);
464
+ return result;
465
+ }