markly 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/bin/markly +94 -0
  3. data/ext/markly/arena.c +103 -0
  4. data/ext/markly/autolink.c +425 -0
  5. data/ext/markly/autolink.h +8 -0
  6. data/ext/markly/blocks.c +1585 -0
  7. data/ext/markly/buffer.c +278 -0
  8. data/ext/markly/buffer.h +116 -0
  9. data/ext/markly/case_fold_switch.inc +4327 -0
  10. data/ext/markly/chunk.h +135 -0
  11. data/ext/markly/cmark-gfm-core-extensions.h +54 -0
  12. data/ext/markly/cmark-gfm-extension_api.h +736 -0
  13. data/ext/markly/cmark-gfm-extensions_export.h +42 -0
  14. data/ext/markly/cmark-gfm.h +817 -0
  15. data/ext/markly/cmark-gfm_export.h +42 -0
  16. data/ext/markly/cmark-gfm_version.h +7 -0
  17. data/ext/markly/cmark.c +55 -0
  18. data/ext/markly/cmark_ctype.c +44 -0
  19. data/ext/markly/cmark_ctype.h +33 -0
  20. data/ext/markly/commonmark.c +519 -0
  21. data/ext/markly/config.h +76 -0
  22. data/ext/markly/core-extensions.c +27 -0
  23. data/ext/markly/entities.inc +2138 -0
  24. data/ext/markly/ext_scanners.c +1159 -0
  25. data/ext/markly/ext_scanners.h +24 -0
  26. data/ext/markly/extconf.rb +7 -0
  27. data/ext/markly/footnotes.c +40 -0
  28. data/ext/markly/footnotes.h +25 -0
  29. data/ext/markly/houdini.h +57 -0
  30. data/ext/markly/houdini_href_e.c +100 -0
  31. data/ext/markly/houdini_html_e.c +66 -0
  32. data/ext/markly/houdini_html_u.c +149 -0
  33. data/ext/markly/html.c +465 -0
  34. data/ext/markly/html.h +27 -0
  35. data/ext/markly/inlines.c +1633 -0
  36. data/ext/markly/inlines.h +29 -0
  37. data/ext/markly/iterator.c +159 -0
  38. data/ext/markly/iterator.h +26 -0
  39. data/ext/markly/latex.c +466 -0
  40. data/ext/markly/linked_list.c +37 -0
  41. data/ext/markly/man.c +278 -0
  42. data/ext/markly/map.c +122 -0
  43. data/ext/markly/map.h +41 -0
  44. data/ext/markly/markly.c +1226 -0
  45. data/ext/markly/markly.h +16 -0
  46. data/ext/markly/node.c +979 -0
  47. data/ext/markly/node.h +118 -0
  48. data/ext/markly/parser.h +58 -0
  49. data/ext/markly/plaintext.c +235 -0
  50. data/ext/markly/plugin.c +36 -0
  51. data/ext/markly/plugin.h +34 -0
  52. data/ext/markly/references.c +42 -0
  53. data/ext/markly/references.h +26 -0
  54. data/ext/markly/registry.c +63 -0
  55. data/ext/markly/registry.h +24 -0
  56. data/ext/markly/render.c +205 -0
  57. data/ext/markly/render.h +62 -0
  58. data/ext/markly/scanners.c +20382 -0
  59. data/ext/markly/scanners.h +62 -0
  60. data/ext/markly/scanners.re +326 -0
  61. data/ext/markly/strikethrough.c +167 -0
  62. data/ext/markly/strikethrough.h +9 -0
  63. data/ext/markly/syntax_extension.c +149 -0
  64. data/ext/markly/syntax_extension.h +34 -0
  65. data/ext/markly/table.c +803 -0
  66. data/ext/markly/table.h +12 -0
  67. data/ext/markly/tagfilter.c +60 -0
  68. data/ext/markly/tagfilter.h +8 -0
  69. data/ext/markly/tasklist.c +156 -0
  70. data/ext/markly/tasklist.h +8 -0
  71. data/ext/markly/utf8.c +317 -0
  72. data/ext/markly/utf8.h +35 -0
  73. data/ext/markly/xml.c +181 -0
  74. data/lib/markly.rb +43 -0
  75. data/lib/markly/flags.rb +37 -0
  76. data/lib/markly/markly.so +0 -0
  77. data/lib/markly/node.rb +70 -0
  78. data/lib/markly/node/inspect.rb +59 -0
  79. data/lib/markly/renderer.rb +133 -0
  80. data/lib/markly/renderer/html_renderer.rb +252 -0
  81. data/lib/markly/version.rb +5 -0
  82. metadata +211 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3121bb14bcb09883bfb84879b6b69e28d47e3781f0bcd6225737734040ac2ec5
4
+ data.tar.gz: 2f47e7add1ee43c03e7b75b45e5f2c7ec2c322cac842aa5f5bdb8e539fbca8d5
5
+ SHA512:
6
+ metadata.gz: 6da8e6ef6f672489b7bcd4b6e378c86d3240e4b3f9d67aaba26bbe5414a363c4a071be3695bca85bb9658c0213cf1e59d32edb389b5ec8a0aebd9689a8245ff7
7
+ data.tar.gz: ce28eac1fcca2c0dfc6e637ca937e16ffbb211d4d64c596f3869b56860fb04227b20065c86a5d335cb8d11cdd49c1e924a09b0caa0b515e404e123420ed39e7d
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'optparse'
5
+ require 'ostruct'
6
+
7
+ require_relative '../lib/markly'
8
+
9
# Parse the command-line arguments and return them as an OpenStruct.
#
# Fields set on the returned struct:
#   active_extensions   - Array of extension names (as Symbols) given via --extension
#   active_parse_flags  - flags forwarded to Markly.parse
#   active_render_flags - flags forwarded when rendering to HTML
#   renderer            - true when --html-renderer was given (otherwise unset)
#
# An unknown extension, parse option or render option aborts the process with
# a message. --help and --version print their output and exit.
def parse_options
  options = OpenStruct.new
  extensions = Markly.extensions
  parse_flags = Markly::PARSE_FLAGS
  render_flags = Markly::RENDER_FLAGS

  options.active_extensions = []
  options.active_parse_flags = Markly::DEFAULT
  options.active_render_flags = Markly::DEFAULT

  option_parser = OptionParser.new do |opts|
    opts.banner = 'Usage: markly [--html-renderer] [--extension=EXTENSION]'
    opts.separator ' [--parse-option=OPTION]'
    opts.separator ' [--render-option=OPTION]'
    opts.separator ' [FILE..]'
    opts.separator ''
    opts.separator 'Convert one or more CommonMark files to HTML and write to standard output.'
    opts.separator 'If no FILE argument is provided, text will be read from STDIN.'
    opts.separator ''

    opts.on('--extension=EXTENSION', Array, 'Use EXTENSION for parsing and HTML output (unless --html-renderer is specified)') do |values|
      values.each do |value|
        if extensions.include?(value)
          options.active_extensions << value.to_sym
        else
          abort("extension '#{value}' not found")
        end
      end
    end

    opts.on('-h', '--help', 'Prints this help') do
      puts opts
      puts
      puts "Available extentions: #{extensions.join(', ')}"
      # Was `parser_flags`, which is not defined anywhere in this method.
      puts "Available parse flags: #{parse_flags.keys.join(', ')}"
      puts "Available render flags: #{render_flags.keys.join(', ')}"
      puts
      puts 'See the README for more information on these.'
      exit
    end

    opts.on('--html-renderer', 'Use the HtmlRenderer renderer rather than the native C renderer') do
      options.renderer = true
    end

    # NOTE(review): the two handlers below assume PARSE_FLAGS / RENDER_FLAGS map
    # option names (Symbols) to Integer bit masks and that Markly::DEFAULT is an
    # Integer, so flags are combined with bitwise OR -- confirm against
    # lib/markly/flags.rb.
    opts.on('--parse-option=OPTION', Array, 'OPTION passed during parsing') do |values|
      values.each do |value|
        flag = value.to_sym
        if parse_flags.key?(flag)
          options.active_parse_flags |= parse_flags[flag]
        else
          abort("parse-option '#{value}' not found")
        end
      end
    end

    opts.on('--render-option=OPTION', Array, 'OPTION passed during rendering') do |values|
      values.each do |value|
        flag = value.to_sym
        if render_flags.key?(flag)
          options.active_render_flags |= render_flags[flag]
        else
          abort("render-option '#{value}' not found")
        end
      end
    end

    opts.on('-v', '--version', 'Version information') do
      puts "markly #{Markly::VERSION}"
      exit
    end
  end

  # parse! removes the recognised switches from ARGV, leaving only FILE arguments.
  option_parser.parse!

  options
end
84
+
85
# Script entry point: parse the switches, read all input through ARGF,
# parse it as Markdown and write the resulting HTML to standard output.
options = parse_options

document = Markly.parse(
  ARGF.read,
  flags: options.active_parse_flags,
  extensions: options.active_extensions
)

html =
  if options.renderer
    # --html-renderer: render through the Ruby HtmlRenderer class.
    Markly::HtmlRenderer.new(extensions: options.active_extensions).render(document)
  else
    document.to_html(flags: options.active_render_flags, extensions: options.active_extensions)
  end

STDOUT.write(html)
@@ -0,0 +1,103 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <stdint.h>
4
+ #include "cmark-gfm.h"
5
+ #include "cmark-gfm-extension_api.h"
6
+
7
/* One block of arena storage. Chunks are linked newest-to-oldest via `prev`. */
struct arena_chunk {
  size_t sz;                /* capacity of `ptr`, in bytes */
  size_t used;              /* bytes of `ptr` already handed out */
  uint8_t push_point;       /* set by cmark_arena_push(); cmark_arena_pop() stops here */
  void *ptr;                /* backing storage for this chunk */
  struct arena_chunk *prev; /* next older chunk, or NULL at the end of the list */
};

/* Newest chunk of the arena; NULL until the arena is first used or after a reset. */
static struct arena_chunk *A = NULL;
13
+
14
+ static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
15
+ struct arena_chunk *c = (struct arena_chunk *)calloc(1, sizeof(*c));
16
+ if (!c)
17
+ abort();
18
+ c->sz = sz;
19
+ c->ptr = calloc(1, sz);
20
+ if (!c->ptr)
21
+ abort();
22
+ c->prev = prev;
23
+ return c;
24
+ }
25
+
26
+ void cmark_arena_push(void) {
27
+ if (!A)
28
+ return;
29
+ A->push_point = 1;
30
+ A = alloc_arena_chunk(10240, A);
31
+ }
32
+
33
+ int cmark_arena_pop(void) {
34
+ if (!A)
35
+ return 0;
36
+ while (A && !A->push_point) {
37
+ free(A->ptr);
38
+ struct arena_chunk *n = A->prev;
39
+ free(A);
40
+ A = n;
41
+ }
42
+ if (A)
43
+ A->push_point = 0;
44
+ return 1;
45
+ }
46
+
47
+ static void init_arena(void) {
48
+ A = alloc_arena_chunk(4 * 1048576, NULL);
49
+ }
50
+
51
+ void cmark_arena_reset(void) {
52
+ while (A) {
53
+ free(A->ptr);
54
+ struct arena_chunk *n = A->prev;
55
+ free(A);
56
+ A = n;
57
+ }
58
+ }
59
+
60
+ static void *arena_calloc(size_t nmem, size_t size) {
61
+ if (!A)
62
+ init_arena();
63
+
64
+ size_t sz = nmem * size + sizeof(size_t);
65
+
66
+ // Round allocation sizes to largest integer size to
67
+ // ensure returned memory is correctly aligned
68
+ const size_t align = sizeof(size_t) - 1;
69
+ sz = (sz + align) & ~align;
70
+
71
+ if (sz > A->sz) {
72
+ A->prev = alloc_arena_chunk(sz, A->prev);
73
+ return (uint8_t *) A->prev->ptr + sizeof(size_t);
74
+ }
75
+ if (sz > A->sz - A->used) {
76
+ A = alloc_arena_chunk(A->sz + A->sz / 2, A);
77
+ }
78
+ void *ptr = (uint8_t *) A->ptr + A->used;
79
+ A->used += sz;
80
+ *((size_t *) ptr) = sz - sizeof(size_t);
81
+ return (uint8_t *) ptr + sizeof(size_t);
82
+ }
83
+
84
/*
 * Arena implementation of realloc: always allocates a new block of `size`
 * bytes and copies the old contents into it. The old block is not released
 * (arena memory is only reclaimed by cmark_arena_pop / cmark_arena_reset).
 *
 * `ptr` may be NULL, in which case this is a plain zero-filled allocation.
 * Otherwise `ptr` must have come from arena_calloc/arena_realloc, because
 * the size header stored just before it is read.
 */
static void *arena_realloc(void *ptr, size_t size) {
  // arena_calloc() creates the arena on first use, so no explicit init here.
  void *new_ptr = arena_calloc(1, size);

  if (ptr) {
    size_t old_size = ((size_t *) ptr)[-1];
    // Never copy more than the caller asked for: when shrinking, copying the
    // whole old block would write past the end of the new allocation.
    memcpy(new_ptr, ptr, old_size < size ? old_size : size);
  }

  return new_ptr;
}
93
+
94
/*
 * Arena implementation of free. Deliberately does nothing: individual
 * allocations are never released on their own.
 */
static void arena_free(void *ptr) {
  (void)ptr; /* parameter intentionally unused */
}
98
+
99
+ cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
100
+
101
+ cmark_mem *cmark_get_arena_mem_allocator() {
102
+ return &CMARK_ARENA_MEM_ALLOCATOR;
103
+ }
@@ -0,0 +1,425 @@
1
+ #include "autolink.h"
2
+ #include <parser.h>
3
+ #include <string.h>
4
+ #include <utf8.h>
5
+
6
+ #if defined(_WIN32)
7
+ #define strncasecmp _strnicmp
8
+ #else
9
+ #include <strings.h>
10
+ #endif
11
+
12
+ static int is_valid_hostchar(const uint8_t *link, size_t link_len) {
13
+ int32_t ch;
14
+ int r = cmark_utf8proc_iterate(link, (bufsize_t)link_len, &ch);
15
+ if (r < 0)
16
+ return 0;
17
+ return !cmark_utf8proc_is_space(ch) && !cmark_utf8proc_is_punctuation(ch);
18
+ }
19
+
20
/*
 * Return 1 when `link` starts (case-insensitively) with one of the accepted
 * schemes -- http://, https:// or ftp:// -- and the scheme is immediately
 * followed by a valid host character; return 0 otherwise.
 */
static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
  static const char *const schemes[] = {"http://", "https://", "ftp://"};
  const size_t scheme_count = sizeof(schemes) / sizeof(schemes[0]);

  for (size_t idx = 0; idx < scheme_count; ++idx) {
    const char *scheme = schemes[idx];
    const size_t scheme_len = strlen(scheme);

    /* There must be at least one byte after the scheme. */
    if (link_len <= scheme_len)
      continue;

    if (strncasecmp((char *)link, scheme, scheme_len) != 0)
      continue;

    if (is_valid_hostchar(link + scheme_len, link_len - scheme_len))
      return 1;
  }

  return 0;
}
36
+
37
/*
 * Trim a candidate autolink so that trailing delimiters are not swallowed.
 *
 * `data` points at the start of the candidate and `link_end` is its
 * tentative length. The link is first cut at the first '<', then characters
 * are stripped from the end while they are:
 *   - one of ? ! . , : * _ ~ ' "
 *   - a ';', together with a preceding "&letters" run when it terminates an
 *     entity-like sequence
 *   - a ')' that has no matching '(' inside the link
 *
 * Returns the trimmed length, which may be 0.
 */
static size_t autolink_delim(uint8_t *data, size_t link_end) {
  size_t i;

  for (i = 0; i < link_end; ++i) {
    if (data[i] == '<') {
      link_end = i;
      break;
    }
  }

  while (link_end > 0) {
    const uint8_t last = data[link_end - 1];

    if (strchr("?!.,:*_~'\"", last) != NULL) {
      link_end--;
    } else if (last == ';') {
      if (link_end < 2) {
        /* A lone ';' cannot terminate an entity. Computing link_end - 2
         * here would wrap around size_t and index far outside `data`. */
        link_end--;
        continue;
      }

      /* Walk back over the letters of a possible "&name;" entity. */
      size_t new_end = link_end - 2;

      while (new_end > 0 && cmark_isalpha(data[new_end]))
        new_end--;

      if (new_end < link_end - 2 && data[new_end] == '&')
        link_end = new_end; /* drop the whole entity */
      else
        link_end--;         /* drop just the ';' */
    } else if (last == ')') {
      /* Allow any number of matching brackets at the end of the URL. If
       * there is a greater number of closing brackets than opening ones,
       * we remove one character from the end of the link.
       *
       * Examples (input text => output linked portion):
       *
       *   http://www.pokemon.com/Pikachu_(Electric)
       *     => http://www.pokemon.com/Pikachu_(Electric)
       *
       *   http://www.pokemon.com/Pikachu_((Electric)
       *     => http://www.pokemon.com/Pikachu_((Electric)
       *
       *   http://www.pokemon.com/Pikachu_(Electric))
       *     => http://www.pokemon.com/Pikachu_(Electric)
       *
       *   http://www.pokemon.com/Pikachu_((Electric))
       *     => http://www.pokemon.com/Pikachu_((Electric))
       */
      size_t opening = 0;
      size_t closing = 0;

      for (i = 0; i < link_end; ++i) {
        if (data[i] == '(')
          opening++;
        else if (data[i] == ')')
          closing++;
      }

      if (closing <= opening)
        break;

      link_end--;
    } else {
      break;
    }
  }

  return link_end;
}
115
+
116
/*
 * Scan `data` (of `size` bytes) for a run of domain characters, starting at
 * index 1 and never examining the final byte. The scan stops at the first
 * byte that is not '_', '.', '-' or a valid host character.
 *
 * Returns 0 if an underscore occurs after the last dot seen, or between the
 * last two dots. Otherwise returns the index where the scan stopped -- except
 * that, when `allow_short` is zero, at least one dot must have been seen or
 * 0 is returned.
 */
static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
  size_t pos;
  size_t dots = 0;
  size_t uscores_before_last_dot = 0; /* underscores between the last two dots */
  size_t uscores_after_last_dot = 0;  /* underscores since the last dot */

  for (pos = 1; pos < size - 1; pos++) {
    const uint8_t ch = data[pos];

    if (ch == '_') {
      uscores_after_last_dot++;
      continue;
    }

    if (ch == '.') {
      uscores_before_last_dot = uscores_after_last_dot;
      uscores_after_last_dot = 0;
      dots++;
      continue;
    }

    if (!is_valid_hostchar(data + pos, size - pos) && ch != '-')
      break;
  }

  if (uscores_before_last_dot > 0 || uscores_after_last_dot > 0)
    return 0;

  /* With allow_short we don't need a valid domain in the strict sense (with
   * at least one dot); it is enough that it is composed of valid domain
   * characters, so return the length of the valid sequence. */
  if (allow_short)
    return pos;

  /* A valid domain needs to have at least one dot. */
  return dots ? pos : 0;
}
145
+
146
+ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
147
+ cmark_inline_parser *inline_parser) {
148
+ cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
149
+ size_t max_rewind = cmark_inline_parser_get_offset(inline_parser);
150
+ uint8_t *data = chunk->data + max_rewind;
151
+ size_t size = chunk->len - max_rewind;
152
+ int start = cmark_inline_parser_get_column(inline_parser);
153
+
154
+ size_t link_end;
155
+
156
+ if (max_rewind > 0 && strchr("*_~(", data[-1]) == NULL &&
157
+ !cmark_isspace(data[-1]))
158
+ return 0;
159
+
160
+ if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
161
+ return 0;
162
+
163
+ link_end = check_domain(data, size, 0);
164
+
165
+ if (link_end == 0)
166
+ return NULL;
167
+
168
+ while (link_end < size && !cmark_isspace(data[link_end]))
169
+ link_end++;
170
+
171
+ link_end = autolink_delim(data, link_end);
172
+
173
+ if (link_end == 0)
174
+ return NULL;
175
+
176
+ cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
177
+
178
+ cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
179
+
180
+ cmark_strbuf buf;
181
+ cmark_strbuf_init(parser->mem, &buf, 10);
182
+ cmark_strbuf_puts(&buf, "http://");
183
+ cmark_strbuf_put(&buf, data, (bufsize_t)link_end);
184
+ node->as.link.url = cmark_chunk_buf_detach(&buf);
185
+
186
+ cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
187
+ text->as.literal =
188
+ cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end);
189
+ cmark_node_append_child(node, text);
190
+
191
+ node->start_line = text->start_line =
192
+ node->end_line = text->end_line =
193
+ cmark_inline_parser_get_line(inline_parser);
194
+
195
+ node->start_column = text->start_column = start - 1;
196
+ node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
197
+
198
+ return node;
199
+ }
200
+
201
+ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
202
+ cmark_inline_parser *inline_parser) {
203
+ size_t link_end, domain_len;
204
+ int rewind = 0;
205
+
206
+ cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
207
+ int max_rewind = cmark_inline_parser_get_offset(inline_parser);
208
+ uint8_t *data = chunk->data + max_rewind;
209
+ size_t size = chunk->len - max_rewind;
210
+
211
+ if (size < 4 || data[1] != '/' || data[2] != '/')
212
+ return 0;
213
+
214
+ while (rewind < max_rewind && cmark_isalpha(data[-rewind - 1]))
215
+ rewind++;
216
+
217
+ if (!sd_autolink_issafe(data - rewind, size + rewind))
218
+ return 0;
219
+
220
+ link_end = strlen("://");
221
+
222
+ domain_len = check_domain(data + link_end, size - link_end, 1);
223
+
224
+ if (domain_len == 0)
225
+ return 0;
226
+
227
+ link_end += domain_len;
228
+ while (link_end < size && !cmark_isspace(data[link_end]))
229
+ link_end++;
230
+
231
+ link_end = autolink_delim(data, link_end);
232
+
233
+ if (link_end == 0)
234
+ return NULL;
235
+
236
+ cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
237
+ cmark_node_unput(parent, rewind);
238
+
239
+ cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
240
+
241
+ cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind,
242
+ (bufsize_t)(link_end + rewind));
243
+ node->as.link.url = url;
244
+
245
+ cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
246
+ text->as.literal = url;
247
+ cmark_node_append_child(node, text);
248
+
249
+ return node;
250
+ }
251
+
252
+ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
253
+ cmark_node *parent, unsigned char c,
254
+ cmark_inline_parser *inline_parser) {
255
+ if (cmark_inline_parser_in_bracket(inline_parser, false) ||
256
+ cmark_inline_parser_in_bracket(inline_parser, true))
257
+ return NULL;
258
+
259
+ if (c == ':')
260
+ return url_match(parser, parent, inline_parser);
261
+
262
+ if (c == 'w')
263
+ return www_match(parser, parent, inline_parser);
264
+
265
+ return NULL;
266
+
267
+ // note that we could end up re-consuming something already a
268
+ // part of an inline, because we don't track when the last
269
+ // inline was finished in inlines.c.
270
+ }
271
+
272
+ static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset, int depth) {
273
+ // postprocess_text can recurse very deeply if there is a very long line of
274
+ // '@' only. Stop at a reasonable depth to ensure it cannot crash.
275
+ if (depth > 1000) return;
276
+
277
+ size_t link_end;
278
+ uint8_t *data = text->as.literal.data,
279
+ *at;
280
+ size_t size = text->as.literal.len;
281
+ int rewind, max_rewind,
282
+ nb = 0, np = 0, ns = 0;
283
+
284
+ if (offset < 0 || (size_t)offset >= size)
285
+ return;
286
+
287
+ data += offset;
288
+ size -= offset;
289
+
290
+ at = (uint8_t *)memchr(data, '@', size);
291
+ if (!at)
292
+ return;
293
+
294
+ max_rewind = (int)(at - data);
295
+ data += max_rewind;
296
+ size -= max_rewind;
297
+
298
+ for (rewind = 0; rewind < max_rewind; ++rewind) {
299
+ uint8_t c = data[-rewind - 1];
300
+
301
+ if (cmark_isalnum(c))
302
+ continue;
303
+
304
+ if (strchr(".+-_", c) != NULL)
305
+ continue;
306
+
307
+ if (c == '/')
308
+ ns++;
309
+
310
+ break;
311
+ }
312
+
313
+ if (rewind == 0 || ns > 0) {
314
+ postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
315
+ return;
316
+ }
317
+
318
+ for (link_end = 0; link_end < size; ++link_end) {
319
+ uint8_t c = data[link_end];
320
+
321
+ if (cmark_isalnum(c))
322
+ continue;
323
+
324
+ if (c == '@')
325
+ nb++;
326
+ else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
327
+ np++;
328
+ else if (c != '-' && c != '_')
329
+ break;
330
+ }
331
+
332
+ if (link_end < 2 || nb != 1 || np == 0 ||
333
+ (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) {
334
+ postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
335
+ return;
336
+ }
337
+
338
+ link_end = autolink_delim(data, link_end);
339
+
340
+ if (link_end == 0) {
341
+ postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
342
+ return;
343
+ }
344
+
345
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
346
+
347
+ cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
348
+ cmark_strbuf buf;
349
+ cmark_strbuf_init(parser->mem, &buf, 10);
350
+ cmark_strbuf_puts(&buf, "mailto:");
351
+ cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind));
352
+ link_node->as.link.url = cmark_chunk_buf_detach(&buf);
353
+
354
+ cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
355
+ cmark_chunk email = cmark_chunk_dup(
356
+ &text->as.literal,
357
+ offset + max_rewind - rewind,
358
+ (bufsize_t)(link_end + rewind));
359
+ cmark_chunk_to_cstr(parser->mem, &email);
360
+ link_text->as.literal = email;
361
+ cmark_node_append_child(link_node, link_text);
362
+
363
+ cmark_node_insert_after(text, link_node);
364
+
365
+ cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
366
+ post->as.literal = cmark_chunk_dup(&text->as.literal,
367
+ (bufsize_t)(offset + max_rewind + link_end),
368
+ (bufsize_t)(size - link_end));
369
+ cmark_chunk_to_cstr(parser->mem, &post->as.literal);
370
+
371
+ cmark_node_insert_after(link_node, post);
372
+
373
+ text->as.literal.len = offset + max_rewind - rewind;
374
+ text->as.literal.data[text->as.literal.len] = 0;
375
+
376
+ postprocess_text(parser, post, 0, depth + 1);
377
+ }
378
+
379
+ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
380
+ cmark_iter *iter;
381
+ cmark_event_type ev;
382
+ cmark_node *node;
383
+ bool in_link = false;
384
+
385
+ cmark_consolidate_text_nodes(root);
386
+ iter = cmark_iter_new(root);
387
+
388
+ while ((ev = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
389
+ node = cmark_iter_get_node(iter);
390
+ if (in_link) {
391
+ if (ev == CMARK_EVENT_EXIT && node->type == CMARK_NODE_LINK) {
392
+ in_link = false;
393
+ }
394
+ continue;
395
+ }
396
+
397
+ if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_LINK) {
398
+ in_link = true;
399
+ continue;
400
+ }
401
+
402
+ if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) {
403
+ postprocess_text(parser, node, 0, /*depth*/0);
404
+ }
405
+ }
406
+
407
+ cmark_iter_free(iter);
408
+
409
+ return root;
410
+ }
411
+
412
+ cmark_syntax_extension *create_autolink_extension(void) {
413
+ cmark_syntax_extension *ext = cmark_syntax_extension_new("autolink");
414
+ cmark_llist *special_chars = NULL;
415
+
416
+ cmark_syntax_extension_set_match_inline_func(ext, match);
417
+ cmark_syntax_extension_set_postprocess_func(ext, postprocess);
418
+
419
+ cmark_mem *mem = cmark_get_default_mem_allocator();
420
+ special_chars = cmark_llist_append(mem, special_chars, (void *)':');
421
+ special_chars = cmark_llist_append(mem, special_chars, (void *)'w');
422
+ cmark_syntax_extension_set_special_inline_chars(ext, special_chars);
423
+
424
+ return ext;
425
+ }