markly 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/markly +94 -0
- data/ext/markly/arena.c +103 -0
- data/ext/markly/autolink.c +425 -0
- data/ext/markly/autolink.h +8 -0
- data/ext/markly/blocks.c +1585 -0
- data/ext/markly/buffer.c +278 -0
- data/ext/markly/buffer.h +116 -0
- data/ext/markly/case_fold_switch.inc +4327 -0
- data/ext/markly/chunk.h +135 -0
- data/ext/markly/cmark-gfm-core-extensions.h +54 -0
- data/ext/markly/cmark-gfm-extension_api.h +736 -0
- data/ext/markly/cmark-gfm-extensions_export.h +42 -0
- data/ext/markly/cmark-gfm.h +817 -0
- data/ext/markly/cmark-gfm_export.h +42 -0
- data/ext/markly/cmark-gfm_version.h +7 -0
- data/ext/markly/cmark.c +55 -0
- data/ext/markly/cmark_ctype.c +44 -0
- data/ext/markly/cmark_ctype.h +33 -0
- data/ext/markly/commonmark.c +519 -0
- data/ext/markly/config.h +76 -0
- data/ext/markly/core-extensions.c +27 -0
- data/ext/markly/entities.inc +2138 -0
- data/ext/markly/ext_scanners.c +1159 -0
- data/ext/markly/ext_scanners.h +24 -0
- data/ext/markly/extconf.rb +7 -0
- data/ext/markly/footnotes.c +40 -0
- data/ext/markly/footnotes.h +25 -0
- data/ext/markly/houdini.h +57 -0
- data/ext/markly/houdini_href_e.c +100 -0
- data/ext/markly/houdini_html_e.c +66 -0
- data/ext/markly/houdini_html_u.c +149 -0
- data/ext/markly/html.c +465 -0
- data/ext/markly/html.h +27 -0
- data/ext/markly/inlines.c +1633 -0
- data/ext/markly/inlines.h +29 -0
- data/ext/markly/iterator.c +159 -0
- data/ext/markly/iterator.h +26 -0
- data/ext/markly/latex.c +466 -0
- data/ext/markly/linked_list.c +37 -0
- data/ext/markly/man.c +278 -0
- data/ext/markly/map.c +122 -0
- data/ext/markly/map.h +41 -0
- data/ext/markly/markly.c +1226 -0
- data/ext/markly/markly.h +16 -0
- data/ext/markly/node.c +979 -0
- data/ext/markly/node.h +118 -0
- data/ext/markly/parser.h +58 -0
- data/ext/markly/plaintext.c +235 -0
- data/ext/markly/plugin.c +36 -0
- data/ext/markly/plugin.h +34 -0
- data/ext/markly/references.c +42 -0
- data/ext/markly/references.h +26 -0
- data/ext/markly/registry.c +63 -0
- data/ext/markly/registry.h +24 -0
- data/ext/markly/render.c +205 -0
- data/ext/markly/render.h +62 -0
- data/ext/markly/scanners.c +20382 -0
- data/ext/markly/scanners.h +62 -0
- data/ext/markly/scanners.re +326 -0
- data/ext/markly/strikethrough.c +167 -0
- data/ext/markly/strikethrough.h +9 -0
- data/ext/markly/syntax_extension.c +149 -0
- data/ext/markly/syntax_extension.h +34 -0
- data/ext/markly/table.c +803 -0
- data/ext/markly/table.h +12 -0
- data/ext/markly/tagfilter.c +60 -0
- data/ext/markly/tagfilter.h +8 -0
- data/ext/markly/tasklist.c +156 -0
- data/ext/markly/tasklist.h +8 -0
- data/ext/markly/utf8.c +317 -0
- data/ext/markly/utf8.h +35 -0
- data/ext/markly/xml.c +181 -0
- data/lib/markly.rb +43 -0
- data/lib/markly/flags.rb +37 -0
- data/lib/markly/markly.so +0 -0
- data/lib/markly/node.rb +70 -0
- data/lib/markly/node/inspect.rb +59 -0
- data/lib/markly/renderer.rb +133 -0
- data/lib/markly/renderer/html_renderer.rb +252 -0
- data/lib/markly/version.rb +5 -0
- metadata +211 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3121bb14bcb09883bfb84879b6b69e28d47e3781f0bcd6225737734040ac2ec5
|
4
|
+
data.tar.gz: 2f47e7add1ee43c03e7b75b45e5f2c7ec2c322cac842aa5f5bdb8e539fbca8d5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6da8e6ef6f672489b7bcd4b6e378c86d3240e4b3f9d67aaba26bbe5414a363c4a071be3695bca85bb9658c0213cf1e59d32edb389b5ec8a0aebd9689a8245ff7
|
7
|
+
data.tar.gz: ce28eac1fcca2c0dfc6e637ca937e16ffbb211d4d64c596f3869b56860fb04227b20065c86a5d335cb8d11cdd49c1e924a09b0caa0b515e404e123420ed39e7d
|
data/bin/markly
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'optparse'
|
5
|
+
require 'ostruct'
|
6
|
+
|
7
|
+
require_relative '../lib/markly'
|
8
|
+
|
9
|
+
# Parse the command-line switches of the `markly` executable.
#
# Returns an OpenStruct carrying:
#   active_extensions   - Array of Symbols, one per accepted --extension value
#   active_parse_flags  - starts as Markly::DEFAULT; accepted --parse-option
#                         values are appended with `<<`
#   active_render_flags - starts as Markly::DEFAULT; accepted --render-option
#                         values are appended with `<<`
#   renderer            - true when --html-renderer was given (unset otherwise)
#
# Unknown extension / option names abort the process with a message.
# `--help` and `--version` print and exit. Switches are removed from ARGV
# (OptionParser#parse!), leaving only the FILE arguments behind.
def parse_options
  options = OpenStruct.new
  extensions = Markly.extensions
  parse_flags = Markly::PARSE_FLAGS
  render_flags = Markly::RENDER_FLAGS

  options.active_extensions = []
  # NOTE(review): the option handlers below append Symbols to these two values
  # with `<<`. That only works if Markly::DEFAULT is an Array-like object; if
  # it is an Integer bit mask the handlers should OR in the flag value instead.
  # Confirm against lib/markly/flags.rb.
  options.active_parse_flags = Markly::DEFAULT
  options.active_render_flags = Markly::DEFAULT

  option_parser = OptionParser.new do |opts|
    opts.banner = 'Usage: markly [--html-renderer] [--extension=EXTENSION]'
    opts.separator ' [--parse-option=OPTION]'
    opts.separator ' [--render-option=OPTION]'
    opts.separator ' [FILE..]'
    opts.separator ''
    opts.separator 'Convert one or more CommonMark files to HTML and write to standard output.'
    opts.separator 'If no FILE argument is provided, text will be read from STDIN.'
    opts.separator ''

    opts.on('--extension=EXTENSION', Array, 'Use EXTENSION for parsing and HTML output (unless --html-renderer is specified)') do |values|
      values.each do |value|
        if extensions.include?(value)
          options.active_extensions << value.to_sym
        else
          abort("extension '#{value}' not found")
        end
      end
    end

    opts.on('-h', '--help', 'Prints this help') do
      puts opts
      puts
      puts "Available extensions: #{extensions.join(', ')}"
      # Was `parser_flags`, an undefined name; the local is `parse_flags`.
      puts "Available parse flags: #{parse_flags.keys.join(', ')}"
      puts "Available render flags: #{render_flags.keys.join(', ')}"
      puts
      puts 'See the README for more information on these.'
      exit
    end

    opts.on('--html-renderer', 'Use the HtmlRenderer renderer rather than the native C renderer') do
      options.renderer = true
    end

    opts.on('--parse-option=OPTION', Array, 'OPTION passed during parsing') do |values|
      values.each do |value|
        # Was `parser_flags` / `options.active_parser_flags`: the first is an
        # undefined local and the second an unset OpenStruct field (nil).
        if parse_flags.key?(value.to_sym)
          options.active_parse_flags << value.to_sym
        else
          abort("parse-option '#{value}' not found")
        end
      end
    end

    opts.on('--render-option=OPTION', Array, 'OPTION passed during rendering') do |values|
      values.each do |value|
        if render_flags.key?(value.to_sym)
          options.active_render_flags << value.to_sym
        else
          abort("render-option '#{value}' not found")
        end
      end
    end

    opts.on('-v', '--version', 'Version information') do
      puts "markly #{Markly::VERSION}"
      exit
    end
  end

  option_parser.parse!

  options
end
|
84
|
+
|
85
|
+
# Script body: parse the switches, read every input (files named on the
# command line, or standard input, via ARGF), parse it as a single document and
# write the rendered result to standard output.
options = parse_options

document = Markly.parse(
  ARGF.read,
  flags: options.active_parse_flags,
  extensions: options.active_extensions
)

# --html-renderer selects the Ruby HtmlRenderer; otherwise the document's own
# to_html is used together with the collected render flags.
html =
  if options.renderer
    Markly::HtmlRenderer.new(extensions: options.active_extensions).render(document)
  else
    document.to_html(flags: options.active_render_flags, extensions: options.active_extensions)
  end

STDOUT.write(html)
|
data/ext/markly/arena.c
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <string.h>
|
3
|
+
#include <stdint.h>
|
4
|
+
#include "cmark-gfm.h"
|
5
|
+
#include "cmark-gfm-extension_api.h"
|
6
|
+
|
7
|
+
/*
 * One link in the arena's chain of memory chunks. Chunks are linked from the
 * newest to the oldest through `prev`.
 */
struct arena_chunk {
  size_t sz;          /* size in bytes of the buffer at `ptr` */
  size_t used;        /* bytes of that buffer already handed out */
  uint8_t push_point; /* set by cmark_arena_push(); cmark_arena_pop() stops here */
  void *ptr;          /* the chunk's backing buffer */
  struct arena_chunk *prev; /* next older chunk, or NULL */
};

/* Newest chunk of the arena; NULL while no arena exists. */
static struct arena_chunk *A = NULL;
|
13
|
+
|
14
|
+
static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
|
15
|
+
struct arena_chunk *c = (struct arena_chunk *)calloc(1, sizeof(*c));
|
16
|
+
if (!c)
|
17
|
+
abort();
|
18
|
+
c->sz = sz;
|
19
|
+
c->ptr = calloc(1, sz);
|
20
|
+
if (!c->ptr)
|
21
|
+
abort();
|
22
|
+
c->prev = prev;
|
23
|
+
return c;
|
24
|
+
}
|
25
|
+
|
26
|
+
void cmark_arena_push(void) {
|
27
|
+
if (!A)
|
28
|
+
return;
|
29
|
+
A->push_point = 1;
|
30
|
+
A = alloc_arena_chunk(10240, A);
|
31
|
+
}
|
32
|
+
|
33
|
+
int cmark_arena_pop(void) {
|
34
|
+
if (!A)
|
35
|
+
return 0;
|
36
|
+
while (A && !A->push_point) {
|
37
|
+
free(A->ptr);
|
38
|
+
struct arena_chunk *n = A->prev;
|
39
|
+
free(A);
|
40
|
+
A = n;
|
41
|
+
}
|
42
|
+
if (A)
|
43
|
+
A->push_point = 0;
|
44
|
+
return 1;
|
45
|
+
}
|
46
|
+
|
47
|
+
static void init_arena(void) {
|
48
|
+
A = alloc_arena_chunk(4 * 1048576, NULL);
|
49
|
+
}
|
50
|
+
|
51
|
+
void cmark_arena_reset(void) {
|
52
|
+
while (A) {
|
53
|
+
free(A->ptr);
|
54
|
+
struct arena_chunk *n = A->prev;
|
55
|
+
free(A);
|
56
|
+
A = n;
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
static void *arena_calloc(size_t nmem, size_t size) {
|
61
|
+
if (!A)
|
62
|
+
init_arena();
|
63
|
+
|
64
|
+
size_t sz = nmem * size + sizeof(size_t);
|
65
|
+
|
66
|
+
// Round allocation sizes to largest integer size to
|
67
|
+
// ensure returned memory is correctly aligned
|
68
|
+
const size_t align = sizeof(size_t) - 1;
|
69
|
+
sz = (sz + align) & ~align;
|
70
|
+
|
71
|
+
if (sz > A->sz) {
|
72
|
+
A->prev = alloc_arena_chunk(sz, A->prev);
|
73
|
+
return (uint8_t *) A->prev->ptr + sizeof(size_t);
|
74
|
+
}
|
75
|
+
if (sz > A->sz - A->used) {
|
76
|
+
A = alloc_arena_chunk(A->sz + A->sz / 2, A);
|
77
|
+
}
|
78
|
+
void *ptr = (uint8_t *) A->ptr + A->used;
|
79
|
+
A->used += sz;
|
80
|
+
*((size_t *) ptr) = sz - sizeof(size_t);
|
81
|
+
return (uint8_t *) ptr + sizeof(size_t);
|
82
|
+
}
|
83
|
+
|
84
|
+
static void *arena_realloc(void *ptr, size_t size) {
|
85
|
+
if (!A)
|
86
|
+
init_arena();
|
87
|
+
|
88
|
+
void *new_ptr = arena_calloc(1, size);
|
89
|
+
if (ptr)
|
90
|
+
memcpy(new_ptr, ptr, ((size_t *) ptr)[-1]);
|
91
|
+
return new_ptr;
|
92
|
+
}
|
93
|
+
|
94
|
+
/*
 * Arena implementation of free. Deliberately does nothing: arena memory is
 * only released in bulk by cmark_arena_pop() and cmark_arena_reset().
 */
static void arena_free(void *ptr) {
  (void)ptr; /* unused on purpose */
}
|
98
|
+
|
99
|
+
/* Allocator table wired to the arena functions above (calloc, realloc and a
 * no-op free, in that order). */
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
|
100
|
+
|
101
|
+
cmark_mem *cmark_get_arena_mem_allocator() {
|
102
|
+
return &CMARK_ARENA_MEM_ALLOCATOR;
|
103
|
+
}
|
@@ -0,0 +1,425 @@
|
|
1
|
+
#include "autolink.h"
|
2
|
+
#include <parser.h>
|
3
|
+
#include <string.h>
|
4
|
+
#include <utf8.h>
|
5
|
+
|
6
|
+
#if defined(_WIN32)
|
7
|
+
#define strncasecmp _strnicmp
|
8
|
+
#else
|
9
|
+
#include <strings.h>
|
10
|
+
#endif
|
11
|
+
|
12
|
+
static int is_valid_hostchar(const uint8_t *link, size_t link_len) {
|
13
|
+
int32_t ch;
|
14
|
+
int r = cmark_utf8proc_iterate(link, (bufsize_t)link_len, &ch);
|
15
|
+
if (r < 0)
|
16
|
+
return 0;
|
17
|
+
return !cmark_utf8proc_is_space(ch) && !cmark_utf8proc_is_punctuation(ch);
|
18
|
+
}
|
19
|
+
|
20
|
+
/*
 * Reports whether `link` starts with one of the accepted schemes
 * ("http://", "https://", "ftp://", compared case-insensitively) and the
 * scheme is followed by at least one byte that is a valid host character.
 *
 * Returns 1 if so, 0 otherwise.
 */
static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
  static const char *const schemes[] = {"http://", "https://", "ftp://"};
  const size_t scheme_count = sizeof(schemes) / sizeof(schemes[0]);
  size_t idx;

  for (idx = 0; idx < scheme_count; idx++) {
    const char *scheme = schemes[idx];
    const size_t scheme_len = strlen(scheme);

    /* Something must follow the scheme. */
    if (link_len <= scheme_len)
      continue;

    if (strncasecmp((const char *)link, scheme, scheme_len) != 0)
      continue;

    if (is_valid_hostchar(link + scheme_len, link_len - scheme_len))
      return 1;
  }

  return 0;
}
|
36
|
+
|
37
|
+
/*
 * Trims a candidate autolink so that it does not swallow trailing
 * punctuation. `data[0 .. link_end)` is the candidate; the return value is
 * the new (possibly shorter, possibly zero) length.
 *
 * Rules, applied repeatedly to the last byte until none matches:
 *   - the candidate is first cut at the first '<';
 *   - any of ?!.,:*_~'" is dropped;
 *   - ';' is dropped, together with a preceding "&letters" if it closes
 *     something shaped like an entity reference;
 *   - ')' is dropped only while the candidate contains more ')' than '('.
 *
 * Parentheses are counted once up front and the count is maintained while
 * trimming (only a dropped ')' changes it: the other dropped bytes are never
 * parentheses), instead of rescanning the whole candidate for every trailing
 * ')'. This keeps the function linear in the candidate length.
 */
static size_t autolink_delim(uint8_t *data, size_t link_end) {
  size_t opening = 0;
  size_t closing = 0;
  size_t i;

  for (i = 0; i < link_end; ++i) {
    const uint8_t c = data[i];

    if (c == '<') {
      link_end = i;
      break;
    }

    if (c == '(')
      opening++;
    else if (c == ')')
      closing++;
  }

  while (link_end > 0) {
    const uint8_t last = data[link_end - 1];

    if (strchr("?!.,:*_~'\"", last) != NULL) {
      link_end--;
    } else if (last == ';') {
      /* A lone ';' has nothing before it that could be an entity; handling
       * it here also keeps `link_end - 2` below from wrapping around. */
      if (link_end < 2) {
        link_end--;
        continue;
      }

      size_t new_end = link_end - 2;

      while (new_end > 0 && cmark_isalpha(data[new_end]))
        new_end--;

      if (new_end < link_end - 2 && data[new_end] == '&')
        link_end = new_end;
      else
        link_end--;
    } else if (last == ')') {
      /* Allow any number of matching brackets at the end of the URL. If
       * there is a greater number of closing brackets than opening ones, we
       * remove one character from the end of the link.
       *
       * Examples (input text => output linked portion):
       *
       *        http://www.pokemon.com/Pikachu_(Electric)
       *                => http://www.pokemon.com/Pikachu_(Electric)
       *
       *        http://www.pokemon.com/Pikachu_((Electric)
       *                => http://www.pokemon.com/Pikachu_((Electric)
       *
       *        http://www.pokemon.com/Pikachu_(Electric))
       *                => http://www.pokemon.com/Pikachu_(Electric)
       *
       *        http://www.pokemon.com/Pikachu_((Electric))
       *                => http://www.pokemon.com/Pikachu_((Electric))
       */
      if (closing <= opening)
        break;

      closing--;
      link_end--;
    } else {
      break;
    }
  }

  return link_end;
}
|
115
|
+
|
116
|
+
/*
 * Scans `data[0 .. size)` as a domain name and returns how many bytes of it
 * look like one, or 0 if it is not acceptable.
 *
 * Scanning starts at index 1 and stops before the last byte, at the first
 * byte that is not '_', '.', '-' or a valid host character. The domain is
 * rejected when either of its last two dot-separated segments contains an
 * underscore. Unless `allow_short` is non-zero, it is also rejected when it
 * contains no dot at all.
 *
 * An empty input yields 0; without that guard `size - 1` would wrap around
 * and the loop would read far past the buffer.
 */
static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
  /* np: dots seen; uscore2: underscores in the segment being scanned;
   * uscore1: underscores in the segment before it. */
  size_t i, np = 0, uscore1 = 0, uscore2 = 0;

  if (size == 0)
    return 0;

  for (i = 1; i < size - 1; i++) {
    if (data[i] == '_')
      uscore2++;
    else if (data[i] == '.') {
      uscore1 = uscore2;
      uscore2 = 0;
      np++;
    } else if (!is_valid_hostchar(data + i, size - i) && data[i] != '-')
      break;
  }

  if (uscore1 > 0 || uscore2 > 0)
    return 0;

  if (allow_short) {
    /* We don't need a valid domain in the strict sense (with at
     * least one dot); so just make sure it's composed of valid
     * domain characters and return the length of the valid
     * sequence. */
    return i;
  } else {
    /* a valid domain needs to have at least a dot.
     * that's as far as we get */
    return np ? i : 0;
  }
}
|
145
|
+
|
146
|
+
static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
|
147
|
+
cmark_inline_parser *inline_parser) {
|
148
|
+
cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
|
149
|
+
size_t max_rewind = cmark_inline_parser_get_offset(inline_parser);
|
150
|
+
uint8_t *data = chunk->data + max_rewind;
|
151
|
+
size_t size = chunk->len - max_rewind;
|
152
|
+
int start = cmark_inline_parser_get_column(inline_parser);
|
153
|
+
|
154
|
+
size_t link_end;
|
155
|
+
|
156
|
+
if (max_rewind > 0 && strchr("*_~(", data[-1]) == NULL &&
|
157
|
+
!cmark_isspace(data[-1]))
|
158
|
+
return 0;
|
159
|
+
|
160
|
+
if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
|
161
|
+
return 0;
|
162
|
+
|
163
|
+
link_end = check_domain(data, size, 0);
|
164
|
+
|
165
|
+
if (link_end == 0)
|
166
|
+
return NULL;
|
167
|
+
|
168
|
+
while (link_end < size && !cmark_isspace(data[link_end]))
|
169
|
+
link_end++;
|
170
|
+
|
171
|
+
link_end = autolink_delim(data, link_end);
|
172
|
+
|
173
|
+
if (link_end == 0)
|
174
|
+
return NULL;
|
175
|
+
|
176
|
+
cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
|
177
|
+
|
178
|
+
cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
|
179
|
+
|
180
|
+
cmark_strbuf buf;
|
181
|
+
cmark_strbuf_init(parser->mem, &buf, 10);
|
182
|
+
cmark_strbuf_puts(&buf, "http://");
|
183
|
+
cmark_strbuf_put(&buf, data, (bufsize_t)link_end);
|
184
|
+
node->as.link.url = cmark_chunk_buf_detach(&buf);
|
185
|
+
|
186
|
+
cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
187
|
+
text->as.literal =
|
188
|
+
cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end);
|
189
|
+
cmark_node_append_child(node, text);
|
190
|
+
|
191
|
+
node->start_line = text->start_line =
|
192
|
+
node->end_line = text->end_line =
|
193
|
+
cmark_inline_parser_get_line(inline_parser);
|
194
|
+
|
195
|
+
node->start_column = text->start_column = start - 1;
|
196
|
+
node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
|
197
|
+
|
198
|
+
return node;
|
199
|
+
}
|
200
|
+
|
201
|
+
static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
|
202
|
+
cmark_inline_parser *inline_parser) {
|
203
|
+
size_t link_end, domain_len;
|
204
|
+
int rewind = 0;
|
205
|
+
|
206
|
+
cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
|
207
|
+
int max_rewind = cmark_inline_parser_get_offset(inline_parser);
|
208
|
+
uint8_t *data = chunk->data + max_rewind;
|
209
|
+
size_t size = chunk->len - max_rewind;
|
210
|
+
|
211
|
+
if (size < 4 || data[1] != '/' || data[2] != '/')
|
212
|
+
return 0;
|
213
|
+
|
214
|
+
while (rewind < max_rewind && cmark_isalpha(data[-rewind - 1]))
|
215
|
+
rewind++;
|
216
|
+
|
217
|
+
if (!sd_autolink_issafe(data - rewind, size + rewind))
|
218
|
+
return 0;
|
219
|
+
|
220
|
+
link_end = strlen("://");
|
221
|
+
|
222
|
+
domain_len = check_domain(data + link_end, size - link_end, 1);
|
223
|
+
|
224
|
+
if (domain_len == 0)
|
225
|
+
return 0;
|
226
|
+
|
227
|
+
link_end += domain_len;
|
228
|
+
while (link_end < size && !cmark_isspace(data[link_end]))
|
229
|
+
link_end++;
|
230
|
+
|
231
|
+
link_end = autolink_delim(data, link_end);
|
232
|
+
|
233
|
+
if (link_end == 0)
|
234
|
+
return NULL;
|
235
|
+
|
236
|
+
cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
|
237
|
+
cmark_node_unput(parent, rewind);
|
238
|
+
|
239
|
+
cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
|
240
|
+
|
241
|
+
cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind,
|
242
|
+
(bufsize_t)(link_end + rewind));
|
243
|
+
node->as.link.url = url;
|
244
|
+
|
245
|
+
cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
246
|
+
text->as.literal = url;
|
247
|
+
cmark_node_append_child(node, text);
|
248
|
+
|
249
|
+
return node;
|
250
|
+
}
|
251
|
+
|
252
|
+
static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
|
253
|
+
cmark_node *parent, unsigned char c,
|
254
|
+
cmark_inline_parser *inline_parser) {
|
255
|
+
if (cmark_inline_parser_in_bracket(inline_parser, false) ||
|
256
|
+
cmark_inline_parser_in_bracket(inline_parser, true))
|
257
|
+
return NULL;
|
258
|
+
|
259
|
+
if (c == ':')
|
260
|
+
return url_match(parser, parent, inline_parser);
|
261
|
+
|
262
|
+
if (c == 'w')
|
263
|
+
return www_match(parser, parent, inline_parser);
|
264
|
+
|
265
|
+
return NULL;
|
266
|
+
|
267
|
+
// note that we could end up re-consuming something already a
|
268
|
+
// part of an inline, because we don't track when the last
|
269
|
+
// inline was finished in inlines.c.
|
270
|
+
}
|
271
|
+
|
272
|
+
/*
 * Looks for an e-mail address in the literal of the TEXT node `text`,
 * starting at byte `offset`, and turns the first one found into a "mailto:"
 * link.
 *
 * The search centres on the first '@' at or after `offset`. The local part is
 * collected backwards over alphanumerics and . + - _ ; the domain part
 * forwards over alphanumerics, '-', '_', and dots that are followed by an
 * alphanumeric. When the candidate is rejected the search restarts just past
 * that '@'. When it is accepted, `text` is truncated before the address and
 * two nodes are inserted after it: a LINK (with a TEXT child holding the
 * address) and a TEXT node with the remainder, which is then searched in turn.
 *
 * postprocess_text can recurse very deeply if there is a very long line of
 * '@' only, so `depth` counts the recursion and the search stops beyond 1000
 * levels to ensure it cannot crash.
 */
static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset, int depth) {
  if (depth > 1000)
    return;

  uint8_t *data = text->as.literal.data;
  size_t size = text->as.literal.len;

  if (offset < 0 || (size_t)offset >= size)
    return;

  data += offset;
  size -= offset;

  uint8_t *at = (uint8_t *)memchr(data, '@', size);
  if (at == NULL)
    return;

  /* From here on `data` points at the '@' and `size` counts from it. */
  int at_index = (int)(at - data);
  data += at_index;
  size -= at_index;

  /* Where the search resumes if this '@' does not yield an address. */
  const int resume_at = at_index + 1 + offset;

  /* Walk backwards over the local part. */
  int local_len = 0;
  int slash_seen = 0;
  while (local_len < at_index) {
    uint8_t c = data[-local_len - 1];

    if (!cmark_isalnum(c) && strchr(".+-_", c) == NULL) {
      if (c == '/')
        slash_seen = 1;
      break;
    }
    local_len++;
  }

  if (local_len == 0 || slash_seen) {
    postprocess_text(parser, text, resume_at, depth + 1);
    return;
  }

  /* Walk forwards over the '@' and the domain part. */
  size_t link_end;
  int at_count = 0, dot_count = 0;
  for (link_end = 0; link_end < size; ++link_end) {
    uint8_t c = data[link_end];

    if (!cmark_isalnum(c)) {
      if (c == '@')
        at_count++;
      else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
        dot_count++;
      else if (c != '-' && c != '_')
        break;
    }
  }

  int plausible = link_end >= 2 && at_count == 1 && dot_count != 0 &&
                  (cmark_isalpha(data[link_end - 1]) || data[link_end - 1] == '.');
  if (!plausible) {
    postprocess_text(parser, text, resume_at, depth + 1);
    return;
  }

  link_end = autolink_delim(data, link_end);

  if (link_end == 0) {
    postprocess_text(parser, text, resume_at, depth + 1);
    return;
  }

  cmark_chunk_to_cstr(parser->mem, &text->as.literal);

  cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
  cmark_strbuf href;
  cmark_strbuf_init(parser->mem, &href, 10);
  cmark_strbuf_puts(&href, "mailto:");
  cmark_strbuf_put(&href, data - local_len, (bufsize_t)(link_end + local_len));
  link_node->as.link.url = cmark_chunk_buf_detach(&href);

  cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
  cmark_chunk email = cmark_chunk_dup(&text->as.literal,
                                      offset + at_index - local_len,
                                      (bufsize_t)(link_end + local_len));
  cmark_chunk_to_cstr(parser->mem, &email);
  link_text->as.literal = email;
  cmark_node_append_child(link_node, link_text);

  cmark_node_insert_after(text, link_node);

  cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
  post->as.literal = cmark_chunk_dup(&text->as.literal,
                                     (bufsize_t)(offset + at_index + link_end),
                                     (bufsize_t)(size - link_end));
  cmark_chunk_to_cstr(parser->mem, &post->as.literal);

  cmark_node_insert_after(link_node, post);

  /* Cut the original node right before the address. */
  text->as.literal.len = offset + at_index - local_len;
  text->as.literal.data[text->as.literal.len] = 0;

  postprocess_text(parser, post, 0, depth + 1);
}
|
378
|
+
|
379
|
+
/*
 * Postprocess hook of the autolink extension. It merges adjacent text nodes
 * under `root`, then walks the tree and runs the e-mail autolinker
 * (postprocess_text) on every TEXT node that is entered outside a LINK node.
 * Everything between entering a LINK and the next LINK exit event is skipped.
 * Returns `root`. `ext` is not used.
 */
static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
  cmark_iter *walker;
  cmark_event_type event;
  bool inside_link = false;

  cmark_consolidate_text_nodes(root);
  walker = cmark_iter_new(root);

  for (event = cmark_iter_next(walker); event != CMARK_EVENT_DONE;
       event = cmark_iter_next(walker)) {
    cmark_node *current = cmark_iter_get_node(walker);

    if (inside_link) {
      /* Stay in skip mode until a link node is exited. */
      inside_link = !(event == CMARK_EVENT_EXIT && current->type == CMARK_NODE_LINK);
      continue;
    }

    if (event != CMARK_EVENT_ENTER)
      continue;

    if (current->type == CMARK_NODE_LINK)
      inside_link = true;
    else if (current->type == CMARK_NODE_TEXT)
      postprocess_text(parser, current, 0, /*depth*/0);
  }

  cmark_iter_free(walker);

  return root;
}
|
411
|
+
|
412
|
+
cmark_syntax_extension *create_autolink_extension(void) {
|
413
|
+
cmark_syntax_extension *ext = cmark_syntax_extension_new("autolink");
|
414
|
+
cmark_llist *special_chars = NULL;
|
415
|
+
|
416
|
+
cmark_syntax_extension_set_match_inline_func(ext, match);
|
417
|
+
cmark_syntax_extension_set_postprocess_func(ext, postprocess);
|
418
|
+
|
419
|
+
cmark_mem *mem = cmark_get_default_mem_allocator();
|
420
|
+
special_chars = cmark_llist_append(mem, special_chars, (void *)':');
|
421
|
+
special_chars = cmark_llist_append(mem, special_chars, (void *)'w');
|
422
|
+
cmark_syntax_extension_set_special_inline_chars(ext, special_chars);
|
423
|
+
|
424
|
+
return ext;
|
425
|
+
}
|