markly 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/markly +94 -0
- data/ext/markly/arena.c +103 -0
- data/ext/markly/autolink.c +425 -0
- data/ext/markly/autolink.h +8 -0
- data/ext/markly/blocks.c +1585 -0
- data/ext/markly/buffer.c +278 -0
- data/ext/markly/buffer.h +116 -0
- data/ext/markly/case_fold_switch.inc +4327 -0
- data/ext/markly/chunk.h +135 -0
- data/ext/markly/cmark-gfm-core-extensions.h +54 -0
- data/ext/markly/cmark-gfm-extension_api.h +736 -0
- data/ext/markly/cmark-gfm-extensions_export.h +42 -0
- data/ext/markly/cmark-gfm.h +817 -0
- data/ext/markly/cmark-gfm_export.h +42 -0
- data/ext/markly/cmark-gfm_version.h +7 -0
- data/ext/markly/cmark.c +55 -0
- data/ext/markly/cmark_ctype.c +44 -0
- data/ext/markly/cmark_ctype.h +33 -0
- data/ext/markly/commonmark.c +519 -0
- data/ext/markly/config.h +76 -0
- data/ext/markly/core-extensions.c +27 -0
- data/ext/markly/entities.inc +2138 -0
- data/ext/markly/ext_scanners.c +1159 -0
- data/ext/markly/ext_scanners.h +24 -0
- data/ext/markly/extconf.rb +7 -0
- data/ext/markly/footnotes.c +40 -0
- data/ext/markly/footnotes.h +25 -0
- data/ext/markly/houdini.h +57 -0
- data/ext/markly/houdini_href_e.c +100 -0
- data/ext/markly/houdini_html_e.c +66 -0
- data/ext/markly/houdini_html_u.c +149 -0
- data/ext/markly/html.c +465 -0
- data/ext/markly/html.h +27 -0
- data/ext/markly/inlines.c +1633 -0
- data/ext/markly/inlines.h +29 -0
- data/ext/markly/iterator.c +159 -0
- data/ext/markly/iterator.h +26 -0
- data/ext/markly/latex.c +466 -0
- data/ext/markly/linked_list.c +37 -0
- data/ext/markly/man.c +278 -0
- data/ext/markly/map.c +122 -0
- data/ext/markly/map.h +41 -0
- data/ext/markly/markly.c +1226 -0
- data/ext/markly/markly.h +16 -0
- data/ext/markly/node.c +979 -0
- data/ext/markly/node.h +118 -0
- data/ext/markly/parser.h +58 -0
- data/ext/markly/plaintext.c +235 -0
- data/ext/markly/plugin.c +36 -0
- data/ext/markly/plugin.h +34 -0
- data/ext/markly/references.c +42 -0
- data/ext/markly/references.h +26 -0
- data/ext/markly/registry.c +63 -0
- data/ext/markly/registry.h +24 -0
- data/ext/markly/render.c +205 -0
- data/ext/markly/render.h +62 -0
- data/ext/markly/scanners.c +20382 -0
- data/ext/markly/scanners.h +62 -0
- data/ext/markly/scanners.re +326 -0
- data/ext/markly/strikethrough.c +167 -0
- data/ext/markly/strikethrough.h +9 -0
- data/ext/markly/syntax_extension.c +149 -0
- data/ext/markly/syntax_extension.h +34 -0
- data/ext/markly/table.c +803 -0
- data/ext/markly/table.h +12 -0
- data/ext/markly/tagfilter.c +60 -0
- data/ext/markly/tagfilter.h +8 -0
- data/ext/markly/tasklist.c +156 -0
- data/ext/markly/tasklist.h +8 -0
- data/ext/markly/utf8.c +317 -0
- data/ext/markly/utf8.h +35 -0
- data/ext/markly/xml.c +181 -0
- data/lib/markly.rb +43 -0
- data/lib/markly/flags.rb +37 -0
- data/lib/markly/markly.so +0 -0
- data/lib/markly/node.rb +70 -0
- data/lib/markly/node/inspect.rb +59 -0
- data/lib/markly/renderer.rb +133 -0
- data/lib/markly/renderer/html_renderer.rb +252 -0
- data/lib/markly/version.rb +5 -0
- metadata +211 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3121bb14bcb09883bfb84879b6b69e28d47e3781f0bcd6225737734040ac2ec5
|
4
|
+
data.tar.gz: 2f47e7add1ee43c03e7b75b45e5f2c7ec2c322cac842aa5f5bdb8e539fbca8d5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6da8e6ef6f672489b7bcd4b6e378c86d3240e4b3f9d67aaba26bbe5414a363c4a071be3695bca85bb9658c0213cf1e59d32edb389b5ec8a0aebd9689a8245ff7
|
7
|
+
data.tar.gz: ce28eac1fcca2c0dfc6e637ca937e16ffbb211d4d64c596f3869b56860fb04227b20065c86a5d335cb8d11cdd49c1e924a09b0caa0b515e404e123420ed39e7d
|
data/bin/markly
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'optparse'
|
5
|
+
require 'ostruct'
|
6
|
+
|
7
|
+
require_relative '../lib/markly'
|
8
|
+
|
9
|
+
# Parse the command line into an options object.
#
# Returns an OpenStruct with:
#   active_extensions   - Array of extension names (as symbols) given via --extension
#   active_parse_flags  - parse flags, starting from Markly::DEFAULT
#   active_render_flags - render flags, starting from Markly::DEFAULT
#   renderer            - true when --html-renderer was given, otherwise unset
#
# Aborts with a message when an unknown extension, parse option or render
# option is named. --help and --version print their text and exit.
def parse_options
  options = OpenStruct.new
  extensions = Markly.extensions
  parse_flags = Markly::PARSE_FLAGS
  render_flags = Markly::RENDER_FLAGS

  options.active_extensions = []
  options.active_parse_flags = Markly::DEFAULT
  options.active_render_flags = Markly::DEFAULT

  option_parser = OptionParser.new do |opts|
    opts.banner = 'Usage: markly [--html-renderer] [--extension=EXTENSION]'
    opts.separator ' [--parse-option=OPTION]'
    opts.separator ' [--render-option=OPTION]'
    opts.separator ' [FILE..]'
    opts.separator ''
    opts.separator 'Convert one or more CommonMark files to HTML and write to standard output.'
    opts.separator 'If no FILE argument is provided, text will be read from STDIN.'
    opts.separator ''

    opts.on('--extension=EXTENSION', Array, 'Use EXTENSION for parsing and HTML output (unless --html-renderer is specified)') do |values|
      values.each do |value|
        if extensions.include?(value)
          options.active_extensions << value.to_sym
        else
          abort("extension '#{value}' not found")
        end
      end
    end

    opts.on('-h', '--help', 'Prints this help') do
      puts opts
      puts
      puts "Available extentions: #{extensions.join(', ')}"
      # Was `parser_flags`, an undefined local that raised NameError on --help.
      puts "Available parse flags: #{parse_flags.keys.join(', ')}"
      puts "Available render flags: #{render_flags.keys.join(', ')}"
      puts
      puts 'See the README for more information on these.'
      exit
    end

    opts.on('--html-renderer', 'Use the HtmlRenderer renderer rather than the native C renderer') do
      options.renderer = true
    end

    # NOTE(review): the two handlers below assume PARSE_FLAGS / RENDER_FLAGS map
    # option names (symbols) to Integer bit masks and that Markly::DEFAULT is an
    # Integer, so flags are combined with bitwise OR -- confirm against
    # lib/markly/flags.rb.
    opts.on('--parse-option=OPTION', Array, 'OPTION passed during parsing') do |values|
      values.each do |value|
        if parse_flags.key?(value.to_sym)
          options.active_parse_flags |= parse_flags[value.to_sym]
        else
          abort("parse-option '#{value}' not found")
        end
      end
    end

    opts.on('--render-option=OPTION', Array, 'OPTION passed during rendering') do |values|
      values.each do |value|
        if render_flags.key?(value.to_sym)
          options.active_render_flags |= render_flags[value.to_sym]
        else
          abort("render-option '#{value}' not found")
        end
      end
    end

    opts.on('-v', '--version', 'Version information') do
      puts "markly #{Markly::VERSION}"
      exit
    end
  end

  # parse! removes recognised switches from ARGV, leaving only FILE arguments.
  option_parser.parse!

  options
end
|
84
|
+
|
85
|
+
# Script body: parse the command line, read every input (files named on the
# command line, or STDIN), convert it, and write the HTML to standard output.
options = parse_options

source = ARGF.read
doc = Markly.parse(source, flags: options.active_parse_flags, extensions: options.active_extensions)

html =
  if options.renderer
    # --html-renderer: use the Ruby HtmlRenderer instead of the native renderer.
    Markly::HtmlRenderer.new(extensions: options.active_extensions).render(doc)
  else
    doc.to_html(flags: options.active_render_flags, extensions: options.active_extensions)
  end

STDOUT.write(html)
|
data/ext/markly/arena.c
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <string.h>
|
3
|
+
#include <stdint.h>
|
4
|
+
#include "cmark-gfm.h"
|
5
|
+
#include "cmark-gfm-extension_api.h"
|
6
|
+
|
7
|
+
/*
 * One block of arena memory.
 *
 *   sz         - capacity of `ptr` in bytes
 *   used       - bytes of `ptr` already handed out
 *   push_point - non-zero when cmark_arena_push() marked this chunk
 *   ptr        - zero-initialised backing storage
 *   prev       - next-older chunk, or NULL
 *
 * `A` is the newest chunk; ordinary allocations are carved from it.
 */
static struct arena_chunk {
  size_t sz, used;
  uint8_t push_point;
  void *ptr;
  struct arena_chunk *prev;
} *A = NULL;

/*
 * Allocate a chunk with `sz` bytes of zeroed storage linked in front of `prev`.
 * Aborts the process if either allocation fails.
 */
static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
  struct arena_chunk *c = (struct arena_chunk *)calloc(1, sizeof(*c));
  if (!c)
    abort();
  c->sz = sz;
  c->ptr = calloc(1, sz);
  if (!c->ptr)
    abort();
  c->prev = prev;
  return c;
}

/*
 * Mark the current chunk as a push point and start a fresh 10240-byte chunk.
 * Does nothing when the arena has not been initialised yet.
 */
void cmark_arena_push(void) {
  if (!A)
    return;
  A->push_point = 1;
  A = alloc_arena_chunk(10240, A);
}

/*
 * Free chunks, newest first, until one marked as a push point is reached, then
 * clear that mark. If no chunk is marked, every chunk is freed.
 * Returns 0 when the arena was empty on entry, 1 otherwise.
 */
int cmark_arena_pop(void) {
  if (!A)
    return 0;
  while (A && !A->push_point) {
    free(A->ptr);
    struct arena_chunk *n = A->prev;
    free(A);
    A = n;
  }
  if (A)
    A->push_point = 0;
  return 1;
}

/* Create the initial 4 MiB chunk. */
static void init_arena(void) {
  A = alloc_arena_chunk(4 * 1048576, NULL);
}

/* Free every chunk and leave the arena uninitialised (A == NULL). */
void cmark_arena_reset(void) {
  while (A) {
    free(A->ptr);
    struct arena_chunk *n = A->prev;
    free(A);
    A = n;
  }
}

/*
 * Allocate `nmem * size` zeroed bytes from the arena.
 *
 * Every allocation is preceded by a size_t header holding its usable size
 * (the request rounded up to a multiple of sizeof(size_t)); arena_realloc()
 * reads that header to know how much to copy.
 *
 * Aborts if the requested size overflows size_t or if memory is exhausted.
 */
static void *arena_calloc(size_t nmem, size_t size) {
  if (!A)
    init_arena();

  // Round allocation sizes to largest integer size to
  // ensure returned memory is correctly aligned
  const size_t align = sizeof(size_t) - 1;

  // Reject requests whose byte count (plus header and rounding) would wrap.
  if (size != 0 && nmem > (SIZE_MAX - sizeof(size_t) - align) / size)
    abort();

  size_t sz = nmem * size + sizeof(size_t);
  sz = (sz + align) & ~align;

  struct arena_chunk *chunk;
  if (sz > A->sz) {
    // Too big for a regular chunk: give it a dedicated chunk linked behind
    // the current one so that A keeps serving small allocations.
    chunk = alloc_arena_chunk(sz, A->prev);
    A->prev = chunk;
  } else {
    if (sz > A->sz - A->used)
      A = alloc_arena_chunk(A->sz + A->sz / 2, A);
    chunk = A;
  }

  // Common tail for both paths, so the dedicated chunk also records its size
  // header and its usage (previously the header was left as 0 there).
  void *ptr = (uint8_t *)chunk->ptr + chunk->used;
  chunk->used += sz;
  *((size_t *)ptr) = sz - sizeof(size_t);
  return (uint8_t *)ptr + sizeof(size_t);
}

/*
 * Allocate a new zeroed region of `size` bytes and copy the contents of `ptr`
 * into it. The old region is not released (the arena never frees individual
 * allocations). `ptr` may be NULL, in which case this is a plain allocation.
 */
static void *arena_realloc(void *ptr, size_t size) {
  if (!A)
    init_arena();

  void *new_ptr = arena_calloc(1, size);
  if (ptr) {
    // Never copy more than the new region was asked to hold, so shrinking
    // cannot write past the end of the new allocation.
    size_t old_size = ((size_t *)ptr)[-1];
    memcpy(new_ptr, ptr, old_size < size ? old_size : size);
  }
  return new_ptr;
}

/* Individual frees are ignored; memory is reclaimed by pop/reset. */
static void arena_free(void *ptr) {
  (void)ptr;
  /* no-op */
}
|
98
|
+
|
99
|
+
/* Allocator table wiring the arena's calloc / realloc / free implementations. */
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {
    arena_calloc,
    arena_realloc,
    arena_free,
};

/* Return a pointer to the shared arena allocator table. */
cmark_mem *cmark_get_arena_mem_allocator(void) {
  cmark_mem *allocator = &CMARK_ARENA_MEM_ALLOCATOR;
  return allocator;
}
|
data/ext/markly/autolink.c
ADDED
@@ -0,0 +1,425 @@
|
|
1
|
+
#include "autolink.h"
|
2
|
+
#include <parser.h>
|
3
|
+
#include <string.h>
|
4
|
+
#include <utf8.h>
|
5
|
+
|
6
|
+
#if defined(_WIN32)
|
7
|
+
#define strncasecmp _strnicmp
|
8
|
+
#else
|
9
|
+
#include <strings.h>
|
10
|
+
#endif
|
11
|
+
|
12
|
+
static int is_valid_hostchar(const uint8_t *link, size_t link_len) {
|
13
|
+
int32_t ch;
|
14
|
+
int r = cmark_utf8proc_iterate(link, (bufsize_t)link_len, &ch);
|
15
|
+
if (r < 0)
|
16
|
+
return 0;
|
17
|
+
return !cmark_utf8proc_is_space(ch) && !cmark_utf8proc_is_punctuation(ch);
|
18
|
+
}
|
19
|
+
|
20
|
+
/*
 * Return 1 when `link` starts (case-insensitively) with one of the accepted
 * schemes -- "http://", "https://" or "ftp://" -- and the character right
 * after the scheme is a valid host character; return 0 otherwise.
 */
static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
  static const char *const schemes[] = {"http://", "https://", "ftp://"};
  const size_t scheme_count = sizeof(schemes) / sizeof(schemes[0]);

  for (size_t idx = 0; idx < scheme_count; ++idx) {
    const char *scheme = schemes[idx];
    const size_t scheme_len = strlen(scheme);

    /* The link must be strictly longer than the scheme itself. */
    if (link_len <= scheme_len) {
      continue;
    }
    if (strncasecmp((const char *)link, scheme, scheme_len) != 0) {
      continue;
    }
    if (is_valid_hostchar(link + scheme_len, link_len - scheme_len)) {
      return 1;
    }
  }

  return 0;
}
|
36
|
+
|
37
|
+
/*
 * Trim trailing delimiters from a candidate autolink.
 *
 * `data[0 .. link_end)` is the candidate. The link is first cut at the first
 * '<'. Then, working backwards from the end:
 *   - trailing characters from the set ?!.,:*_~'" are dropped;
 *   - a trailing "&name;" sequence (alphabetic name) is dropped as a whole,
 *     while a lone trailing ';' is dropped on its own;
 *   - a trailing ')' is dropped only while the link contains more ')' than '('.
 *
 * Returns the new length of the link (possibly 0).
 *
 * Brackets are counted once up front and the closing count is decremented as
 * ')' characters are trimmed, instead of rescanning the whole prefix for each
 * trailing ')'. None of the other trimmed characters are brackets, so the
 * counts stay exact and the result is unchanged while the work is linear.
 */
static size_t autolink_delim(uint8_t *data, size_t link_end) {
  size_t i;
  size_t opening = 0;
  size_t closing = 0;

  for (i = 0; i < link_end; ++i) {
    const uint8_t c = data[i];

    if (c == '<') {
      link_end = i;
      break;
    } else if (c == '(') {
      opening++;
    } else if (c == ')') {
      closing++;
    }
  }

  while (link_end > 0) {
    const uint8_t last = data[link_end - 1];

    /* strchr also matches the string terminator, so a trailing 0 byte is
     * trimmed here as well. */
    if (strchr("?!.,:*_~'\"", last) != NULL) {
      link_end--;
    } else if (last == ';') {
      if (link_end < 2) {
        /* Nothing precedes the ';' -- avoid wrapping `link_end - 2`. */
        link_end--;
        continue;
      }

      size_t new_end = link_end - 2;

      while (new_end > 0 && cmark_isalpha(data[new_end]))
        new_end--;

      if (new_end < link_end - 2 && data[new_end] == '&')
        link_end = new_end;
      else
        link_end--;
    } else if (last == ')') {
      /* Allow any number of matching brackets at the end of the URL. If
       * there is a greater number of closing brackets than opening ones, we
       * remove one character from the end of the link.
       *
       * Examples (input text => output linked portion):
       *
       *   http://www.pokemon.com/Pikachu_(Electric)
       *     => http://www.pokemon.com/Pikachu_(Electric)
       *
       *   http://www.pokemon.com/Pikachu_((Electric)
       *     => http://www.pokemon.com/Pikachu_((Electric)
       *
       *   http://www.pokemon.com/Pikachu_(Electric))
       *     => http://www.pokemon.com/Pikachu_(Electric)
       *
       *   http://www.pokemon.com/Pikachu_((Electric))
       *     => http://www.pokemon.com/Pikachu_((Electric))
       */
      if (closing <= opening)
        break;

      closing--;
      link_end--;
    } else {
      break;
    }
  }

  return link_end;
}
|
115
|
+
|
116
|
+
/*
 * Measure the domain-like prefix of `data[0 .. size)`.
 *
 * Scanning starts at index 1 and stops one byte before the end of the buffer,
 * or earlier at the first byte that is neither a valid host character nor
 * '-', '_' or '.'. The domain is rejected (0 is returned) when either of its
 * last two dot-separated segments contains an underscore.
 *
 * allow_short - when non-zero, a domain without any '.' is accepted.
 *
 * Returns the index at which scanning stopped, or 0 when the domain is
 * rejected or the buffer is empty.
 */
static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
  size_t i, np = 0, uscore1 = 0, uscore2 = 0;

  /* An empty buffer has no domain; this also keeps the loop bound below from
   * being derived from a wrapped `size - 1`. */
  if (size == 0)
    return 0;

  for (i = 1; i + 1 < size; i++) {
    if (data[i] == '_') {
      uscore2++;
    } else if (data[i] == '.') {
      /* New segment: remember the underscore count of the previous one. */
      uscore1 = uscore2;
      uscore2 = 0;
      np++;
    } else if (!is_valid_hostchar(data + i, size - i) && data[i] != '-') {
      break;
    }
  }

  if (uscore1 > 0 || uscore2 > 0)
    return 0;

  if (allow_short) {
    /* We don't need a valid domain in the strict sense (with at least one
     * dot), so just make sure it's composed of valid domain characters and
     * return the length of the valid sequence. */
    return i;
  }

  /* A valid domain needs to have at least one dot; that's as far as we get. */
  return np ? i : 0;
}
|
145
|
+
|
146
|
+
static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
|
147
|
+
cmark_inline_parser *inline_parser) {
|
148
|
+
cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
|
149
|
+
size_t max_rewind = cmark_inline_parser_get_offset(inline_parser);
|
150
|
+
uint8_t *data = chunk->data + max_rewind;
|
151
|
+
size_t size = chunk->len - max_rewind;
|
152
|
+
int start = cmark_inline_parser_get_column(inline_parser);
|
153
|
+
|
154
|
+
size_t link_end;
|
155
|
+
|
156
|
+
if (max_rewind > 0 && strchr("*_~(", data[-1]) == NULL &&
|
157
|
+
!cmark_isspace(data[-1]))
|
158
|
+
return 0;
|
159
|
+
|
160
|
+
if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
|
161
|
+
return 0;
|
162
|
+
|
163
|
+
link_end = check_domain(data, size, 0);
|
164
|
+
|
165
|
+
if (link_end == 0)
|
166
|
+
return NULL;
|
167
|
+
|
168
|
+
while (link_end < size && !cmark_isspace(data[link_end]))
|
169
|
+
link_end++;
|
170
|
+
|
171
|
+
link_end = autolink_delim(data, link_end);
|
172
|
+
|
173
|
+
if (link_end == 0)
|
174
|
+
return NULL;
|
175
|
+
|
176
|
+
cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
|
177
|
+
|
178
|
+
cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
|
179
|
+
|
180
|
+
cmark_strbuf buf;
|
181
|
+
cmark_strbuf_init(parser->mem, &buf, 10);
|
182
|
+
cmark_strbuf_puts(&buf, "http://");
|
183
|
+
cmark_strbuf_put(&buf, data, (bufsize_t)link_end);
|
184
|
+
node->as.link.url = cmark_chunk_buf_detach(&buf);
|
185
|
+
|
186
|
+
cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
187
|
+
text->as.literal =
|
188
|
+
cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end);
|
189
|
+
cmark_node_append_child(node, text);
|
190
|
+
|
191
|
+
node->start_line = text->start_line =
|
192
|
+
node->end_line = text->end_line =
|
193
|
+
cmark_inline_parser_get_line(inline_parser);
|
194
|
+
|
195
|
+
node->start_column = text->start_column = start - 1;
|
196
|
+
node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
|
197
|
+
|
198
|
+
return node;
|
199
|
+
}
|
200
|
+
|
201
|
+
static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
|
202
|
+
cmark_inline_parser *inline_parser) {
|
203
|
+
size_t link_end, domain_len;
|
204
|
+
int rewind = 0;
|
205
|
+
|
206
|
+
cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
|
207
|
+
int max_rewind = cmark_inline_parser_get_offset(inline_parser);
|
208
|
+
uint8_t *data = chunk->data + max_rewind;
|
209
|
+
size_t size = chunk->len - max_rewind;
|
210
|
+
|
211
|
+
if (size < 4 || data[1] != '/' || data[2] != '/')
|
212
|
+
return 0;
|
213
|
+
|
214
|
+
while (rewind < max_rewind && cmark_isalpha(data[-rewind - 1]))
|
215
|
+
rewind++;
|
216
|
+
|
217
|
+
if (!sd_autolink_issafe(data - rewind, size + rewind))
|
218
|
+
return 0;
|
219
|
+
|
220
|
+
link_end = strlen("://");
|
221
|
+
|
222
|
+
domain_len = check_domain(data + link_end, size - link_end, 1);
|
223
|
+
|
224
|
+
if (domain_len == 0)
|
225
|
+
return 0;
|
226
|
+
|
227
|
+
link_end += domain_len;
|
228
|
+
while (link_end < size && !cmark_isspace(data[link_end]))
|
229
|
+
link_end++;
|
230
|
+
|
231
|
+
link_end = autolink_delim(data, link_end);
|
232
|
+
|
233
|
+
if (link_end == 0)
|
234
|
+
return NULL;
|
235
|
+
|
236
|
+
cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
|
237
|
+
cmark_node_unput(parent, rewind);
|
238
|
+
|
239
|
+
cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
|
240
|
+
|
241
|
+
cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind,
|
242
|
+
(bufsize_t)(link_end + rewind));
|
243
|
+
node->as.link.url = url;
|
244
|
+
|
245
|
+
cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
246
|
+
text->as.literal = url;
|
247
|
+
cmark_node_append_child(node, text);
|
248
|
+
|
249
|
+
return node;
|
250
|
+
}
|
251
|
+
|
252
|
+
static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
|
253
|
+
cmark_node *parent, unsigned char c,
|
254
|
+
cmark_inline_parser *inline_parser) {
|
255
|
+
if (cmark_inline_parser_in_bracket(inline_parser, false) ||
|
256
|
+
cmark_inline_parser_in_bracket(inline_parser, true))
|
257
|
+
return NULL;
|
258
|
+
|
259
|
+
if (c == ':')
|
260
|
+
return url_match(parser, parent, inline_parser);
|
261
|
+
|
262
|
+
if (c == 'w')
|
263
|
+
return www_match(parser, parent, inline_parser);
|
264
|
+
|
265
|
+
return NULL;
|
266
|
+
|
267
|
+
// note that we could end up re-consuming something already a
|
268
|
+
// part of an inline, because we don't track when the last
|
269
|
+
// inline was finished in inlines.c.
|
270
|
+
}
|
271
|
+
|
272
|
+
/*
 * Look for an e-mail address in the literal of `text`, starting at byte
 * `offset`, and turn it into a "mailto:" link.
 *
 * The first '@' at or after `offset` is located. The local part is found by
 * walking backwards over alphanumerics and ".+-_"; the domain part by walking
 * forwards over alphanumerics, '-', '_', and '.' followed by an alphanumeric.
 * The candidate is rejected when there is no local part, the local part is
 * preceded by '/', the candidate does not contain exactly one '@' and at
 * least one counted '.', or it does not end in a letter or '.'. On rejection
 * the search resumes just after that '@'.
 *
 * On success `text` is truncated before the address, a LINK node (with a TEXT
 * child holding the address) and a TEXT node holding the remainder are
 * inserted after it, and the remainder is processed in turn.
 *
 * postprocess_text can recurse very deeply if there is a very long line of
 * '@' only, so recursion stops once `depth` exceeds 1000.
 */
static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset, int depth) {
  if (depth > 1000) {
    return;
  }

  uint8_t *cursor = text->as.literal.data;
  size_t remaining = text->as.literal.len;

  if (offset < 0 || (size_t)offset >= remaining) {
    return;
  }

  cursor += offset;
  remaining -= offset;

  uint8_t *at_sign = (uint8_t *)memchr(cursor, '@', remaining);
  if (at_sign == NULL) {
    return;
  }

  /* Reposition on the '@'; max_rewind is how far back the local part may go. */
  const int max_rewind = (int)(at_sign - cursor);
  cursor += max_rewind;
  remaining -= max_rewind;

  /* Where the search resumes if this '@' turns out not to be an address. */
  const int resume_at = max_rewind + 1 + offset;

  /* Walk backwards over the local part. */
  int rewind = 0;
  int saw_slash = 0;
  while (rewind < max_rewind) {
    const uint8_t c = cursor[-rewind - 1];

    if (!cmark_isalnum(c) && strchr(".+-_", c) == NULL) {
      if (c == '/') {
        saw_slash = 1;
      }
      break;
    }
    rewind++;
  }

  if (rewind == 0 || saw_slash) {
    postprocess_text(parser, text, resume_at, depth + 1);
    return;
  }

  /* Walk forwards over the '@' and the domain part. */
  size_t link_end;
  int at_count = 0;
  int dot_count = 0;
  for (link_end = 0; link_end < remaining; ++link_end) {
    const uint8_t c = cursor[link_end];

    if (cmark_isalnum(c)) {
      continue;
    }
    if (c == '@') {
      at_count++;
      continue;
    }
    if (c == '.' && link_end < remaining - 1 && cmark_isalnum(cursor[link_end + 1])) {
      dot_count++;
      continue;
    }
    if (c == '-' || c == '_') {
      continue;
    }
    break;
  }

  if (link_end < 2 || at_count != 1 || dot_count == 0 ||
      (!cmark_isalpha(cursor[link_end - 1]) && cursor[link_end - 1] != '.')) {
    postprocess_text(parser, text, resume_at, depth + 1);
    return;
  }

  link_end = autolink_delim(cursor, link_end);

  if (link_end == 0) {
    postprocess_text(parser, text, resume_at, depth + 1);
    return;
  }

  /* The statement order below is kept exactly as in the original. */
  cmark_chunk_to_cstr(parser->mem, &text->as.literal);

  cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
  cmark_strbuf url;
  cmark_strbuf_init(parser->mem, &url, 10);
  cmark_strbuf_puts(&url, "mailto:");
  cmark_strbuf_put(&url, cursor - rewind, (bufsize_t)(link_end + rewind));
  link_node->as.link.url = cmark_chunk_buf_detach(&url);

  cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
  cmark_chunk address = cmark_chunk_dup(
      &text->as.literal,
      offset + max_rewind - rewind,
      (bufsize_t)(link_end + rewind));
  cmark_chunk_to_cstr(parser->mem, &address);
  link_text->as.literal = address;
  cmark_node_append_child(link_node, link_text);

  cmark_node_insert_after(text, link_node);

  /* Everything after the address becomes its own text node. */
  cmark_node *tail = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
  tail->as.literal = cmark_chunk_dup(&text->as.literal,
                                     (bufsize_t)(offset + max_rewind + link_end),
                                     (bufsize_t)(remaining - link_end));
  cmark_chunk_to_cstr(parser->mem, &tail->as.literal);

  cmark_node_insert_after(link_node, tail);

  /* Cut the original node off right before the address. */
  text->as.literal.len = offset + max_rewind - rewind;
  text->as.literal.data[text->as.literal.len] = 0;

  postprocess_text(parser, tail, 0, depth + 1);
}
|
378
|
+
|
379
|
+
/*
 * Post-processing callback of the autolink extension.
 *
 * Consolidates adjacent text nodes under `root`, then walks the tree and runs
 * postprocess_text() on every TEXT node that is not inside a LINK node.
 * Returns `root`.
 */
static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
  bool inside_link = false;
  cmark_event_type event;

  cmark_consolidate_text_nodes(root);

  cmark_iter *walker = cmark_iter_new(root);

  for (event = cmark_iter_next(walker); event != CMARK_EVENT_DONE;
       event = cmark_iter_next(walker)) {
    cmark_node *current = cmark_iter_get_node(walker);

    if (inside_link) {
      /* Skip everything until the enclosing link is exited. */
      if (event == CMARK_EVENT_EXIT && current->type == CMARK_NODE_LINK) {
        inside_link = false;
      }
      continue;
    }

    if (event != CMARK_EVENT_ENTER) {
      continue;
    }

    if (current->type == CMARK_NODE_LINK) {
      inside_link = true;
    } else if (current->type == CMARK_NODE_TEXT) {
      postprocess_text(parser, current, 0, /*depth*/0);
    }
  }

  cmark_iter_free(walker);

  return root;
}
|
411
|
+
|
412
|
+
cmark_syntax_extension *create_autolink_extension(void) {
|
413
|
+
cmark_syntax_extension *ext = cmark_syntax_extension_new("autolink");
|
414
|
+
cmark_llist *special_chars = NULL;
|
415
|
+
|
416
|
+
cmark_syntax_extension_set_match_inline_func(ext, match);
|
417
|
+
cmark_syntax_extension_set_postprocess_func(ext, postprocess);
|
418
|
+
|
419
|
+
cmark_mem *mem = cmark_get_default_mem_allocator();
|
420
|
+
special_chars = cmark_llist_append(mem, special_chars, (void *)':');
|
421
|
+
special_chars = cmark_llist_append(mem, special_chars, (void *)'w');
|
422
|
+
cmark_syntax_extension_set_special_inline_chars(ext, special_chars);
|
423
|
+
|
424
|
+
return ext;
|
425
|
+
}
|