markly 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/conduct.md +133 -0
- data/ext/markly/arena.c +9 -8
- data/ext/markly/autolink.c +217 -134
- data/ext/markly/blocks.c +27 -2
- data/ext/markly/cmark-gfm-core-extensions.h +11 -11
- data/ext/markly/cmark-gfm-extension_api.h +1 -0
- data/ext/markly/cmark-gfm.h +18 -2
- data/ext/markly/cmark.c +3 -3
- data/ext/markly/commonmark.c +19 -34
- data/ext/markly/extconf.rb +8 -1
- data/ext/markly/html.c +22 -6
- data/ext/markly/inlines.c +148 -51
- data/ext/markly/latex.c +6 -4
- data/ext/markly/man.c +7 -11
- data/ext/markly/map.c +11 -4
- data/ext/markly/map.h +5 -2
- data/ext/markly/markly.c +582 -586
- data/ext/markly/markly.h +1 -1
- data/ext/markly/node.c +76 -10
- data/ext/markly/node.h +42 -1
- data/ext/markly/parser.h +1 -0
- data/ext/markly/plaintext.c +12 -29
- data/ext/markly/references.c +1 -0
- data/ext/markly/render.c +15 -7
- data/ext/markly/scanners.c +13916 -10380
- data/ext/markly/scanners.h +8 -0
- data/ext/markly/scanners.re +47 -8
- data/ext/markly/strikethrough.c +1 -1
- data/ext/markly/table.c +81 -31
- data/ext/markly/xml.c +2 -1
- data/lib/markly/flags.rb +16 -0
- data/lib/markly/node/inspect.rb +59 -53
- data/lib/markly/node.rb +125 -58
- data/lib/markly/renderer/generic.rb +129 -124
- data/lib/markly/renderer/html.rb +294 -275
- data/lib/markly/version.rb +7 -1
- data/lib/markly.rb +36 -30
- data/license.md +39 -0
- data/readme.md +36 -0
- data.tar.gz.sig +0 -0
- metadata +61 -29
- metadata.gz.sig +0 -0
- data/bin/markly +0 -94
- data/lib/markly/markly.bundle +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8f8bb5972577921f29b489d7ef01e509d5e9a771f1c864cad7e64e4b2c7129a8
|
4
|
+
data.tar.gz: 237d34500aca112b8aaede861618ad205a2518d86a04e7a9f1dcc06b4101de45
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea92b1d1b3ae144d31c75445d58fed31201a9d0d5df4c8922ae85d1384f35abe630df8213f8f33a084fdff6e02222fdbb663364a898cff5a98af5f71b09a08b2
|
7
|
+
data.tar.gz: 8bf9723221304cdd0d7a8d9fbfb74d4f9f16badcf08d28bc919226e471a7612adf5e1bd6fa3ae4ac2f94b2108e46c6c5c26af4a38aec48e796af097b811d30fb
|
checksums.yaml.gz.sig
ADDED
Binary file
|
data/conduct.md
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
|
2
|
+
# Contributor Covenant Code of Conduct
|
3
|
+
|
4
|
+
## Our Pledge
|
5
|
+
|
6
|
+
We as members, contributors, and leaders pledge to make participation in our
|
7
|
+
community a harassment-free experience for everyone, regardless of age, body
|
8
|
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
9
|
+
identity and expression, level of experience, education, socio-economic status,
|
10
|
+
nationality, personal appearance, race, caste, color, religion, or sexual
|
11
|
+
identity and orientation.
|
12
|
+
|
13
|
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
14
|
+
diverse, inclusive, and healthy community.
|
15
|
+
|
16
|
+
## Our Standards
|
17
|
+
|
18
|
+
Examples of behavior that contributes to a positive environment for our
|
19
|
+
community include:
|
20
|
+
|
21
|
+
* Demonstrating empathy and kindness toward other people
|
22
|
+
* Being respectful of differing opinions, viewpoints, and experiences
|
23
|
+
* Giving and gracefully accepting constructive feedback
|
24
|
+
* Accepting responsibility and apologizing to those affected by our mistakes,
|
25
|
+
and learning from the experience
|
26
|
+
* Focusing on what is best not just for us as individuals, but for the overall
|
27
|
+
community
|
28
|
+
|
29
|
+
Examples of unacceptable behavior include:
|
30
|
+
|
31
|
+
* The use of sexualized language or imagery, and sexual attention or advances of
|
32
|
+
any kind
|
33
|
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
34
|
+
* Public or private harassment
|
35
|
+
* Publishing others' private information, such as a physical or email address,
|
36
|
+
without their explicit permission
|
37
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
38
|
+
professional setting
|
39
|
+
|
40
|
+
## Enforcement Responsibilities
|
41
|
+
|
42
|
+
Community leaders are responsible for clarifying and enforcing our standards of
|
43
|
+
acceptable behavior and will take appropriate and fair corrective action in
|
44
|
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
45
|
+
or harmful.
|
46
|
+
|
47
|
+
Community leaders have the right and responsibility to remove, edit, or reject
|
48
|
+
comments, commits, code, wiki edits, issues, and other contributions that are
|
49
|
+
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
50
|
+
decisions when appropriate.
|
51
|
+
|
52
|
+
## Scope
|
53
|
+
|
54
|
+
This Code of Conduct applies within all community spaces, and also applies when
|
55
|
+
an individual is officially representing the community in public spaces.
|
56
|
+
Examples of representing our community include using an official e-mail address,
|
57
|
+
posting via an official social media account, or acting as an appointed
|
58
|
+
representative at an online or offline event.
|
59
|
+
|
60
|
+
## Enforcement
|
61
|
+
|
62
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
63
|
+
reported to the community leaders responsible for enforcement at
|
64
|
+
[INSERT CONTACT METHOD].
|
65
|
+
All complaints will be reviewed and investigated promptly and fairly.
|
66
|
+
|
67
|
+
All community leaders are obligated to respect the privacy and security of the
|
68
|
+
reporter of any incident.
|
69
|
+
|
70
|
+
## Enforcement Guidelines
|
71
|
+
|
72
|
+
Community leaders will follow these Community Impact Guidelines in determining
|
73
|
+
the consequences for any action they deem in violation of this Code of Conduct:
|
74
|
+
|
75
|
+
### 1. Correction
|
76
|
+
|
77
|
+
**Community Impact**: Use of inappropriate language or other behavior deemed
|
78
|
+
unprofessional or unwelcome in the community.
|
79
|
+
|
80
|
+
**Consequence**: A private, written warning from community leaders, providing
|
81
|
+
clarity around the nature of the violation and an explanation of why the
|
82
|
+
behavior was inappropriate. A public apology may be requested.
|
83
|
+
|
84
|
+
### 2. Warning
|
85
|
+
|
86
|
+
**Community Impact**: A violation through a single incident or series of
|
87
|
+
actions.
|
88
|
+
|
89
|
+
**Consequence**: A warning with consequences for continued behavior. No
|
90
|
+
interaction with the people involved, including unsolicited interaction with
|
91
|
+
those enforcing the Code of Conduct, for a specified period of time. This
|
92
|
+
includes avoiding interactions in community spaces as well as external channels
|
93
|
+
like social media. Violating these terms may lead to a temporary or permanent
|
94
|
+
ban.
|
95
|
+
|
96
|
+
### 3. Temporary Ban
|
97
|
+
|
98
|
+
**Community Impact**: A serious violation of community standards, including
|
99
|
+
sustained inappropriate behavior.
|
100
|
+
|
101
|
+
**Consequence**: A temporary ban from any sort of interaction or public
|
102
|
+
communication with the community for a specified period of time. No public or
|
103
|
+
private interaction with the people involved, including unsolicited interaction
|
104
|
+
with those enforcing the Code of Conduct, is allowed during this period.
|
105
|
+
Violating these terms may lead to a permanent ban.
|
106
|
+
|
107
|
+
### 4. Permanent Ban
|
108
|
+
|
109
|
+
**Community Impact**: Demonstrating a pattern of violation of community
|
110
|
+
standards, including sustained inappropriate behavior, harassment of an
|
111
|
+
individual, or aggression toward or disparagement of classes of individuals.
|
112
|
+
|
113
|
+
**Consequence**: A permanent ban from any sort of public interaction within the
|
114
|
+
community.
|
115
|
+
|
116
|
+
## Attribution
|
117
|
+
|
118
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
119
|
+
version 2.1, available at
|
120
|
+
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
121
|
+
|
122
|
+
Community Impact Guidelines were inspired by
|
123
|
+
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
124
|
+
|
125
|
+
For answers to common questions about this code of conduct, see the FAQ at
|
126
|
+
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
127
|
+
[https://www.contributor-covenant.org/translations][translations].
|
128
|
+
|
129
|
+
[homepage]: https://www.contributor-covenant.org
|
130
|
+
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
131
|
+
[Mozilla CoC]: https://github.com/mozilla/diversity
|
132
|
+
[FAQ]: https://www.contributor-covenant.org/faq
|
133
|
+
[translations]: https://www.contributor-covenant.org/translations
|
data/ext/markly/arena.c
CHANGED
@@ -68,15 +68,16 @@ static void *arena_calloc(size_t nmem, size_t size) {
|
|
68
68
|
const size_t align = sizeof(size_t) - 1;
|
69
69
|
sz = (sz + align) & ~align;
|
70
70
|
|
71
|
+
struct arena_chunk *chunk;
|
71
72
|
if (sz > A->sz) {
|
72
|
-
A->prev = alloc_arena_chunk(sz, A->prev);
|
73
|
-
|
73
|
+
A->prev = chunk = alloc_arena_chunk(sz, A->prev);
|
74
|
+
} else if (sz > A->sz - A->used) {
|
75
|
+
A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
|
76
|
+
} else {
|
77
|
+
chunk = A;
|
74
78
|
}
|
75
|
-
|
76
|
-
|
77
|
-
}
|
78
|
-
void *ptr = (uint8_t *) A->ptr + A->used;
|
79
|
-
A->used += sz;
|
79
|
+
void *ptr = (uint8_t *) chunk->ptr + chunk->used;
|
80
|
+
chunk->used += sz;
|
80
81
|
*((size_t *) ptr) = sz - sizeof(size_t);
|
81
82
|
return (uint8_t *) ptr + sizeof(size_t);
|
82
83
|
}
|
@@ -98,6 +99,6 @@ static void arena_free(void *ptr) {
|
|
98
99
|
|
99
100
|
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
|
100
101
|
|
101
|
-
cmark_mem *cmark_get_arena_mem_allocator() {
|
102
|
+
cmark_mem *cmark_get_arena_mem_allocator(void) {
|
102
103
|
return &CMARK_ARENA_MEM_ALLOCATOR;
|
103
104
|
}
|
data/ext/markly/autolink.c
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
#include <parser.h>
|
3
3
|
#include <string.h>
|
4
4
|
#include <utf8.h>
|
5
|
+
#include <stddef.h>
|
5
6
|
|
6
7
|
#if defined(_WIN32)
|
7
8
|
#define strncasecmp _strnicmp
|
@@ -35,44 +36,25 @@ static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
|
|
35
36
|
}
|
36
37
|
|
37
38
|
static size_t autolink_delim(uint8_t *data, size_t link_end) {
|
38
|
-
uint8_t cclose, copen;
|
39
39
|
size_t i;
|
40
|
+
size_t closing = 0;
|
41
|
+
size_t opening = 0;
|
40
42
|
|
41
|
-
for (i = 0; i < link_end; ++i)
|
42
|
-
|
43
|
+
for (i = 0; i < link_end; ++i) {
|
44
|
+
const uint8_t c = data[i];
|
45
|
+
if (c == '<') {
|
43
46
|
link_end = i;
|
44
47
|
break;
|
48
|
+
} else if (c == '(') {
|
49
|
+
opening++;
|
50
|
+
} else if (c == ')') {
|
51
|
+
closing++;
|
45
52
|
}
|
53
|
+
}
|
46
54
|
|
47
55
|
while (link_end > 0) {
|
48
|
-
|
49
|
-
|
50
|
-
switch (cclose) {
|
56
|
+
switch (data[link_end - 1]) {
|
51
57
|
case ')':
|
52
|
-
copen = '(';
|
53
|
-
break;
|
54
|
-
default:
|
55
|
-
copen = 0;
|
56
|
-
}
|
57
|
-
|
58
|
-
if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL)
|
59
|
-
link_end--;
|
60
|
-
|
61
|
-
else if (data[link_end - 1] == ';') {
|
62
|
-
size_t new_end = link_end - 2;
|
63
|
-
|
64
|
-
while (new_end > 0 && cmark_isalpha(data[new_end]))
|
65
|
-
new_end--;
|
66
|
-
|
67
|
-
if (new_end < link_end - 2 && data[new_end] == '&')
|
68
|
-
link_end = new_end;
|
69
|
-
else
|
70
|
-
link_end--;
|
71
|
-
} else if (copen != 0) {
|
72
|
-
size_t closing = 0;
|
73
|
-
size_t opening = 0;
|
74
|
-
i = 0;
|
75
|
-
|
76
58
|
/* Allow any number of matching brackets (counted in opening/closing)
|
77
59
|
* at the end of the URL. If there is a greater number of closing
|
78
60
|
* brackets than opening ones, we remove one character from the end of
|
@@ -80,34 +62,52 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
|
|
80
62
|
*
|
81
63
|
* Examples (input text => output linked portion):
|
82
64
|
*
|
83
|
-
*
|
84
|
-
*
|
65
|
+
* http://www.pokemon.com/Pikachu_(Electric)
|
66
|
+
* => http://www.pokemon.com/Pikachu_(Electric)
|
85
67
|
*
|
86
|
-
*
|
87
|
-
*
|
68
|
+
* http://www.pokemon.com/Pikachu_((Electric)
|
69
|
+
* => http://www.pokemon.com/Pikachu_((Electric)
|
88
70
|
*
|
89
|
-
*
|
90
|
-
*
|
71
|
+
* http://www.pokemon.com/Pikachu_(Electric))
|
72
|
+
* => http://www.pokemon.com/Pikachu_(Electric)
|
91
73
|
*
|
92
|
-
*
|
93
|
-
*
|
74
|
+
* http://www.pokemon.com/Pikachu_((Electric))
|
75
|
+
* => http://www.pokemon.com/Pikachu_((Electric))
|
94
76
|
*/
|
95
|
-
|
96
|
-
|
97
|
-
if (data[i] == copen)
|
98
|
-
opening++;
|
99
|
-
else if (data[i] == cclose)
|
100
|
-
closing++;
|
101
|
-
|
102
|
-
i++;
|
77
|
+
if (closing <= opening) {
|
78
|
+
return link_end;
|
103
79
|
}
|
80
|
+
closing--;
|
81
|
+
link_end--;
|
82
|
+
break;
|
83
|
+
case '?':
|
84
|
+
case '!':
|
85
|
+
case '.':
|
86
|
+
case ',':
|
87
|
+
case ':':
|
88
|
+
case '*':
|
89
|
+
case '_':
|
90
|
+
case '~':
|
91
|
+
case '\'':
|
92
|
+
case '"':
|
93
|
+
link_end--;
|
94
|
+
break;
|
95
|
+
case ';': {
|
96
|
+
size_t new_end = link_end - 2;
|
104
97
|
|
105
|
-
|
106
|
-
|
98
|
+
while (new_end > 0 && cmark_isalpha(data[new_end]))
|
99
|
+
new_end--;
|
107
100
|
|
108
|
-
link_end
|
109
|
-
|
101
|
+
if (new_end < link_end - 2 && data[new_end] == '&')
|
102
|
+
link_end = new_end;
|
103
|
+
else
|
104
|
+
link_end--;
|
110
105
|
break;
|
106
|
+
}
|
107
|
+
|
108
|
+
default:
|
109
|
+
return link_end;
|
110
|
+
}
|
111
111
|
}
|
112
112
|
|
113
113
|
return link_end;
|
@@ -116,7 +116,20 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
|
|
116
116
|
static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
|
117
117
|
size_t i, np = 0, uscore1 = 0, uscore2 = 0;
|
118
118
|
|
119
|
+
/* The purpose of this code is to reject urls that contain an underscore
|
120
|
+
* in one of the last two segments. Examples:
|
121
|
+
*
|
122
|
+
* www.xxx.yyy.zzz autolinked
|
123
|
+
* www.xxx.yyy._zzz not autolinked
|
124
|
+
* www.xxx._yyy.zzz not autolinked
|
125
|
+
* www._xxx.yyy.zzz autolinked
|
126
|
+
*
|
127
|
+
* The reason is that domain names are allowed to include underscores,
|
128
|
+
* but host names are not. See: https://stackoverflow.com/a/2183140
|
129
|
+
*/
|
119
130
|
for (i = 1; i < size - 1; i++) {
|
131
|
+
if (data[i] == '\\' && i < size - 2)
|
132
|
+
i++;
|
120
133
|
if (data[i] == '_')
|
121
134
|
uscore2++;
|
122
135
|
else if (data[i] == '.') {
|
@@ -127,8 +140,17 @@ static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
|
|
127
140
|
break;
|
128
141
|
}
|
129
142
|
|
130
|
-
if (uscore1 > 0 || uscore2 > 0)
|
131
|
-
|
143
|
+
if (uscore1 > 0 || uscore2 > 0) {
|
144
|
+
/* If the url is very long then accept it despite the underscores,
|
145
|
+
* to avoid quadratic behavior causing a denial of service. See:
|
146
|
+
* https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
|
147
|
+
* Reasonable urls are unlikely to have more than 10 segments, so
|
148
|
+
* this extra condition shouldn't have any impact on normal usage.
|
149
|
+
*/
|
150
|
+
if (np <= 10) {
|
151
|
+
return 0;
|
152
|
+
}
|
153
|
+
}
|
132
154
|
|
133
155
|
if (allow_short) {
|
134
156
|
/* We don't need a valid domain in the strict sense (with
|
@@ -165,7 +187,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
|
|
165
187
|
if (link_end == 0)
|
166
188
|
return NULL;
|
167
189
|
|
168
|
-
while (link_end < size && !cmark_isspace(data[link_end]))
|
190
|
+
while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
|
169
191
|
link_end++;
|
170
192
|
|
171
193
|
link_end = autolink_delim(data, link_end);
|
@@ -225,7 +247,7 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
|
|
225
247
|
return 0;
|
226
248
|
|
227
249
|
link_end += domain_len;
|
228
|
-
while (link_end < size && !cmark_isspace(data[link_end]))
|
250
|
+
while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
|
229
251
|
link_end++;
|
230
252
|
|
231
253
|
link_end = autolink_delim(data, link_end);
|
@@ -245,6 +267,11 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
|
|
245
267
|
cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
246
268
|
text->as.literal = url;
|
247
269
|
cmark_node_append_child(node, text);
|
270
|
+
|
271
|
+
node->start_line = text->start_line = node->end_line = text->end_line = cmark_inline_parser_get_line(inline_parser);
|
272
|
+
|
273
|
+
node->start_column = text->start_column = max_rewind - rewind;
|
274
|
+
node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
|
248
275
|
|
249
276
|
return node;
|
250
277
|
}
|
@@ -269,111 +296,167 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
|
|
269
296
|
// inline was finished in inlines.c.
|
270
297
|
}
|
271
298
|
|
272
|
-
static
|
273
|
-
|
274
|
-
// '@' only. Stop at a reasonable depth to ensure it cannot crash.
|
275
|
-
if (depth > 1000) return;
|
299
|
+
static bool validate_protocol(const char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) {
|
300
|
+
size_t len = strlen(protocol);
|
276
301
|
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
size_t size = text->as.literal.len;
|
281
|
-
int rewind, max_rewind,
|
282
|
-
nb = 0, np = 0, ns = 0;
|
302
|
+
if (len > (max_rewind - rewind)) {
|
303
|
+
return false;
|
304
|
+
}
|
283
305
|
|
284
|
-
|
285
|
-
|
306
|
+
// Check that the protocol matches
|
307
|
+
if (memcmp(data - rewind - len, protocol, len) != 0) {
|
308
|
+
return false;
|
309
|
+
}
|
286
310
|
|
287
|
-
|
288
|
-
|
311
|
+
if (len == (max_rewind - rewind)) {
|
312
|
+
return true;
|
313
|
+
}
|
289
314
|
|
290
|
-
|
291
|
-
if (!at)
|
292
|
-
return;
|
315
|
+
char prev_char = data[-((ptrdiff_t)rewind) - len - 1];
|
293
316
|
|
294
|
-
|
295
|
-
|
296
|
-
|
317
|
+
// Make sure the character before the protocol is non-alphanumeric
|
318
|
+
return !cmark_isalnum(prev_char);
|
319
|
+
}
|
297
320
|
|
298
|
-
|
299
|
-
|
321
|
+
static void postprocess_text(cmark_parser *parser, cmark_node *text) {
|
322
|
+
size_t start = 0;
|
323
|
+
size_t offset = 0;
|
324
|
+
// `text` is going to be split into a list of nodes containing shorter segments
|
325
|
+
// of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
|
326
|
+
// create references to it. Later, `cmark_chunk_to_cstr` is used to convert
|
327
|
+
// the references into allocated buffers. The detached buffer is freed before we
|
328
|
+
// return.
|
329
|
+
cmark_chunk detached_chunk = text->as.literal;
|
330
|
+
text->as.literal = cmark_chunk_dup(&detached_chunk, 0, detached_chunk.len);
|
331
|
+
|
332
|
+
uint8_t *data = text->as.literal.data;
|
333
|
+
size_t remaining = text->as.literal.len;
|
334
|
+
|
335
|
+
while (true) {
|
336
|
+
size_t link_end;
|
337
|
+
uint8_t *at;
|
338
|
+
bool auto_mailto = true;
|
339
|
+
bool is_xmpp = false;
|
340
|
+
size_t rewind;
|
341
|
+
size_t max_rewind;
|
342
|
+
size_t np = 0;
|
343
|
+
|
344
|
+
if (offset >= remaining)
|
345
|
+
break;
|
300
346
|
|
301
|
-
|
302
|
-
|
347
|
+
at = (uint8_t *)memchr(data + start + offset, '@', remaining - offset);
|
348
|
+
if (!at)
|
349
|
+
break;
|
303
350
|
|
304
|
-
|
305
|
-
continue;
|
351
|
+
max_rewind = at - (data + start + offset);
|
306
352
|
|
307
|
-
|
308
|
-
|
353
|
+
found_at:
|
354
|
+
for (rewind = 0; rewind < max_rewind; ++rewind) {
|
355
|
+
uint8_t c = data[start + offset + max_rewind - rewind - 1];
|
309
356
|
|
310
|
-
|
311
|
-
|
357
|
+
if (cmark_isalnum(c))
|
358
|
+
continue;
|
312
359
|
|
313
|
-
|
314
|
-
|
315
|
-
return;
|
316
|
-
}
|
360
|
+
if (strchr(".+-_", c) != NULL)
|
361
|
+
continue;
|
317
362
|
|
318
|
-
|
319
|
-
|
363
|
+
if (strchr(":", c) != NULL) {
|
364
|
+
if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) {
|
365
|
+
auto_mailto = false;
|
366
|
+
continue;
|
367
|
+
}
|
320
368
|
|
321
|
-
|
322
|
-
|
369
|
+
if (validate_protocol("xmpp:", data + start + offset + max_rewind, rewind, max_rewind)) {
|
370
|
+
auto_mailto = false;
|
371
|
+
is_xmpp = true;
|
372
|
+
continue;
|
373
|
+
}
|
374
|
+
}
|
323
375
|
|
324
|
-
if (c == '@')
|
325
|
-
nb++;
|
326
|
-
else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
|
327
|
-
np++;
|
328
|
-
else if (c != '-' && c != '_')
|
329
376
|
break;
|
330
|
-
|
377
|
+
}
|
331
378
|
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
}
|
379
|
+
if (rewind == 0) {
|
380
|
+
offset += max_rewind + 1;
|
381
|
+
continue;
|
382
|
+
}
|
337
383
|
|
338
|
-
|
384
|
+
assert(data[start + offset + max_rewind] == '@');
|
385
|
+
for (link_end = 1; link_end < remaining - offset - max_rewind; ++link_end) {
|
386
|
+
uint8_t c = data[start + offset + max_rewind + link_end];
|
387
|
+
|
388
|
+
if (cmark_isalnum(c))
|
389
|
+
continue;
|
390
|
+
|
391
|
+
if (c == '@') {
|
392
|
+
// Found another '@', so go back and try again with an updated offset and max_rewind.
|
393
|
+
offset += max_rewind + 1;
|
394
|
+
max_rewind = link_end - 1;
|
395
|
+
goto found_at;
|
396
|
+
} else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
|
397
|
+
cmark_isalnum(data[start + offset + max_rewind + link_end + 1]))
|
398
|
+
np++;
|
399
|
+
else if (c == '/' && is_xmpp)
|
400
|
+
continue;
|
401
|
+
else if (c != '-' && c != '_')
|
402
|
+
break;
|
403
|
+
}
|
339
404
|
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
405
|
+
if (link_end < 2 || np == 0 ||
|
406
|
+
(!cmark_isalpha(data[start + offset + max_rewind + link_end - 1]) &&
|
407
|
+
data[start + offset + max_rewind + link_end - 1] != '.')) {
|
408
|
+
offset += max_rewind + link_end;
|
409
|
+
continue;
|
410
|
+
}
|
344
411
|
|
345
|
-
|
412
|
+
link_end = autolink_delim(data + start + offset + max_rewind, link_end);
|
346
413
|
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
&
|
357
|
-
|
414
|
+
if (link_end == 0) {
|
415
|
+
offset += max_rewind + 1;
|
416
|
+
continue;
|
417
|
+
}
|
418
|
+
|
419
|
+
cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
|
420
|
+
cmark_strbuf buf;
|
421
|
+
cmark_strbuf_init(parser->mem, &buf, 10);
|
422
|
+
if (auto_mailto)
|
423
|
+
cmark_strbuf_puts(&buf, "mailto:");
|
424
|
+
cmark_strbuf_put(&buf, data + start + offset + max_rewind - rewind, (bufsize_t)(link_end + rewind));
|
425
|
+
link_node->as.link.url = cmark_chunk_buf_detach(&buf);
|
426
|
+
|
427
|
+
cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
428
|
+
cmark_chunk email = cmark_chunk_dup(
|
429
|
+
&detached_chunk,
|
430
|
+
(bufsize_t)(start + offset + max_rewind - rewind),
|
358
431
|
(bufsize_t)(link_end + rewind));
|
359
|
-
|
360
|
-
|
361
|
-
|
432
|
+
cmark_chunk_to_cstr(parser->mem, &email);
|
433
|
+
link_text->as.literal = email;
|
434
|
+
cmark_node_append_child(link_node, link_text);
|
362
435
|
|
363
|
-
|
436
|
+
cmark_node_insert_after(text, link_node);
|
364
437
|
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
cmark_chunk_to_cstr(parser->mem, &post->as.literal);
|
438
|
+
cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
439
|
+
post->as.literal = cmark_chunk_dup(&detached_chunk,
|
440
|
+
(bufsize_t)(start + offset + max_rewind + link_end),
|
441
|
+
(bufsize_t)(remaining - offset - max_rewind - link_end));
|
370
442
|
|
371
|
-
|
443
|
+
cmark_node_insert_after(link_node, post);
|
372
444
|
|
373
|
-
|
374
|
-
|
445
|
+
text->as.literal = cmark_chunk_dup(&detached_chunk, (bufsize_t)start, (bufsize_t)(offset + max_rewind - rewind));
|
446
|
+
cmark_chunk_to_cstr(parser->mem, &text->as.literal);
|
447
|
+
|
448
|
+
text = post;
|
449
|
+
start += offset + max_rewind + link_end;
|
450
|
+
remaining -= offset + max_rewind + link_end;
|
451
|
+
offset = 0;
|
452
|
+
}
|
453
|
+
|
454
|
+
// Convert the reference to allocated memory.
|
455
|
+
assert(!text->as.literal.alloc);
|
456
|
+
cmark_chunk_to_cstr(parser->mem, &text->as.literal);
|
375
457
|
|
376
|
-
|
458
|
+
// Free the detached buffer.
|
459
|
+
cmark_chunk_free(parser->mem, &detached_chunk);
|
377
460
|
}
|
378
461
|
|
379
462
|
static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
|
@@ -400,7 +483,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser
|
|
400
483
|
}
|
401
484
|
|
402
485
|
if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) {
|
403
|
-
postprocess_text(parser, node
|
486
|
+
postprocess_text(parser, node);
|
404
487
|
}
|
405
488
|
}
|
406
489
|
|