markly 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/conduct.md +133 -0
  4. data/ext/markly/arena.c +9 -8
  5. data/ext/markly/autolink.c +217 -134
  6. data/ext/markly/blocks.c +40 -4
  7. data/ext/markly/cmark-gfm-core-extensions.h +11 -11
  8. data/ext/markly/cmark-gfm-extension_api.h +1 -0
  9. data/ext/markly/cmark-gfm.h +18 -2
  10. data/ext/markly/cmark-gfm_version.h +2 -2
  11. data/ext/markly/cmark.c +3 -3
  12. data/ext/markly/commonmark.c +33 -38
  13. data/ext/markly/ext_scanners.c +360 -640
  14. data/ext/markly/extconf.rb +8 -1
  15. data/ext/markly/footnotes.c +23 -0
  16. data/ext/markly/footnotes.h +2 -0
  17. data/ext/markly/html.c +60 -23
  18. data/ext/markly/inlines.c +216 -61
  19. data/ext/markly/latex.c +6 -4
  20. data/ext/markly/man.c +7 -11
  21. data/ext/markly/map.c +11 -4
  22. data/ext/markly/map.h +5 -2
  23. data/ext/markly/markly.c +582 -586
  24. data/ext/markly/markly.h +1 -1
  25. data/ext/markly/node.c +76 -10
  26. data/ext/markly/node.h +49 -1
  27. data/ext/markly/parser.h +1 -0
  28. data/ext/markly/plaintext.c +12 -29
  29. data/ext/markly/references.c +1 -0
  30. data/ext/markly/render.c +15 -7
  31. data/ext/markly/scanners.c +13916 -20242
  32. data/ext/markly/scanners.h +8 -0
  33. data/ext/markly/scanners.re +47 -8
  34. data/ext/markly/strikethrough.c +1 -1
  35. data/ext/markly/table.c +143 -74
  36. data/ext/markly/xml.c +2 -1
  37. data/lib/markly/flags.rb +16 -0
  38. data/lib/markly/node/inspect.rb +59 -53
  39. data/lib/markly/node.rb +125 -58
  40. data/lib/markly/renderer/generic.rb +136 -0
  41. data/lib/markly/renderer/html.rb +301 -0
  42. data/lib/markly/version.rb +7 -1
  43. data/lib/markly.rb +38 -32
  44. data/license.md +39 -0
  45. data/readme.md +36 -0
  46. data.tar.gz.sig +0 -0
  47. metadata +63 -31
  48. metadata.gz.sig +0 -0
  49. data/bin/markly +0 -94
  50. data/lib/markly/markly.so +0 -0
  51. data/lib/markly/renderer/html_renderer.rb +0 -281
  52. data/lib/markly/renderer.rb +0 -133
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 78cfc5814d3b2666b77795de6b7e19ab99acf8b63ae6925bbb6158a69063fdda
4
- data.tar.gz: e063c28a6019b7623df0db94b2393e226d9e45c425d9230f77392a7097df0a9e
3
+ metadata.gz: 8f8bb5972577921f29b489d7ef01e509d5e9a771f1c864cad7e64e4b2c7129a8
4
+ data.tar.gz: 237d34500aca112b8aaede861618ad205a2518d86a04e7a9f1dcc06b4101de45
5
5
  SHA512:
6
- metadata.gz: 8e3ee35349c3aeafb8cb18cd586b564040c1db886c13dabcc2432628f6a692fd65d0c59c57ab0065f5ebdb6b8635e4bb3c7c6be598170c81164dcbf955324efe
7
- data.tar.gz: 818216b44c592bf1882ab799959aa6ec5d5cdf7c5ed8ead5f38fd8f5252226cb38192affb3a2dd22f5c30c6fd07d985d87b5b477d865aca41822e43e21d79c85
6
+ metadata.gz: ea92b1d1b3ae144d31c75445d58fed31201a9d0d5df4c8922ae85d1384f35abe630df8213f8f33a084fdff6e02222fdbb663364a898cff5a98af5f71b09a08b2
7
+ data.tar.gz: 8bf9723221304cdd0d7a8d9fbfb74d4f9f16badcf08d28bc919226e471a7612adf5e1bd6fa3ae4ac2f94b2108e46c6c5c26af4a38aec48e796af097b811d30fb
checksums.yaml.gz.sig ADDED
Binary file
data/conduct.md ADDED
@@ -0,0 +1,133 @@
1
+
2
+ # Contributor Covenant Code of Conduct
3
+
4
+ ## Our Pledge
5
+
6
+ We as members, contributors, and leaders pledge to make participation in our
7
+ community a harassment-free experience for everyone, regardless of age, body
8
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
9
+ identity and expression, level of experience, education, socio-economic status,
10
+ nationality, personal appearance, race, caste, color, religion, or sexual
11
+ identity and orientation.
12
+
13
+ We pledge to act and interact in ways that contribute to an open, welcoming,
14
+ diverse, inclusive, and healthy community.
15
+
16
+ ## Our Standards
17
+
18
+ Examples of behavior that contributes to a positive environment for our
19
+ community include:
20
+
21
+ * Demonstrating empathy and kindness toward other people
22
+ * Being respectful of differing opinions, viewpoints, and experiences
23
+ * Giving and gracefully accepting constructive feedback
24
+ * Accepting responsibility and apologizing to those affected by our mistakes,
25
+ and learning from the experience
26
+ * Focusing on what is best not just for us as individuals, but for the overall
27
+ community
28
+
29
+ Examples of unacceptable behavior include:
30
+
31
+ * The use of sexualized language or imagery, and sexual attention or advances of
32
+ any kind
33
+ * Trolling, insulting or derogatory comments, and personal or political attacks
34
+ * Public or private harassment
35
+ * Publishing others' private information, such as a physical or email address,
36
+ without their explicit permission
37
+ * Other conduct which could reasonably be considered inappropriate in a
38
+ professional setting
39
+
40
+ ## Enforcement Responsibilities
41
+
42
+ Community leaders are responsible for clarifying and enforcing our standards of
43
+ acceptable behavior and will take appropriate and fair corrective action in
44
+ response to any behavior that they deem inappropriate, threatening, offensive,
45
+ or harmful.
46
+
47
+ Community leaders have the right and responsibility to remove, edit, or reject
48
+ comments, commits, code, wiki edits, issues, and other contributions that are
49
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
50
+ decisions when appropriate.
51
+
52
+ ## Scope
53
+
54
+ This Code of Conduct applies within all community spaces, and also applies when
55
+ an individual is officially representing the community in public spaces.
56
+ Examples of representing our community include using an official e-mail address,
57
+ posting via an official social media account, or acting as an appointed
58
+ representative at an online or offline event.
59
+
60
+ ## Enforcement
61
+
62
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
63
+ reported to the community leaders responsible for enforcement at
64
+ [INSERT CONTACT METHOD].
65
+ All complaints will be reviewed and investigated promptly and fairly.
66
+
67
+ All community leaders are obligated to respect the privacy and security of the
68
+ reporter of any incident.
69
+
70
+ ## Enforcement Guidelines
71
+
72
+ Community leaders will follow these Community Impact Guidelines in determining
73
+ the consequences for any action they deem in violation of this Code of Conduct:
74
+
75
+ ### 1. Correction
76
+
77
+ **Community Impact**: Use of inappropriate language or other behavior deemed
78
+ unprofessional or unwelcome in the community.
79
+
80
+ **Consequence**: A private, written warning from community leaders, providing
81
+ clarity around the nature of the violation and an explanation of why the
82
+ behavior was inappropriate. A public apology may be requested.
83
+
84
+ ### 2. Warning
85
+
86
+ **Community Impact**: A violation through a single incident or series of
87
+ actions.
88
+
89
+ **Consequence**: A warning with consequences for continued behavior. No
90
+ interaction with the people involved, including unsolicited interaction with
91
+ those enforcing the Code of Conduct, for a specified period of time. This
92
+ includes avoiding interactions in community spaces as well as external channels
93
+ like social media. Violating these terms may lead to a temporary or permanent
94
+ ban.
95
+
96
+ ### 3. Temporary Ban
97
+
98
+ **Community Impact**: A serious violation of community standards, including
99
+ sustained inappropriate behavior.
100
+
101
+ **Consequence**: A temporary ban from any sort of interaction or public
102
+ communication with the community for a specified period of time. No public or
103
+ private interaction with the people involved, including unsolicited interaction
104
+ with those enforcing the Code of Conduct, is allowed during this period.
105
+ Violating these terms may lead to a permanent ban.
106
+
107
+ ### 4. Permanent Ban
108
+
109
+ **Community Impact**: Demonstrating a pattern of violation of community
110
+ standards, including sustained inappropriate behavior, harassment of an
111
+ individual, or aggression toward or disparagement of classes of individuals.
112
+
113
+ **Consequence**: A permanent ban from any sort of public interaction within the
114
+ community.
115
+
116
+ ## Attribution
117
+
118
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
119
+ version 2.1, available at
120
+ [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
121
+
122
+ Community Impact Guidelines were inspired by
123
+ [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
124
+
125
+ For answers to common questions about this code of conduct, see the FAQ at
126
+ [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
127
+ [https://www.contributor-covenant.org/translations][translations].
128
+
129
+ [homepage]: https://www.contributor-covenant.org
130
+ [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
131
+ [Mozilla CoC]: https://github.com/mozilla/diversity
132
+ [FAQ]: https://www.contributor-covenant.org/faq
133
+ [translations]: https://www.contributor-covenant.org/translations
data/ext/markly/arena.c CHANGED
@@ -68,15 +68,16 @@ static void *arena_calloc(size_t nmem, size_t size) {
68
68
  const size_t align = sizeof(size_t) - 1;
69
69
  sz = (sz + align) & ~align;
70
70
 
71
+ struct arena_chunk *chunk;
71
72
  if (sz > A->sz) {
72
- A->prev = alloc_arena_chunk(sz, A->prev);
73
- return (uint8_t *) A->prev->ptr + sizeof(size_t);
73
+ A->prev = chunk = alloc_arena_chunk(sz, A->prev);
74
+ } else if (sz > A->sz - A->used) {
75
+ A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
76
+ } else {
77
+ chunk = A;
74
78
  }
75
- if (sz > A->sz - A->used) {
76
- A = alloc_arena_chunk(A->sz + A->sz / 2, A);
77
- }
78
- void *ptr = (uint8_t *) A->ptr + A->used;
79
- A->used += sz;
79
+ void *ptr = (uint8_t *) chunk->ptr + chunk->used;
80
+ chunk->used += sz;
80
81
  *((size_t *) ptr) = sz - sizeof(size_t);
81
82
  return (uint8_t *) ptr + sizeof(size_t);
82
83
  }
@@ -98,6 +99,6 @@ static void arena_free(void *ptr) {
98
99
 
99
100
  cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
100
101
 
101
- cmark_mem *cmark_get_arena_mem_allocator() {
102
+ cmark_mem *cmark_get_arena_mem_allocator(void) {
102
103
  return &CMARK_ARENA_MEM_ALLOCATOR;
103
104
  }
data/ext/markly/autolink.c CHANGED
@@ -2,6 +2,7 @@
2
2
  #include <parser.h>
3
3
  #include <string.h>
4
4
  #include <utf8.h>
5
+ #include <stddef.h>
5
6
 
6
7
  #if defined(_WIN32)
7
8
  #define strncasecmp _strnicmp
@@ -35,44 +36,25 @@ static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
35
36
  }
36
37
 
37
38
  static size_t autolink_delim(uint8_t *data, size_t link_end) {
38
- uint8_t cclose, copen;
39
39
  size_t i;
40
+ size_t closing = 0;
41
+ size_t opening = 0;
40
42
 
41
- for (i = 0; i < link_end; ++i)
42
- if (data[i] == '<') {
43
+ for (i = 0; i < link_end; ++i) {
44
+ const uint8_t c = data[i];
45
+ if (c == '<') {
43
46
  link_end = i;
44
47
  break;
48
+ } else if (c == '(') {
49
+ opening++;
50
+ } else if (c == ')') {
51
+ closing++;
45
52
  }
53
+ }
46
54
 
47
55
  while (link_end > 0) {
48
- cclose = data[link_end - 1];
49
-
50
- switch (cclose) {
56
+ switch (data[link_end - 1]) {
51
57
  case ')':
52
- copen = '(';
53
- break;
54
- default:
55
- copen = 0;
56
- }
57
-
58
- if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL)
59
- link_end--;
60
-
61
- else if (data[link_end - 1] == ';') {
62
- size_t new_end = link_end - 2;
63
-
64
- while (new_end > 0 && cmark_isalpha(data[new_end]))
65
- new_end--;
66
-
67
- if (new_end < link_end - 2 && data[new_end] == '&')
68
- link_end = new_end;
69
- else
70
- link_end--;
71
- } else if (copen != 0) {
72
- size_t closing = 0;
73
- size_t opening = 0;
74
- i = 0;
75
-
76
58
  /* Allow any number of matching brackets (as recognised in copen/cclose)
77
59
  * at the end of the URL. If there is a greater number of closing
78
60
  * brackets than opening ones, we remove one character from the end of
@@ -80,34 +62,52 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
80
62
  *
81
63
  * Examples (input text => output linked portion):
82
64
  *
83
- * http://www.pokemon.com/Pikachu_(Electric)
84
- * => http://www.pokemon.com/Pikachu_(Electric)
65
+ * http://www.pokemon.com/Pikachu_(Electric)
66
+ * => http://www.pokemon.com/Pikachu_(Electric)
85
67
  *
86
- * http://www.pokemon.com/Pikachu_((Electric)
87
- * => http://www.pokemon.com/Pikachu_((Electric)
68
+ * http://www.pokemon.com/Pikachu_((Electric)
69
+ * => http://www.pokemon.com/Pikachu_((Electric)
88
70
  *
89
- * http://www.pokemon.com/Pikachu_(Electric))
90
- * => http://www.pokemon.com/Pikachu_(Electric)
71
+ * http://www.pokemon.com/Pikachu_(Electric))
72
+ * => http://www.pokemon.com/Pikachu_(Electric)
91
73
  *
92
- * http://www.pokemon.com/Pikachu_((Electric))
93
- * => http://www.pokemon.com/Pikachu_((Electric))
74
+ * http://www.pokemon.com/Pikachu_((Electric))
75
+ * => http://www.pokemon.com/Pikachu_((Electric))
94
76
  */
95
-
96
- while (i < link_end) {
97
- if (data[i] == copen)
98
- opening++;
99
- else if (data[i] == cclose)
100
- closing++;
101
-
102
- i++;
77
+ if (closing <= opening) {
78
+ return link_end;
103
79
  }
80
+ closing--;
81
+ link_end--;
82
+ break;
83
+ case '?':
84
+ case '!':
85
+ case '.':
86
+ case ',':
87
+ case ':':
88
+ case '*':
89
+ case '_':
90
+ case '~':
91
+ case '\'':
92
+ case '"':
93
+ link_end--;
94
+ break;
95
+ case ';': {
96
+ size_t new_end = link_end - 2;
104
97
 
105
- if (closing <= opening)
106
- break;
98
+ while (new_end > 0 && cmark_isalpha(data[new_end]))
99
+ new_end--;
107
100
 
108
- link_end--;
109
- } else
101
+ if (new_end < link_end - 2 && data[new_end] == '&')
102
+ link_end = new_end;
103
+ else
104
+ link_end--;
110
105
  break;
106
+ }
107
+
108
+ default:
109
+ return link_end;
110
+ }
111
111
  }
112
112
 
113
113
  return link_end;
@@ -116,7 +116,20 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
116
116
  static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
117
117
  size_t i, np = 0, uscore1 = 0, uscore2 = 0;
118
118
 
119
+ /* The purpose of this code is to reject urls that contain an underscore
120
+ * in one of the last two segments. Examples:
121
+ *
122
+ * www.xxx.yyy.zzz autolinked
123
+ * www.xxx.yyy._zzz not autolinked
124
+ * www.xxx._yyy.zzz not autolinked
125
+ * www._xxx.yyy.zzz autolinked
126
+ *
127
+ * The reason is that domain names are allowed to include underscores,
128
+ * but host names are not. See: https://stackoverflow.com/a/2183140
129
+ */
119
130
  for (i = 1; i < size - 1; i++) {
131
+ if (data[i] == '\\' && i < size - 2)
132
+ i++;
120
133
  if (data[i] == '_')
121
134
  uscore2++;
122
135
  else if (data[i] == '.') {
@@ -127,8 +140,17 @@ static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
127
140
  break;
128
141
  }
129
142
 
130
- if (uscore1 > 0 || uscore2 > 0)
131
- return 0;
143
+ if (uscore1 > 0 || uscore2 > 0) {
144
+ /* If the url is very long then accept it despite the underscores,
145
+ * to avoid quadratic behavior causing a denial of service. See:
146
+ * https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
147
+ * Reasonable urls are unlikely to have more than 10 segments, so
148
+ * this extra condition shouldn't have any impact on normal usage.
149
+ */
150
+ if (np <= 10) {
151
+ return 0;
152
+ }
153
+ }
132
154
 
133
155
  if (allow_short) {
134
156
  /* We don't need a valid domain in the strict sense (with
@@ -165,7 +187,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
165
187
  if (link_end == 0)
166
188
  return NULL;
167
189
 
168
- while (link_end < size && !cmark_isspace(data[link_end]))
190
+ while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
169
191
  link_end++;
170
192
 
171
193
  link_end = autolink_delim(data, link_end);
@@ -225,7 +247,7 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
225
247
  return 0;
226
248
 
227
249
  link_end += domain_len;
228
- while (link_end < size && !cmark_isspace(data[link_end]))
250
+ while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
229
251
  link_end++;
230
252
 
231
253
  link_end = autolink_delim(data, link_end);
@@ -245,6 +267,11 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
245
267
  cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
246
268
  text->as.literal = url;
247
269
  cmark_node_append_child(node, text);
270
+
271
+ node->start_line = text->start_line = node->end_line = text->end_line = cmark_inline_parser_get_line(inline_parser);
272
+
273
+ node->start_column = text->start_column = max_rewind - rewind;
274
+ node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
248
275
 
249
276
  return node;
250
277
  }
@@ -269,111 +296,167 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
269
296
  // inline was finished in inlines.c.
270
297
  }
271
298
 
272
- static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset, int depth) {
273
- // postprocess_text can recurse very deeply if there is a very long line of
274
- // '@' only. Stop at a reasonable depth to ensure it cannot crash.
275
- if (depth > 1000) return;
299
+ static bool validate_protocol(const char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) {
300
+ size_t len = strlen(protocol);
276
301
 
277
- size_t link_end;
278
- uint8_t *data = text->as.literal.data,
279
- *at;
280
- size_t size = text->as.literal.len;
281
- int rewind, max_rewind,
282
- nb = 0, np = 0, ns = 0;
302
+ if (len > (max_rewind - rewind)) {
303
+ return false;
304
+ }
283
305
 
284
- if (offset < 0 || (size_t)offset >= size)
285
- return;
306
+ // Check that the protocol matches
307
+ if (memcmp(data - rewind - len, protocol, len) != 0) {
308
+ return false;
309
+ }
286
310
 
287
- data += offset;
288
- size -= offset;
311
+ if (len == (max_rewind - rewind)) {
312
+ return true;
313
+ }
289
314
 
290
- at = (uint8_t *)memchr(data, '@', size);
291
- if (!at)
292
- return;
315
+ char prev_char = data[-((ptrdiff_t)rewind) - len - 1];
293
316
 
294
- max_rewind = (int)(at - data);
295
- data += max_rewind;
296
- size -= max_rewind;
317
+ // Make sure the character before the protocol is non-alphanumeric
318
+ return !cmark_isalnum(prev_char);
319
+ }
297
320
 
298
- for (rewind = 0; rewind < max_rewind; ++rewind) {
299
- uint8_t c = data[-rewind - 1];
321
+ static void postprocess_text(cmark_parser *parser, cmark_node *text) {
322
+ size_t start = 0;
323
+ size_t offset = 0;
324
+ // `text` is going to be split into a list of nodes containing shorter segments
325
+ // of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
326
+ // create references to it. Later, `cmark_chunk_to_cstr` is used to convert
327
+ // the references into allocated buffers. The detached buffer is freed before we
328
+ // return.
329
+ cmark_chunk detached_chunk = text->as.literal;
330
+ text->as.literal = cmark_chunk_dup(&detached_chunk, 0, detached_chunk.len);
331
+
332
+ uint8_t *data = text->as.literal.data;
333
+ size_t remaining = text->as.literal.len;
334
+
335
+ while (true) {
336
+ size_t link_end;
337
+ uint8_t *at;
338
+ bool auto_mailto = true;
339
+ bool is_xmpp = false;
340
+ size_t rewind;
341
+ size_t max_rewind;
342
+ size_t np = 0;
343
+
344
+ if (offset >= remaining)
345
+ break;
300
346
 
301
- if (cmark_isalnum(c))
302
- continue;
347
+ at = (uint8_t *)memchr(data + start + offset, '@', remaining - offset);
348
+ if (!at)
349
+ break;
303
350
 
304
- if (strchr(".+-_", c) != NULL)
305
- continue;
351
+ max_rewind = at - (data + start + offset);
306
352
 
307
- if (c == '/')
308
- ns++;
353
+ found_at:
354
+ for (rewind = 0; rewind < max_rewind; ++rewind) {
355
+ uint8_t c = data[start + offset + max_rewind - rewind - 1];
309
356
 
310
- break;
311
- }
357
+ if (cmark_isalnum(c))
358
+ continue;
312
359
 
313
- if (rewind == 0 || ns > 0) {
314
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
315
- return;
316
- }
360
+ if (strchr(".+-_", c) != NULL)
361
+ continue;
317
362
 
318
- for (link_end = 0; link_end < size; ++link_end) {
319
- uint8_t c = data[link_end];
363
+ if (strchr(":", c) != NULL) {
364
+ if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) {
365
+ auto_mailto = false;
366
+ continue;
367
+ }
320
368
 
321
- if (cmark_isalnum(c))
322
- continue;
369
+ if (validate_protocol("xmpp:", data + start + offset + max_rewind, rewind, max_rewind)) {
370
+ auto_mailto = false;
371
+ is_xmpp = true;
372
+ continue;
373
+ }
374
+ }
323
375
 
324
- if (c == '@')
325
- nb++;
326
- else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
327
- np++;
328
- else if (c != '-' && c != '_')
329
376
  break;
330
- }
377
+ }
331
378
 
332
- if (link_end < 2 || nb != 1 || np == 0 ||
333
- (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) {
334
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
335
- return;
336
- }
379
+ if (rewind == 0) {
380
+ offset += max_rewind + 1;
381
+ continue;
382
+ }
337
383
 
338
- link_end = autolink_delim(data, link_end);
384
+ assert(data[start + offset + max_rewind] == '@');
385
+ for (link_end = 1; link_end < remaining - offset - max_rewind; ++link_end) {
386
+ uint8_t c = data[start + offset + max_rewind + link_end];
387
+
388
+ if (cmark_isalnum(c))
389
+ continue;
390
+
391
+ if (c == '@') {
392
+ // Found another '@', so go back and try again with an updated offset and max_rewind.
393
+ offset += max_rewind + 1;
394
+ max_rewind = link_end - 1;
395
+ goto found_at;
396
+ } else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
397
+ cmark_isalnum(data[start + offset + max_rewind + link_end + 1]))
398
+ np++;
399
+ else if (c == '/' && is_xmpp)
400
+ continue;
401
+ else if (c != '-' && c != '_')
402
+ break;
403
+ }
339
404
 
340
- if (link_end == 0) {
341
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
342
- return;
343
- }
405
+ if (link_end < 2 || np == 0 ||
406
+ (!cmark_isalpha(data[start + offset + max_rewind + link_end - 1]) &&
407
+ data[start + offset + max_rewind + link_end - 1] != '.')) {
408
+ offset += max_rewind + link_end;
409
+ continue;
410
+ }
344
411
 
345
- cmark_chunk_to_cstr(parser->mem, &text->as.literal);
412
+ link_end = autolink_delim(data + start + offset + max_rewind, link_end);
346
413
 
347
- cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
348
- cmark_strbuf buf;
349
- cmark_strbuf_init(parser->mem, &buf, 10);
350
- cmark_strbuf_puts(&buf, "mailto:");
351
- cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind));
352
- link_node->as.link.url = cmark_chunk_buf_detach(&buf);
353
-
354
- cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
355
- cmark_chunk email = cmark_chunk_dup(
356
- &text->as.literal,
357
- offset + max_rewind - rewind,
414
+ if (link_end == 0) {
415
+ offset += max_rewind + 1;
416
+ continue;
417
+ }
418
+
419
+ cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
420
+ cmark_strbuf buf;
421
+ cmark_strbuf_init(parser->mem, &buf, 10);
422
+ if (auto_mailto)
423
+ cmark_strbuf_puts(&buf, "mailto:");
424
+ cmark_strbuf_put(&buf, data + start + offset + max_rewind - rewind, (bufsize_t)(link_end + rewind));
425
+ link_node->as.link.url = cmark_chunk_buf_detach(&buf);
426
+
427
+ cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
428
+ cmark_chunk email = cmark_chunk_dup(
429
+ &detached_chunk,
430
+ (bufsize_t)(start + offset + max_rewind - rewind),
358
431
  (bufsize_t)(link_end + rewind));
359
- cmark_chunk_to_cstr(parser->mem, &email);
360
- link_text->as.literal = email;
361
- cmark_node_append_child(link_node, link_text);
432
+ cmark_chunk_to_cstr(parser->mem, &email);
433
+ link_text->as.literal = email;
434
+ cmark_node_append_child(link_node, link_text);
362
435
 
363
- cmark_node_insert_after(text, link_node);
436
+ cmark_node_insert_after(text, link_node);
364
437
 
365
- cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
366
- post->as.literal = cmark_chunk_dup(&text->as.literal,
367
- (bufsize_t)(offset + max_rewind + link_end),
368
- (bufsize_t)(size - link_end));
369
- cmark_chunk_to_cstr(parser->mem, &post->as.literal);
438
+ cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
439
+ post->as.literal = cmark_chunk_dup(&detached_chunk,
440
+ (bufsize_t)(start + offset + max_rewind + link_end),
441
+ (bufsize_t)(remaining - offset - max_rewind - link_end));
370
442
 
371
- cmark_node_insert_after(link_node, post);
443
+ cmark_node_insert_after(link_node, post);
372
444
 
373
- text->as.literal.len = offset + max_rewind - rewind;
374
- text->as.literal.data[text->as.literal.len] = 0;
445
+ text->as.literal = cmark_chunk_dup(&detached_chunk, (bufsize_t)start, (bufsize_t)(offset + max_rewind - rewind));
446
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
447
+
448
+ text = post;
449
+ start += offset + max_rewind + link_end;
450
+ remaining -= offset + max_rewind + link_end;
451
+ offset = 0;
452
+ }
453
+
454
+ // Convert the reference to allocated memory.
455
+ assert(!text->as.literal.alloc);
456
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
375
457
 
376
- postprocess_text(parser, post, 0, depth + 1);
458
+ // Free the detached buffer.
459
+ cmark_chunk_free(parser->mem, &detached_chunk);
377
460
  }
378
461
 
379
462
  static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
@@ -400,7 +483,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser
400
483
  }
401
484
 
402
485
  if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) {
403
- postprocess_text(parser, node, 0, /*depth*/0);
486
+ postprocess_text(parser, node);
404
487
  }
405
488
  }
406
489