markly 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/conduct.md +133 -0
  4. data/ext/markly/arena.c +9 -8
  5. data/ext/markly/autolink.c +217 -134
  6. data/ext/markly/blocks.c +27 -2
  7. data/ext/markly/cmark-gfm-core-extensions.h +11 -11
  8. data/ext/markly/cmark-gfm-extension_api.h +1 -0
  9. data/ext/markly/cmark-gfm.h +18 -2
  10. data/ext/markly/cmark.c +3 -3
  11. data/ext/markly/commonmark.c +19 -34
  12. data/ext/markly/extconf.rb +8 -1
  13. data/ext/markly/html.c +22 -6
  14. data/ext/markly/inlines.c +148 -51
  15. data/ext/markly/latex.c +6 -4
  16. data/ext/markly/man.c +7 -11
  17. data/ext/markly/map.c +11 -4
  18. data/ext/markly/map.h +5 -2
  19. data/ext/markly/markly.c +582 -586
  20. data/ext/markly/markly.h +1 -1
  21. data/ext/markly/node.c +76 -10
  22. data/ext/markly/node.h +42 -1
  23. data/ext/markly/parser.h +1 -0
  24. data/ext/markly/plaintext.c +12 -29
  25. data/ext/markly/references.c +1 -0
  26. data/ext/markly/render.c +15 -7
  27. data/ext/markly/scanners.c +13916 -10380
  28. data/ext/markly/scanners.h +8 -0
  29. data/ext/markly/scanners.re +47 -8
  30. data/ext/markly/strikethrough.c +1 -1
  31. data/ext/markly/table.c +81 -31
  32. data/ext/markly/xml.c +2 -1
  33. data/lib/markly/flags.rb +16 -0
  34. data/lib/markly/node/inspect.rb +59 -53
  35. data/lib/markly/node.rb +125 -58
  36. data/lib/markly/renderer/generic.rb +129 -124
  37. data/lib/markly/renderer/html.rb +294 -275
  38. data/lib/markly/version.rb +7 -1
  39. data/lib/markly.rb +36 -30
  40. data/license.md +39 -0
  41. data/readme.md +36 -0
  42. data.tar.gz.sig +0 -0
  43. metadata +61 -29
  44. metadata.gz.sig +0 -0
  45. data/bin/markly +0 -94
  46. data/lib/markly/markly.bundle +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e62908560f536f22b024d0fc29c02ee109a99aa6b4216d58e82fc97d7525171a
4
- data.tar.gz: b6b7eae8d1579556a4461aa9d338cb1d86433fdcb20c6ac70983d0cdedeead6e
3
+ metadata.gz: 8f8bb5972577921f29b489d7ef01e509d5e9a771f1c864cad7e64e4b2c7129a8
4
+ data.tar.gz: 237d34500aca112b8aaede861618ad205a2518d86a04e7a9f1dcc06b4101de45
5
5
  SHA512:
6
- metadata.gz: daee4db1b759180ea27886da828fa1301aac0adb1833203b80b635ee815e61bebb8caedad03853cd4d4231de748781cefa6d341d3172d3a387e170cbd806cc47
7
- data.tar.gz: a0f79ef83f73d32c8b1b14a21c13ad3c20574abed043e29cb0057c31c9b85ccb051244462efb0dfd508a3315232083de3bafefa0dc4d24fd5106bc966721bcdf
6
+ metadata.gz: ea92b1d1b3ae144d31c75445d58fed31201a9d0d5df4c8922ae85d1384f35abe630df8213f8f33a084fdff6e02222fdbb663364a898cff5a98af5f71b09a08b2
7
+ data.tar.gz: 8bf9723221304cdd0d7a8d9fbfb74d4f9f16badcf08d28bc919226e471a7612adf5e1bd6fa3ae4ac2f94b2108e46c6c5c26af4a38aec48e796af097b811d30fb
checksums.yaml.gz.sig ADDED
Binary file
data/conduct.md ADDED
@@ -0,0 +1,133 @@
1
+
2
+ # Contributor Covenant Code of Conduct
3
+
4
+ ## Our Pledge
5
+
6
+ We as members, contributors, and leaders pledge to make participation in our
7
+ community a harassment-free experience for everyone, regardless of age, body
8
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
9
+ identity and expression, level of experience, education, socio-economic status,
10
+ nationality, personal appearance, race, caste, color, religion, or sexual
11
+ identity and orientation.
12
+
13
+ We pledge to act and interact in ways that contribute to an open, welcoming,
14
+ diverse, inclusive, and healthy community.
15
+
16
+ ## Our Standards
17
+
18
+ Examples of behavior that contributes to a positive environment for our
19
+ community include:
20
+
21
+ * Demonstrating empathy and kindness toward other people
22
+ * Being respectful of differing opinions, viewpoints, and experiences
23
+ * Giving and gracefully accepting constructive feedback
24
+ * Accepting responsibility and apologizing to those affected by our mistakes,
25
+ and learning from the experience
26
+ * Focusing on what is best not just for us as individuals, but for the overall
27
+ community
28
+
29
+ Examples of unacceptable behavior include:
30
+
31
+ * The use of sexualized language or imagery, and sexual attention or advances of
32
+ any kind
33
+ * Trolling, insulting or derogatory comments, and personal or political attacks
34
+ * Public or private harassment
35
+ * Publishing others' private information, such as a physical or email address,
36
+ without their explicit permission
37
+ * Other conduct which could reasonably be considered inappropriate in a
38
+ professional setting
39
+
40
+ ## Enforcement Responsibilities
41
+
42
+ Community leaders are responsible for clarifying and enforcing our standards of
43
+ acceptable behavior and will take appropriate and fair corrective action in
44
+ response to any behavior that they deem inappropriate, threatening, offensive,
45
+ or harmful.
46
+
47
+ Community leaders have the right and responsibility to remove, edit, or reject
48
+ comments, commits, code, wiki edits, issues, and other contributions that are
49
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
50
+ decisions when appropriate.
51
+
52
+ ## Scope
53
+
54
+ This Code of Conduct applies within all community spaces, and also applies when
55
+ an individual is officially representing the community in public spaces.
56
+ Examples of representing our community include using an official e-mail address,
57
+ posting via an official social media account, or acting as an appointed
58
+ representative at an online or offline event.
59
+
60
+ ## Enforcement
61
+
62
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
63
+ reported to the community leaders responsible for enforcement at
64
+ [INSERT CONTACT METHOD].
65
+ All complaints will be reviewed and investigated promptly and fairly.
66
+
67
+ All community leaders are obligated to respect the privacy and security of the
68
+ reporter of any incident.
69
+
70
+ ## Enforcement Guidelines
71
+
72
+ Community leaders will follow these Community Impact Guidelines in determining
73
+ the consequences for any action they deem in violation of this Code of Conduct:
74
+
75
+ ### 1. Correction
76
+
77
+ **Community Impact**: Use of inappropriate language or other behavior deemed
78
+ unprofessional or unwelcome in the community.
79
+
80
+ **Consequence**: A private, written warning from community leaders, providing
81
+ clarity around the nature of the violation and an explanation of why the
82
+ behavior was inappropriate. A public apology may be requested.
83
+
84
+ ### 2. Warning
85
+
86
+ **Community Impact**: A violation through a single incident or series of
87
+ actions.
88
+
89
+ **Consequence**: A warning with consequences for continued behavior. No
90
+ interaction with the people involved, including unsolicited interaction with
91
+ those enforcing the Code of Conduct, for a specified period of time. This
92
+ includes avoiding interactions in community spaces as well as external channels
93
+ like social media. Violating these terms may lead to a temporary or permanent
94
+ ban.
95
+
96
+ ### 3. Temporary Ban
97
+
98
+ **Community Impact**: A serious violation of community standards, including
99
+ sustained inappropriate behavior.
100
+
101
+ **Consequence**: A temporary ban from any sort of interaction or public
102
+ communication with the community for a specified period of time. No public or
103
+ private interaction with the people involved, including unsolicited interaction
104
+ with those enforcing the Code of Conduct, is allowed during this period.
105
+ Violating these terms may lead to a permanent ban.
106
+
107
+ ### 4. Permanent Ban
108
+
109
+ **Community Impact**: Demonstrating a pattern of violation of community
110
+ standards, including sustained inappropriate behavior, harassment of an
111
+ individual, or aggression toward or disparagement of classes of individuals.
112
+
113
+ **Consequence**: A permanent ban from any sort of public interaction within the
114
+ community.
115
+
116
+ ## Attribution
117
+
118
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
119
+ version 2.1, available at
120
+ [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
121
+
122
+ Community Impact Guidelines were inspired by
123
+ [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
124
+
125
+ For answers to common questions about this code of conduct, see the FAQ at
126
+ [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
127
+ [https://www.contributor-covenant.org/translations][translations].
128
+
129
+ [homepage]: https://www.contributor-covenant.org
130
+ [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
131
+ [Mozilla CoC]: https://github.com/mozilla/diversity
132
+ [FAQ]: https://www.contributor-covenant.org/faq
133
+ [translations]: https://www.contributor-covenant.org/translations
data/ext/markly/arena.c CHANGED
@@ -68,15 +68,16 @@ static void *arena_calloc(size_t nmem, size_t size) {
68
68
  const size_t align = sizeof(size_t) - 1;
69
69
  sz = (sz + align) & ~align;
70
70
 
71
+ struct arena_chunk *chunk;
71
72
  if (sz > A->sz) {
72
- A->prev = alloc_arena_chunk(sz, A->prev);
73
- return (uint8_t *) A->prev->ptr + sizeof(size_t);
73
+ A->prev = chunk = alloc_arena_chunk(sz, A->prev);
74
+ } else if (sz > A->sz - A->used) {
75
+ A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
76
+ } else {
77
+ chunk = A;
74
78
  }
75
- if (sz > A->sz - A->used) {
76
- A = alloc_arena_chunk(A->sz + A->sz / 2, A);
77
- }
78
- void *ptr = (uint8_t *) A->ptr + A->used;
79
- A->used += sz;
79
+ void *ptr = (uint8_t *) chunk->ptr + chunk->used;
80
+ chunk->used += sz;
80
81
  *((size_t *) ptr) = sz - sizeof(size_t);
81
82
  return (uint8_t *) ptr + sizeof(size_t);
82
83
  }
@@ -98,6 +99,6 @@ static void arena_free(void *ptr) {
98
99
 
99
100
  cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
100
101
 
101
- cmark_mem *cmark_get_arena_mem_allocator() {
102
+ cmark_mem *cmark_get_arena_mem_allocator(void) {
102
103
  return &CMARK_ARENA_MEM_ALLOCATOR;
103
104
  }
@@ -2,6 +2,7 @@
2
2
  #include <parser.h>
3
3
  #include <string.h>
4
4
  #include <utf8.h>
5
+ #include <stddef.h>
5
6
 
6
7
  #if defined(_WIN32)
7
8
  #define strncasecmp _strnicmp
@@ -35,44 +36,25 @@ static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
35
36
  }
36
37
 
37
38
  static size_t autolink_delim(uint8_t *data, size_t link_end) {
38
- uint8_t cclose, copen;
39
39
  size_t i;
40
+ size_t closing = 0;
41
+ size_t opening = 0;
40
42
 
41
- for (i = 0; i < link_end; ++i)
42
- if (data[i] == '<') {
43
+ for (i = 0; i < link_end; ++i) {
44
+ const uint8_t c = data[i];
45
+ if (c == '<') {
43
46
  link_end = i;
44
47
  break;
48
+ } else if (c == '(') {
49
+ opening++;
50
+ } else if (c == ')') {
51
+ closing++;
45
52
  }
53
+ }
46
54
 
47
55
  while (link_end > 0) {
48
- cclose = data[link_end - 1];
49
-
50
- switch (cclose) {
56
+ switch (data[link_end - 1]) {
51
57
  case ')':
52
- copen = '(';
53
- break;
54
- default:
55
- copen = 0;
56
- }
57
-
58
- if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL)
59
- link_end--;
60
-
61
- else if (data[link_end - 1] == ';') {
62
- size_t new_end = link_end - 2;
63
-
64
- while (new_end > 0 && cmark_isalpha(data[new_end]))
65
- new_end--;
66
-
67
- if (new_end < link_end - 2 && data[new_end] == '&')
68
- link_end = new_end;
69
- else
70
- link_end--;
71
- } else if (copen != 0) {
72
- size_t closing = 0;
73
- size_t opening = 0;
74
- i = 0;
75
-
76
58
  /* Allow any number of matching brackets (only '(' and ')' are counted)
77
59
  * at the end of the URL. If there is a greater number of closing
78
60
  * brackets than opening ones, we remove one character from the end of
@@ -80,34 +62,52 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
80
62
  *
81
63
  * Examples (input text => output linked portion):
82
64
  *
83
- * http://www.pokemon.com/Pikachu_(Electric)
84
- * => http://www.pokemon.com/Pikachu_(Electric)
65
+ * http://www.pokemon.com/Pikachu_(Electric)
66
+ * => http://www.pokemon.com/Pikachu_(Electric)
85
67
  *
86
- * http://www.pokemon.com/Pikachu_((Electric)
87
- * => http://www.pokemon.com/Pikachu_((Electric)
68
+ * http://www.pokemon.com/Pikachu_((Electric)
69
+ * => http://www.pokemon.com/Pikachu_((Electric)
88
70
  *
89
- * http://www.pokemon.com/Pikachu_(Electric))
90
- * => http://www.pokemon.com/Pikachu_(Electric)
71
+ * http://www.pokemon.com/Pikachu_(Electric))
72
+ * => http://www.pokemon.com/Pikachu_(Electric)
91
73
  *
92
- * http://www.pokemon.com/Pikachu_((Electric))
93
- * => http://www.pokemon.com/Pikachu_((Electric))
74
+ * http://www.pokemon.com/Pikachu_((Electric))
75
+ * => http://www.pokemon.com/Pikachu_((Electric))
94
76
  */
95
-
96
- while (i < link_end) {
97
- if (data[i] == copen)
98
- opening++;
99
- else if (data[i] == cclose)
100
- closing++;
101
-
102
- i++;
77
+ if (closing <= opening) {
78
+ return link_end;
103
79
  }
80
+ closing--;
81
+ link_end--;
82
+ break;
83
+ case '?':
84
+ case '!':
85
+ case '.':
86
+ case ',':
87
+ case ':':
88
+ case '*':
89
+ case '_':
90
+ case '~':
91
+ case '\'':
92
+ case '"':
93
+ link_end--;
94
+ break;
95
+ case ';': {
96
+ size_t new_end = link_end - 2;
104
97
 
105
- if (closing <= opening)
106
- break;
98
+ while (new_end > 0 && cmark_isalpha(data[new_end]))
99
+ new_end--;
107
100
 
108
- link_end--;
109
- } else
101
+ if (new_end < link_end - 2 && data[new_end] == '&')
102
+ link_end = new_end;
103
+ else
104
+ link_end--;
110
105
  break;
106
+ }
107
+
108
+ default:
109
+ return link_end;
110
+ }
111
111
  }
112
112
 
113
113
  return link_end;
@@ -116,7 +116,20 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
116
116
  static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
117
117
  size_t i, np = 0, uscore1 = 0, uscore2 = 0;
118
118
 
119
+ /* The purpose of this code is to reject urls that contain an underscore
120
+ * in one of the last two segments. Examples:
121
+ *
122
+ * www.xxx.yyy.zzz autolinked
123
+ * www.xxx.yyy._zzz not autolinked
124
+ * www.xxx._yyy.zzz not autolinked
125
+ * www._xxx.yyy.zzz autolinked
126
+ *
127
+ * The reason is that domain names are allowed to include underscores,
128
+ * but host names are not. See: https://stackoverflow.com/a/2183140
129
+ */
119
130
  for (i = 1; i < size - 1; i++) {
131
+ if (data[i] == '\\' && i < size - 2)
132
+ i++;
120
133
  if (data[i] == '_')
121
134
  uscore2++;
122
135
  else if (data[i] == '.') {
@@ -127,8 +140,17 @@ static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
127
140
  break;
128
141
  }
129
142
 
130
- if (uscore1 > 0 || uscore2 > 0)
131
- return 0;
143
+ if (uscore1 > 0 || uscore2 > 0) {
144
+ /* If the url is very long then accept it despite the underscores,
145
+ * to avoid quadratic behavior causing a denial of service. See:
146
+ * https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
147
+ * Reasonable urls are unlikely to have more than 10 segments, so
148
+ * this extra condition shouldn't have any impact on normal usage.
149
+ */
150
+ if (np <= 10) {
151
+ return 0;
152
+ }
153
+ }
132
154
 
133
155
  if (allow_short) {
134
156
  /* We don't need a valid domain in the strict sense (with
@@ -165,7 +187,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
165
187
  if (link_end == 0)
166
188
  return NULL;
167
189
 
168
- while (link_end < size && !cmark_isspace(data[link_end]))
190
+ while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
169
191
  link_end++;
170
192
 
171
193
  link_end = autolink_delim(data, link_end);
@@ -225,7 +247,7 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
225
247
  return 0;
226
248
 
227
249
  link_end += domain_len;
228
- while (link_end < size && !cmark_isspace(data[link_end]))
250
+ while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
229
251
  link_end++;
230
252
 
231
253
  link_end = autolink_delim(data, link_end);
@@ -245,6 +267,11 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
245
267
  cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
246
268
  text->as.literal = url;
247
269
  cmark_node_append_child(node, text);
270
+
271
+ node->start_line = text->start_line = node->end_line = text->end_line = cmark_inline_parser_get_line(inline_parser);
272
+
273
+ node->start_column = text->start_column = max_rewind - rewind;
274
+ node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
248
275
 
249
276
  return node;
250
277
  }
@@ -269,111 +296,167 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
269
296
  // inline was finished in inlines.c.
270
297
  }
271
298
 
272
- static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset, int depth) {
273
- // postprocess_text can recurse very deeply if there is a very long line of
274
- // '@' only. Stop at a reasonable depth to ensure it cannot crash.
275
- if (depth > 1000) return;
299
+ static bool validate_protocol(const char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) {
300
+ size_t len = strlen(protocol);
276
301
 
277
- size_t link_end;
278
- uint8_t *data = text->as.literal.data,
279
- *at;
280
- size_t size = text->as.literal.len;
281
- int rewind, max_rewind,
282
- nb = 0, np = 0, ns = 0;
302
+ if (len > (max_rewind - rewind)) {
303
+ return false;
304
+ }
283
305
 
284
- if (offset < 0 || (size_t)offset >= size)
285
- return;
306
+ // Check that the protocol matches
307
+ if (memcmp(data - rewind - len, protocol, len) != 0) {
308
+ return false;
309
+ }
286
310
 
287
- data += offset;
288
- size -= offset;
311
+ if (len == (max_rewind - rewind)) {
312
+ return true;
313
+ }
289
314
 
290
- at = (uint8_t *)memchr(data, '@', size);
291
- if (!at)
292
- return;
315
+ char prev_char = data[-((ptrdiff_t)rewind) - len - 1];
293
316
 
294
- max_rewind = (int)(at - data);
295
- data += max_rewind;
296
- size -= max_rewind;
317
+ // Make sure the character before the protocol is non-alphanumeric
318
+ return !cmark_isalnum(prev_char);
319
+ }
297
320
 
298
- for (rewind = 0; rewind < max_rewind; ++rewind) {
299
- uint8_t c = data[-rewind - 1];
321
+ static void postprocess_text(cmark_parser *parser, cmark_node *text) {
322
+ size_t start = 0;
323
+ size_t offset = 0;
324
+ // `text` is going to be split into a list of nodes containing shorter segments
325
+ // of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
326
+ // create references to it. Later, `cmark_chunk_to_cstr` is used to convert
327
+ // the references into allocated buffers. The detached buffer is freed before we
328
+ // return.
329
+ cmark_chunk detached_chunk = text->as.literal;
330
+ text->as.literal = cmark_chunk_dup(&detached_chunk, 0, detached_chunk.len);
331
+
332
+ uint8_t *data = text->as.literal.data;
333
+ size_t remaining = text->as.literal.len;
334
+
335
+ while (true) {
336
+ size_t link_end;
337
+ uint8_t *at;
338
+ bool auto_mailto = true;
339
+ bool is_xmpp = false;
340
+ size_t rewind;
341
+ size_t max_rewind;
342
+ size_t np = 0;
343
+
344
+ if (offset >= remaining)
345
+ break;
300
346
 
301
- if (cmark_isalnum(c))
302
- continue;
347
+ at = (uint8_t *)memchr(data + start + offset, '@', remaining - offset);
348
+ if (!at)
349
+ break;
303
350
 
304
- if (strchr(".+-_", c) != NULL)
305
- continue;
351
+ max_rewind = at - (data + start + offset);
306
352
 
307
- if (c == '/')
308
- ns++;
353
+ found_at:
354
+ for (rewind = 0; rewind < max_rewind; ++rewind) {
355
+ uint8_t c = data[start + offset + max_rewind - rewind - 1];
309
356
 
310
- break;
311
- }
357
+ if (cmark_isalnum(c))
358
+ continue;
312
359
 
313
- if (rewind == 0 || ns > 0) {
314
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
315
- return;
316
- }
360
+ if (strchr(".+-_", c) != NULL)
361
+ continue;
317
362
 
318
- for (link_end = 0; link_end < size; ++link_end) {
319
- uint8_t c = data[link_end];
363
+ if (strchr(":", c) != NULL) {
364
+ if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) {
365
+ auto_mailto = false;
366
+ continue;
367
+ }
320
368
 
321
- if (cmark_isalnum(c))
322
- continue;
369
+ if (validate_protocol("xmpp:", data + start + offset + max_rewind, rewind, max_rewind)) {
370
+ auto_mailto = false;
371
+ is_xmpp = true;
372
+ continue;
373
+ }
374
+ }
323
375
 
324
- if (c == '@')
325
- nb++;
326
- else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
327
- np++;
328
- else if (c != '-' && c != '_')
329
376
  break;
330
- }
377
+ }
331
378
 
332
- if (link_end < 2 || nb != 1 || np == 0 ||
333
- (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) {
334
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
335
- return;
336
- }
379
+ if (rewind == 0) {
380
+ offset += max_rewind + 1;
381
+ continue;
382
+ }
337
383
 
338
- link_end = autolink_delim(data, link_end);
384
+ assert(data[start + offset + max_rewind] == '@');
385
+ for (link_end = 1; link_end < remaining - offset - max_rewind; ++link_end) {
386
+ uint8_t c = data[start + offset + max_rewind + link_end];
387
+
388
+ if (cmark_isalnum(c))
389
+ continue;
390
+
391
+ if (c == '@') {
392
+ // Found another '@', so go back and try again with an updated offset and max_rewind.
393
+ offset += max_rewind + 1;
394
+ max_rewind = link_end - 1;
395
+ goto found_at;
396
+ } else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
397
+ cmark_isalnum(data[start + offset + max_rewind + link_end + 1]))
398
+ np++;
399
+ else if (c == '/' && is_xmpp)
400
+ continue;
401
+ else if (c != '-' && c != '_')
402
+ break;
403
+ }
339
404
 
340
- if (link_end == 0) {
341
- postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
342
- return;
343
- }
405
+ if (link_end < 2 || np == 0 ||
406
+ (!cmark_isalpha(data[start + offset + max_rewind + link_end - 1]) &&
407
+ data[start + offset + max_rewind + link_end - 1] != '.')) {
408
+ offset += max_rewind + link_end;
409
+ continue;
410
+ }
344
411
 
345
- cmark_chunk_to_cstr(parser->mem, &text->as.literal);
412
+ link_end = autolink_delim(data + start + offset + max_rewind, link_end);
346
413
 
347
- cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
348
- cmark_strbuf buf;
349
- cmark_strbuf_init(parser->mem, &buf, 10);
350
- cmark_strbuf_puts(&buf, "mailto:");
351
- cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind));
352
- link_node->as.link.url = cmark_chunk_buf_detach(&buf);
353
-
354
- cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
355
- cmark_chunk email = cmark_chunk_dup(
356
- &text->as.literal,
357
- offset + max_rewind - rewind,
414
+ if (link_end == 0) {
415
+ offset += max_rewind + 1;
416
+ continue;
417
+ }
418
+
419
+ cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
420
+ cmark_strbuf buf;
421
+ cmark_strbuf_init(parser->mem, &buf, 10);
422
+ if (auto_mailto)
423
+ cmark_strbuf_puts(&buf, "mailto:");
424
+ cmark_strbuf_put(&buf, data + start + offset + max_rewind - rewind, (bufsize_t)(link_end + rewind));
425
+ link_node->as.link.url = cmark_chunk_buf_detach(&buf);
426
+
427
+ cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
428
+ cmark_chunk email = cmark_chunk_dup(
429
+ &detached_chunk,
430
+ (bufsize_t)(start + offset + max_rewind - rewind),
358
431
  (bufsize_t)(link_end + rewind));
359
- cmark_chunk_to_cstr(parser->mem, &email);
360
- link_text->as.literal = email;
361
- cmark_node_append_child(link_node, link_text);
432
+ cmark_chunk_to_cstr(parser->mem, &email);
433
+ link_text->as.literal = email;
434
+ cmark_node_append_child(link_node, link_text);
362
435
 
363
- cmark_node_insert_after(text, link_node);
436
+ cmark_node_insert_after(text, link_node);
364
437
 
365
- cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
366
- post->as.literal = cmark_chunk_dup(&text->as.literal,
367
- (bufsize_t)(offset + max_rewind + link_end),
368
- (bufsize_t)(size - link_end));
369
- cmark_chunk_to_cstr(parser->mem, &post->as.literal);
438
+ cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
439
+ post->as.literal = cmark_chunk_dup(&detached_chunk,
440
+ (bufsize_t)(start + offset + max_rewind + link_end),
441
+ (bufsize_t)(remaining - offset - max_rewind - link_end));
370
442
 
371
- cmark_node_insert_after(link_node, post);
443
+ cmark_node_insert_after(link_node, post);
372
444
 
373
- text->as.literal.len = offset + max_rewind - rewind;
374
- text->as.literal.data[text->as.literal.len] = 0;
445
+ text->as.literal = cmark_chunk_dup(&detached_chunk, (bufsize_t)start, (bufsize_t)(offset + max_rewind - rewind));
446
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
447
+
448
+ text = post;
449
+ start += offset + max_rewind + link_end;
450
+ remaining -= offset + max_rewind + link_end;
451
+ offset = 0;
452
+ }
453
+
454
+ // Convert the reference to allocated memory.
455
+ assert(!text->as.literal.alloc);
456
+ cmark_chunk_to_cstr(parser->mem, &text->as.literal);
375
457
 
376
- postprocess_text(parser, post, 0, depth + 1);
458
+ // Free the detached buffer.
459
+ cmark_chunk_free(parser->mem, &detached_chunk);
377
460
  }
378
461
 
379
462
  static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
@@ -400,7 +483,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser
400
483
  }
401
484
 
402
485
  if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) {
403
- postprocess_text(parser, node, 0, /*depth*/0);
486
+ postprocess_text(parser, node);
404
487
  }
405
488
  }
406
489