emdp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +124 -0
  3. package/dist/cjs/cli.js +36 -0
  4. package/dist/cjs/gfm.js +26 -0
  5. package/dist/cjs/index.js +26 -0
  6. package/dist/cjs/parser/block-parser.js +644 -0
  7. package/dist/cjs/parser/blocks/blockquote.js +28 -0
  8. package/dist/cjs/parser/blocks/code-block-fenced.js +58 -0
  9. package/dist/cjs/parser/blocks/code-block-indented.js +47 -0
  10. package/dist/cjs/parser/blocks/heading-atx.js +29 -0
  11. package/dist/cjs/parser/blocks/heading-setext.js +24 -0
  12. package/dist/cjs/parser/blocks/html-block.js +83 -0
  13. package/dist/cjs/parser/blocks/link-reference.js +109 -0
  14. package/dist/cjs/parser/blocks/list.js +155 -0
  15. package/dist/cjs/parser/blocks/paragraph.js +20 -0
  16. package/dist/cjs/parser/blocks/table.js +163 -0
  17. package/dist/cjs/parser/blocks/task-list.js +66 -0
  18. package/dist/cjs/parser/blocks/thematic-break.js +17 -0
  19. package/dist/cjs/parser/entities.js +2133 -0
  20. package/dist/cjs/parser/gfm/block-parser.js +773 -0
  21. package/dist/cjs/parser/gfm/index.js +125 -0
  22. package/dist/cjs/parser/gfm/inline-parser.js +813 -0
  23. package/dist/cjs/parser/gfm/renderer.js +513 -0
  24. package/dist/cjs/parser/index.js +104 -0
  25. package/dist/cjs/parser/inline-parser.js +564 -0
  26. package/dist/cjs/parser/inlines/autolink-extended.js +364 -0
  27. package/dist/cjs/parser/inlines/autolink.js +44 -0
  28. package/dist/cjs/parser/inlines/code-span.js +48 -0
  29. package/dist/cjs/parser/inlines/emphasis.js +64 -0
  30. package/dist/cjs/parser/inlines/entity.js +25 -0
  31. package/dist/cjs/parser/inlines/escape.js +25 -0
  32. package/dist/cjs/parser/inlines/footnote.js +41 -0
  33. package/dist/cjs/parser/inlines/hard-break.js +45 -0
  34. package/dist/cjs/parser/inlines/image.js +9 -0
  35. package/dist/cjs/parser/inlines/link.js +166 -0
  36. package/dist/cjs/parser/inlines/soft-break.js +18 -0
  37. package/dist/cjs/parser/inlines/strikethrough.js +48 -0
  38. package/dist/cjs/parser/inlines/text.js +20 -0
  39. package/dist/cjs/parser/renderer.js +345 -0
  40. package/dist/cjs/parser/types.js +5 -0
  41. package/dist/cjs/parser/utils.js +277 -0
  42. package/dist/cli.d.ts +6 -0
  43. package/dist/cli.js +36 -0
  44. package/dist/esm/cli.js +34 -0
  45. package/dist/esm/gfm.js +5 -0
  46. package/dist/esm/index.js +5 -0
  47. package/dist/esm/package.json +3 -0
  48. package/dist/esm/parser/block-parser.js +640 -0
  49. package/dist/esm/parser/blocks/blockquote.js +22 -0
  50. package/dist/esm/parser/blocks/code-block-fenced.js +52 -0
  51. package/dist/esm/parser/blocks/code-block-indented.js +42 -0
  52. package/dist/esm/parser/blocks/heading-atx.js +24 -0
  53. package/dist/esm/parser/blocks/heading-setext.js +19 -0
  54. package/dist/esm/parser/blocks/html-block.js +77 -0
  55. package/dist/esm/parser/blocks/link-reference.js +105 -0
  56. package/dist/esm/parser/blocks/list.js +145 -0
  57. package/dist/esm/parser/blocks/paragraph.js +15 -0
  58. package/dist/esm/parser/blocks/table.js +152 -0
  59. package/dist/esm/parser/blocks/task-list.js +61 -0
  60. package/dist/esm/parser/blocks/thematic-break.js +13 -0
  61. package/dist/esm/parser/entities.js +2130 -0
  62. package/dist/esm/parser/gfm/block-parser.js +769 -0
  63. package/dist/esm/parser/gfm/index.js +115 -0
  64. package/dist/esm/parser/gfm/inline-parser.js +809 -0
  65. package/dist/esm/parser/gfm/renderer.js +509 -0
  66. package/dist/esm/parser/index.js +80 -0
  67. package/dist/esm/parser/inline-parser.js +560 -0
  68. package/dist/esm/parser/inlines/autolink-extended.js +357 -0
  69. package/dist/esm/parser/inlines/autolink.js +40 -0
  70. package/dist/esm/parser/inlines/code-span.js +44 -0
  71. package/dist/esm/parser/inlines/emphasis.js +59 -0
  72. package/dist/esm/parser/inlines/entity.js +21 -0
  73. package/dist/esm/parser/inlines/escape.js +21 -0
  74. package/dist/esm/parser/inlines/footnote.js +38 -0
  75. package/dist/esm/parser/inlines/hard-break.js +41 -0
  76. package/dist/esm/parser/inlines/image.js +4 -0
  77. package/dist/esm/parser/inlines/link.js +156 -0
  78. package/dist/esm/parser/inlines/soft-break.js +14 -0
  79. package/dist/esm/parser/inlines/strikethrough.js +42 -0
  80. package/dist/esm/parser/inlines/text.js +16 -0
  81. package/dist/esm/parser/renderer.js +341 -0
  82. package/dist/esm/parser/types.js +4 -0
  83. package/dist/esm/parser/utils.js +254 -0
  84. package/dist/gfm.d.ts +6 -0
  85. package/dist/gfm.js +26 -0
  86. package/dist/index.d.ts +5 -0
  87. package/dist/index.js +26 -0
  88. package/dist/parser/block-parser.d.ts +25 -0
  89. package/dist/parser/block-parser.js +644 -0
  90. package/dist/parser/blocks/blockquote.d.ts +8 -0
  91. package/dist/parser/blocks/blockquote.js +28 -0
  92. package/dist/parser/blocks/code-block-fenced.d.ts +14 -0
  93. package/dist/parser/blocks/code-block-fenced.js +58 -0
  94. package/dist/parser/blocks/code-block-indented.d.ts +7 -0
  95. package/dist/parser/blocks/code-block-indented.js +47 -0
  96. package/dist/parser/blocks/heading-atx.d.ts +10 -0
  97. package/dist/parser/blocks/heading-atx.js +29 -0
  98. package/dist/parser/blocks/heading-setext.d.ts +8 -0
  99. package/dist/parser/blocks/heading-setext.js +24 -0
  100. package/dist/parser/blocks/html-block.d.ts +9 -0
  101. package/dist/parser/blocks/html-block.js +83 -0
  102. package/dist/parser/blocks/link-reference.d.ts +11 -0
  103. package/dist/parser/blocks/link-reference.js +109 -0
  104. package/dist/parser/blocks/list.d.ts +25 -0
  105. package/dist/parser/blocks/list.js +155 -0
  106. package/dist/parser/blocks/paragraph.d.ts +7 -0
  107. package/dist/parser/blocks/paragraph.js +20 -0
  108. package/dist/parser/blocks/table.d.ts +13 -0
  109. package/dist/parser/blocks/table.js +163 -0
  110. package/dist/parser/blocks/task-list.d.ts +10 -0
  111. package/dist/parser/blocks/task-list.js +66 -0
  112. package/dist/parser/blocks/thematic-break.d.ts +6 -0
  113. package/dist/parser/blocks/thematic-break.js +17 -0
  114. package/dist/parser/entities.d.ts +4 -0
  115. package/dist/parser/entities.js +2133 -0
  116. package/dist/parser/gfm/block-parser.d.ts +32 -0
  117. package/dist/parser/gfm/block-parser.js +773 -0
  118. package/dist/parser/gfm/index.d.ts +31 -0
  119. package/dist/parser/gfm/index.js +125 -0
  120. package/dist/parser/gfm/inline-parser.d.ts +25 -0
  121. package/dist/parser/gfm/inline-parser.js +813 -0
  122. package/dist/parser/gfm/renderer.d.ts +43 -0
  123. package/dist/parser/gfm/renderer.js +513 -0
  124. package/dist/parser/index.d.ts +33 -0
  125. package/dist/parser/index.js +104 -0
  126. package/dist/parser/inline-parser.d.ts +16 -0
  127. package/dist/parser/inline-parser.js +564 -0
  128. package/dist/parser/inlines/autolink-extended.d.ts +24 -0
  129. package/dist/parser/inlines/autolink-extended.js +364 -0
  130. package/dist/parser/inlines/autolink.d.ts +9 -0
  131. package/dist/parser/inlines/autolink.js +44 -0
  132. package/dist/parser/inlines/code-span.d.ts +9 -0
  133. package/dist/parser/inlines/code-span.js +48 -0
  134. package/dist/parser/inlines/emphasis.d.ts +14 -0
  135. package/dist/parser/inlines/emphasis.js +64 -0
  136. package/dist/parser/inlines/entity.d.ts +8 -0
  137. package/dist/parser/inlines/entity.js +25 -0
  138. package/dist/parser/inlines/escape.d.ts +8 -0
  139. package/dist/parser/inlines/escape.js +25 -0
  140. package/dist/parser/inlines/footnote.d.ts +9 -0
  141. package/dist/parser/inlines/footnote.js +41 -0
  142. package/dist/parser/inlines/hard-break.d.ts +9 -0
  143. package/dist/parser/inlines/hard-break.js +45 -0
  144. package/dist/parser/inlines/image.d.ts +4 -0
  145. package/dist/parser/inlines/image.js +9 -0
  146. package/dist/parser/inlines/link.d.ts +33 -0
  147. package/dist/parser/inlines/link.js +166 -0
  148. package/dist/parser/inlines/soft-break.d.ts +9 -0
  149. package/dist/parser/inlines/soft-break.js +18 -0
  150. package/dist/parser/inlines/strikethrough.d.ts +16 -0
  151. package/dist/parser/inlines/strikethrough.js +48 -0
  152. package/dist/parser/inlines/text.d.ts +6 -0
  153. package/dist/parser/inlines/text.js +20 -0
  154. package/dist/parser/renderer.d.ts +33 -0
  155. package/dist/parser/renderer.js +345 -0
  156. package/dist/parser/types.d.ts +152 -0
  157. package/dist/parser/types.js +5 -0
  158. package/dist/parser/utils.d.ts +32 -0
  159. package/dist/parser/utils.js +277 -0
  160. package/dist/types/cli.d.ts +6 -0
  161. package/dist/types/gfm.d.ts +6 -0
  162. package/dist/types/index.d.ts +5 -0
  163. package/dist/types/parser/block-parser.d.ts +25 -0
  164. package/dist/types/parser/blocks/blockquote.d.ts +8 -0
  165. package/dist/types/parser/blocks/code-block-fenced.d.ts +14 -0
  166. package/dist/types/parser/blocks/code-block-indented.d.ts +7 -0
  167. package/dist/types/parser/blocks/heading-atx.d.ts +10 -0
  168. package/dist/types/parser/blocks/heading-setext.d.ts +8 -0
  169. package/dist/types/parser/blocks/html-block.d.ts +9 -0
  170. package/dist/types/parser/blocks/link-reference.d.ts +11 -0
  171. package/dist/types/parser/blocks/list.d.ts +25 -0
  172. package/dist/types/parser/blocks/paragraph.d.ts +7 -0
  173. package/dist/types/parser/blocks/table.d.ts +13 -0
  174. package/dist/types/parser/blocks/task-list.d.ts +10 -0
  175. package/dist/types/parser/blocks/thematic-break.d.ts +6 -0
  176. package/dist/types/parser/entities.d.ts +4 -0
  177. package/dist/types/parser/gfm/block-parser.d.ts +32 -0
  178. package/dist/types/parser/gfm/index.d.ts +31 -0
  179. package/dist/types/parser/gfm/inline-parser.d.ts +25 -0
  180. package/dist/types/parser/gfm/renderer.d.ts +43 -0
  181. package/dist/types/parser/index.d.ts +33 -0
  182. package/dist/types/parser/inline-parser.d.ts +16 -0
  183. package/dist/types/parser/inlines/autolink-extended.d.ts +24 -0
  184. package/dist/types/parser/inlines/autolink.d.ts +9 -0
  185. package/dist/types/parser/inlines/code-span.d.ts +9 -0
  186. package/dist/types/parser/inlines/emphasis.d.ts +14 -0
  187. package/dist/types/parser/inlines/entity.d.ts +8 -0
  188. package/dist/types/parser/inlines/escape.d.ts +8 -0
  189. package/dist/types/parser/inlines/footnote.d.ts +9 -0
  190. package/dist/types/parser/inlines/hard-break.d.ts +9 -0
  191. package/dist/types/parser/inlines/image.d.ts +4 -0
  192. package/dist/types/parser/inlines/link.d.ts +33 -0
  193. package/dist/types/parser/inlines/soft-break.d.ts +9 -0
  194. package/dist/types/parser/inlines/strikethrough.d.ts +16 -0
  195. package/dist/types/parser/inlines/text.d.ts +6 -0
  196. package/dist/types/parser/renderer.d.ts +33 -0
  197. package/dist/types/parser/types.d.ts +152 -0
  198. package/dist/types/parser/utils.d.ts +32 -0
  199. package/package.json +54 -0
@@ -0,0 +1,357 @@
1
+ /**
2
+ * Parser for GFM extended autolinks that recognize bare URLs and emails.
3
+ */
4
/**
 * Characters that may sit directly before a bare `www.` / URL autolink.
 * (Start of input is handled separately by the predicate functions.)
 */
const VALID_PRECEDING_CHARS = new Set(['\n', ' ', '\t', '*', '_', '~', '(', '"', "'"]);

/**
 * Characters that may sit directly before an email or protocol autolink:
 * everything allowed before a URL autolink, plus `:` and `/`.
 */
const VALID_EMAIL_PRECEDING_CHARS = new Set([...VALID_PRECEDING_CHARS, ':', '/']);
10
/**
 * Tells whether a URL/www autolink may start after `char`.
 *
 * @param {string|undefined} char - Character before the candidate link;
 *   `undefined` is accepted unconditionally.
 * @returns {boolean} True when `char` is `undefined` or is in
 *   `VALID_PRECEDING_CHARS`.
 */
function isValidPrecedingChar(char) {
  return char === undefined || VALID_PRECEDING_CHARS.has(char);
}
15
/**
 * Tells whether an email/protocol autolink may start after `char`.
 *
 * @param {string|undefined} char - Character before the candidate link;
 *   `undefined` is accepted unconditionally.
 * @returns {boolean} True when `char` is `undefined` or is in
 *   `VALID_EMAIL_PRECEDING_CHARS`.
 */
function isValidEmailPrecedingChar(char) {
  return char === undefined || VALID_EMAIL_PRECEDING_CHARS.has(char);
}
20
/**
 * Checks whether the nearest non-whitespace character before `pos` is `<`.
 * Spaces, tabs and newlines are skipped while walking backwards.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index of the candidate autolink start.
 * @returns {boolean} True only if a `<` is found before any other
 *   non-whitespace character.
 */
function precededByOpenAngle(text, pos) {
  for (let idx = pos - 1; idx >= 0; idx--) {
    const ch = text[idx];
    if (ch === ' ' || ch === '\t' || ch === '\n') {
      continue;
    }
    return ch === '<';
  }
  return false;
}
27
/**
 * Tells whether `char` may appear inside a domain segment: an ASCII letter,
 * digit, underscore or hyphen, or anything whose first code point is
 * above 127.
 *
 * @param {string} char - Character to classify.
 * @returns {boolean} True when the character is allowed in a domain segment.
 */
function isDomainChar(char) {
  if (/[a-zA-Z0-9_-]/.test(char)) {
    return true;
  }
  const firstCodePoint = char.codePointAt(0);
  if (firstCodePoint === undefined) {
    return false;
  }
  return firstCodePoint > 127;
}
33
/**
 * Tells whether a UTF-16 code unit lies in the high-surrogate range
 * 0xD800–0xDBFF (inclusive).
 *
 * @param {number} code - UTF-16 code unit.
 * @returns {boolean} True when `code` is a high surrogate.
 */
function isHighSurrogate(code) {
  const rangeStart = 0xd800;
  const rangeEnd = 0xdbff;
  return rangeStart <= code && code <= rangeEnd;
}
36
/**
 * Scans a dot-separated domain starting at `startPos`.
 *
 * Segments are built from characters accepted by `isDomainChar`; a high
 * surrogate followed by another code unit is consumed as a two-unit pair.
 * Scanning stops at the first other character, or at a `.` that would start
 * an empty segment.
 *
 * @param {string} text - Text being scanned.
 * @param {number} startPos - Index where the domain should begin.
 * @param {boolean} [requirePeriod=true] - When true at least two segments
 *   are required; otherwise one segment is enough.
 * @returns {{domain: string, endPos: number}|null} The segments joined with
 *   `.` and the index where scanning stopped (a trailing `.` is counted in
 *   `endPos` but not in `domain`), or null when there are too few segments
 *   or either of the last two segments contains `_`.
 */
function parseValidDomain(text, startPos, requirePeriod = true) {
  const total = text.length;
  const labels = [];
  let label = '';
  let cursor = startPos;
  while (cursor < total) {
    const unit = text[cursor];
    if (isHighSurrogate(unit.charCodeAt(0)) && cursor + 1 < total) {
      // Keep the surrogate pair together as one logical character.
      label += text.slice(cursor, cursor + 2);
      cursor += 2;
      continue;
    }
    if (isDomainChar(unit)) {
      label += unit;
      cursor += 1;
      continue;
    }
    if (unit !== '.' || label.length === 0) {
      break;
    }
    labels.push(label);
    label = '';
    cursor += 1;
  }
  if (label.length > 0) {
    labels.push(label);
  }
  const minimumLabels = requirePeriod ? 2 : 1;
  if (labels.length < minimumLabels) {
    return null;
  }
  // Underscores are rejected in the last two segments only.
  if (labels.length >= 2 && labels.slice(-2).some((part) => part.includes('_'))) {
    return null;
  }
  return { domain: labels.join('.'), endPos: cursor };
}
82
/**
 * Trims characters that should not be part of an extended autolink.
 *
 * Everything from the first `<` onward is dropped. Then, repeatedly from the
 * end: trailing punctuation (`?!.,:*_~"'`) is removed; a trailing `)` is
 * removed while closing parentheses outnumber opening ones; and a trailing
 * `&name;` entity-like suffix (alphanumeric name) is removed.
 *
 * The work is done with an end index and a single parenthesis count, so long
 * runs of trailing `)` or punctuation do not rescan or re-copy the string on
 * every step.
 *
 * @param {string} url - Candidate autolink text.
 * @returns {string} The trimmed text (possibly empty).
 */
function trimAutolinkPath(url) {
  const trailingPunctuation = '?!.,:*_~"\'';
  const alphanumeric = /[a-zA-Z0-9]/;
  const ltIndex = url.indexOf('<');
  let end = ltIndex === -1 ? url.length : ltIndex;
  // Parenthesis totals for url[0, end); computed lazily on the first ')'.
  let openCount = -1;
  let closeCount = 0;
  while (end > 0) {
    const lastChar = url[end - 1];
    if (trailingPunctuation.includes(lastChar)) {
      end--;
      continue;
    }
    if (lastChar === ')') {
      if (openCount === -1) {
        openCount = 0;
        for (let i = 0; i < end; i++) {
          if (url[i] === '(') {
            openCount++;
          } else if (url[i] === ')') {
            closeCount++;
          }
        }
      }
      // Only ')' removals change the counts: punctuation and entity
      // suffixes never contain parentheses.
      if (closeCount > openCount) {
        closeCount--;
        end--;
        continue;
      }
    } else if (lastChar === ';') {
      // Walk back over the alphanumeric entity name to find its '&'.
      let amp = end - 2;
      while (amp >= 0 && alphanumeric.test(url[amp])) {
        amp--;
      }
      const hasName = amp < end - 2;
      if (amp >= 0 && hasName && url[amp] === '&') {
        end = amp;
        continue;
      }
    }
    break;
  }
  return url.slice(0, end);
}
113
/**
 * Parses a bare `www.` autolink starting at `pos`.
 *
 * The match requires an allowed preceding character, no pending `<` before
 * it, a case-insensitive `www.` prefix and a valid dotted domain after that
 * prefix. The link then extends to the next space, tab, newline or `<` and
 * is trimmed with `trimAutolinkPath`.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index where the candidate link starts.
 * @param {string|undefined} charBefore - Character preceding `pos`.
 * @returns {{node: object, length: number}|null} A link node whose
 *   destination is `http://` + the matched text, plus the number of
 *   characters consumed; null when there is no match.
 */
export function parseWwwAutolink(text, pos, charBefore) {
  if (!isValidPrecedingChar(charBefore)) {
    return null;
  }
  if (precededByOpenAngle(text, pos)) {
    return null;
  }
  const prefix = 'www.';
  // Lowercase only the prefix-sized window instead of the whole remaining
  // text; this function is tried at many positions of the same input.
  if (text.slice(pos, pos + prefix.length).toLowerCase() !== prefix) {
    return null;
  }
  const domainResult = parseValidDomain(text, pos + prefix.length, true);
  if (!domainResult) {
    return null;
  }
  let endPos = domainResult.endPos;
  while (endPos < text.length) {
    const char = text[endPos];
    if (char === ' ' || char === '\t' || char === '\n' || char === '<') {
      break;
    }
    endPos++;
  }
  const trimmedUrl = trimAutolinkPath(text.slice(pos, endPos));
  // Nothing but the bare prefix left after trimming: not a link.
  if (trimmedUrl.length <= prefix.length) {
    return null;
  }
  return {
    node: {
      type: 'link',
      destination: `http://${trimmedUrl}`,
      title: '',
      children: [{ type: 'text', literal: trimmedUrl }],
    },
    length: trimmedUrl.length,
  };
}
145
/**
 * Parses a bare `http://`, `https://` or `ftp://` autolink starting at `pos`.
 *
 * The match requires an allowed preceding character, no pending `<` before
 * it, one of the three schemes (case-insensitive) and a domain of at least
 * one segment. The link then extends to the next space, tab, newline or `<`
 * and is trimmed with `trimAutolinkPath`.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index where the candidate link starts.
 * @param {string|undefined} charBefore - Character preceding `pos`.
 * @returns {{node: object, length: number}|null} A link node whose
 *   destination and text are the matched URL as written, plus the number of
 *   characters consumed; null when there is no match.
 */
export function parseUrlAutolink(text, pos, charBefore) {
  if (!isValidPrecedingChar(charBefore)) {
    return null;
  }
  if (precededByOpenAngle(text, pos)) {
    return null;
  }
  // Only the first few characters decide the scheme, so lowercase a window
  // as long as the longest scheme rather than the whole remaining text.
  const schemes = ['https://', 'http://', 'ftp://'];
  const head = text.slice(pos, pos + schemes[0].length).toLowerCase();
  const scheme = schemes.find((candidate) => head.startsWith(candidate));
  if (scheme === undefined) {
    return null;
  }
  const domainResult = parseValidDomain(text, pos + scheme.length, false);
  if (!domainResult) {
    return null;
  }
  let endPos = domainResult.endPos;
  while (endPos < text.length) {
    const char = text[endPos];
    if (char === ' ' || char === '\t' || char === '\n' || char === '<') {
      break;
    }
    endPos++;
  }
  const trimmedUrl = trimAutolinkPath(text.slice(pos, endPos));
  // Nothing but the scheme left after trimming: not a link.
  if (trimmedUrl.length <= scheme.length) {
    return null;
  }
  return {
    node: {
      type: 'link',
      destination: trimmedUrl,
      title: '',
      children: [{ type: 'text', literal: trimmedUrl }],
    },
    length: trimmedUrl.length,
  };
}
188
/**
 * Parses a bare email autolink (`local@domain.tld`) starting at `pos`.
 *
 * The local part is one or more of `[a-zA-Z0-9.\-_+]`, followed by `@` and a
 * domain of `[a-zA-Z0-9\-_]` segments separated by single dots. Trailing
 * dots are left out of the link, and the resulting domain must still contain
 * a dot and must not end in `-` or `_`.
 *
 * The `-`/`_` check is applied after trailing dots are removed, so input
 * such as `a@b.c-.` is rejected rather than linked as `a@b.c-`.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index where the candidate email starts.
 * @param {string|undefined} charBefore - Character preceding `pos`.
 * @returns {{node: object, length: number}|null} A link node with a
 *   `mailto:` destination and the number of characters consumed; null when
 *   there is no match.
 */
export function parseEmailAutolink(text, pos, charBefore) {
  if (!isValidEmailPrecedingChar(charBefore)) {
    return null;
  }
  if (precededByOpenAngle(text, pos)) {
    return null;
  }
  const localChar = /[a-zA-Z0-9.\-_+]/;
  const domainChar = /[a-zA-Z0-9\-_]/;
  let localEnd = pos;
  while (localEnd < text.length && localChar.test(text[localEnd])) {
    localEnd++;
  }
  if (localEnd === pos || text[localEnd] !== '@') {
    return null;
  }
  const localPart = text.slice(pos, localEnd);
  const domainStart = localEnd + 1;
  let domainEnd = domainStart;
  let segmentStart = domainStart;
  let sawDot = false;
  while (domainEnd < text.length) {
    const char = text[domainEnd];
    if (char === '.') {
      // A dot may not start a segment (no leading or doubled dots).
      if (domainEnd === segmentStart) {
        break;
      }
      sawDot = true;
      segmentStart = domainEnd + 1;
    } else if (!domainChar.test(char)) {
      break;
    }
    domainEnd++;
  }
  if (!sawDot) {
    return null;
  }
  // Trailing dots are sentence punctuation, not part of the address.
  let finalEnd = domainEnd;
  while (finalEnd > domainStart && text[finalEnd - 1] === '.') {
    finalEnd--;
  }
  const domain = text.slice(domainStart, finalEnd);
  if (!domain.includes('.')) {
    return null;
  }
  // Validate the last character of the domain that is actually linked.
  const lastDomainChar = domain[domain.length - 1];
  if (lastDomainChar === '-' || lastDomainChar === '_') {
    return null;
  }
  const email = `${localPart}@${domain}`;
  return {
    node: {
      type: 'link',
      destination: `mailto:${email}`,
      title: '',
      children: [{ type: 'text', literal: email }],
    },
    length: email.length,
  };
}
251
/**
 * Parses a `mailto:` or `xmpp:` autolink starting at `pos`.
 *
 * After the (case-insensitive) protocol comes an address with the same shape
 * as a bare email autolink: a local part of `[a-zA-Z0-9.\-_+]`, `@`, and a
 * dotted domain of `[a-zA-Z0-9\-_]` segments. Trailing dots are left out,
 * and the resulting domain must contain a dot and must not end in `-` or
 * `_`. For `xmpp:` a single `/resource` of `[a-zA-Z0-9@.]` characters may
 * follow, with trailing dots trimmed.
 *
 * The `-`/`_` check is applied after trailing dots are removed, so input
 * such as `mailto:a@b.c-.` is rejected rather than linked with a domain
 * ending in `-`.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index where the candidate link starts.
 * @param {string|undefined} charBefore - Character preceding `pos`.
 * @returns {{node: object, length: number}|null} A link node whose
 *   destination and text use the lowercase protocol, plus the number of
 *   characters consumed; null when there is no match.
 */
export function parseProtocolAutolink(text, pos, charBefore) {
  if (!isValidEmailPrecedingChar(charBefore)) {
    return null;
  }
  if (precededByOpenAngle(text, pos)) {
    return null;
  }
  // Lowercase only a window as long as the longest protocol rather than the
  // whole remaining text.
  const protocols = ['mailto:', 'xmpp:'];
  const head = text.slice(pos, pos + protocols[0].length).toLowerCase();
  const protocol = protocols.find((candidate) => head.startsWith(candidate));
  if (protocol === undefined) {
    return null;
  }
  const localChar = /[a-zA-Z0-9.\-_+]/;
  const domainChar = /[a-zA-Z0-9\-_]/;
  const resourceChar = /[a-zA-Z0-9@.]/;
  const emailStart = pos + protocol.length;
  let localEnd = emailStart;
  while (localEnd < text.length && localChar.test(text[localEnd])) {
    localEnd++;
  }
  if (localEnd === emailStart || text[localEnd] !== '@') {
    return null;
  }
  const localPart = text.slice(emailStart, localEnd);
  const domainStart = localEnd + 1;
  let domainEnd = domainStart;
  let segmentStart = domainStart;
  let sawDot = false;
  while (domainEnd < text.length) {
    const char = text[domainEnd];
    if (char === '.') {
      // A dot may not start a segment (no leading or doubled dots).
      if (domainEnd === segmentStart) {
        break;
      }
      sawDot = true;
      segmentStart = domainEnd + 1;
    } else if (!domainChar.test(char)) {
      break;
    }
    domainEnd++;
  }
  if (!sawDot) {
    return null;
  }
  // Trailing dots are sentence punctuation, not part of the address.
  let finalEnd = domainEnd;
  while (finalEnd > domainStart && text[finalEnd - 1] === '.') {
    finalEnd--;
  }
  const domain = text.slice(domainStart, finalEnd);
  if (!domain.includes('.')) {
    return null;
  }
  // Validate the last character of the domain that is actually linked.
  const lastDomainChar = domain[domain.length - 1];
  if (lastDomainChar === '-' || lastDomainChar === '_') {
    return null;
  }
  let fullUrl = `${protocol}${localPart}@${domain}`;
  if (protocol === 'xmpp:' && text[finalEnd] === '/') {
    let resourceEnd = finalEnd + 1;
    while (resourceEnd < text.length && resourceChar.test(text[resourceEnd])) {
      resourceEnd++;
    }
    // The resource scan never accepts '/', so it cannot hold a second slash;
    // of the trailing-punctuation set, only '.' can occur here.
    while (resourceEnd > finalEnd + 1 && text[resourceEnd - 1] === '.') {
      resourceEnd--;
    }
    if (resourceEnd > finalEnd + 1) {
      fullUrl += text.slice(finalEnd, resourceEnd);
    }
  }
  return {
    node: {
      type: 'link',
      destination: fullUrl,
      title: '',
      children: [{ type: 'text', literal: fullUrl }],
    },
    length: fullUrl.length,
  };
}
352
/**
 * Tries each extended-autolink parser in order — protocol, URL, `www.`,
 * email — and returns the first truthy result.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index where the candidate link starts.
 * @param {string|undefined} charBefore - Character preceding `pos`.
 * @returns {{node: object, length: number}|null} The first successful parse,
 *   otherwise whatever the last parser returned.
 */
export function parseExtendedAutolink(text, pos, charBefore) {
  const parsersInPriorityOrder = [
    parseProtocolAutolink,
    parseUrlAutolink,
    parseWwwAutolink,
    parseEmailAutolink,
  ];
  let outcome = null;
  for (const attempt of parsersInPriorityOrder) {
    outcome = attempt(text, pos, charBefore);
    if (outcome) {
      break;
    }
  }
  return outcome;
}
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Parser for autolinks enclosed in angle brackets.
3
+ */
4
// `<scheme:rest>` — a 2–32 character scheme, then any run without whitespace or angle brackets.
const AUTOLINK_URI_REGEX = /^<([a-zA-Z][a-zA-Z0-9+.-]{1,31}:[^\s<>]*)>/;
// `<local@domain>` — domain labels are at most 63 characters and do not start or end with a hyphen.
const AUTOLINK_EMAIL_REGEX = /^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/;

/**
 * Parses an angle-bracket autolink (`<uri>` or `<email>`) at `pos`.
 * The URI form is tried first; the email form gets a `mailto:` destination.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index of the expected `<`.
 * @returns {{node: object, length: number}|null} A link node and the number
 *   of characters consumed (brackets included), or null when nothing matches.
 */
export function parseAutolink(text, pos) {
  if (text[pos] !== '<') {
    return null;
  }
  const tail = text.slice(pos);

  const uriHit = AUTOLINK_URI_REGEX.exec(tail);
  if (uriHit !== null) {
    const [whole, uri] = uriHit;
    return {
      node: {
        type: 'link',
        destination: uri,
        title: '',
        children: [{ type: 'text', literal: uri }],
      },
      length: whole.length,
    };
  }

  const emailHit = AUTOLINK_EMAIL_REGEX.exec(tail);
  if (emailHit === null) {
    return null;
  }
  const [whole, email] = emailHit;
  return {
    node: {
      type: 'link',
      destination: `mailto:${email}`,
      title: '',
      children: [{ type: 'text', literal: email }],
    },
    length: whole.length,
  };
}
38
/**
 * Cheap pre-check: reports whether the character at `pos` is `<`.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index to inspect.
 * @returns {boolean} True when `text[pos]` is `<`.
 */
export function isAutolinkStart(text, pos) {
  const opener = '<';
  return opener === text[pos];
}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Parser for inline code spans delimited by backticks.
3
+ */
4
/**
 * Parses an inline code span that opens with the backtick run at `pos`.
 *
 * The span closes at the first later backtick run of exactly the same
 * length. Newlines in the content become spaces. If the content both starts
 * and ends with a space and is not made up solely of space characters, one
 * space is removed from each end.
 *
 * "Not solely spaces" is tested against the space character only, so content
 * such as space + NBSP + space (or space + tab + space) is stripped to its
 * middle character, as CommonMark specifies.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index of the first opening backtick.
 * @returns {{node: {type: 'code_span', literal: string}, length: number}|null}
 *   The code span node and the number of characters consumed, or null when
 *   `pos` is not a backtick or no matching closing run exists.
 */
export function parseCodeSpan(text, pos) {
  if (text[pos] !== '`') {
    return null;
  }
  let contentStart = pos;
  while (contentStart < text.length && text[contentStart] === '`') {
    contentStart++;
  }
  const openingLength = contentStart - pos;
  let searchPos = contentStart;
  while (searchPos < text.length) {
    const runStart = text.indexOf('`', searchPos);
    if (runStart === -1) {
      return null;
    }
    let runEnd = runStart;
    while (runEnd < text.length && text[runEnd] === '`') {
      runEnd++;
    }
    if (runEnd - runStart === openingLength) {
      let content = text.slice(contentStart, runStart).replace(/\n/g, ' ');
      // Only U+0020 counts as a space here; other whitespace is content.
      const hasNonSpace = /[^ ]/.test(content);
      if (hasNonSpace && content.startsWith(' ') && content.endsWith(' ')) {
        content = content.slice(1, -1);
      }
      return {
        node: {
          type: 'code_span',
          literal: content,
        },
        length: runEnd - pos,
      };
    }
    // Wrong-length run: keep looking after it.
    searchPos = runEnd;
  }
  return null;
}
42
/**
 * Cheap pre-check: reports whether the character at `pos` is a backtick.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index to inspect.
 * @returns {boolean} True when `text[pos]` is a backtick.
 */
export function isCodeSpanStart(text, pos) {
  const backtick = '`';
  return backtick === text[pos];
}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Parser for emphasis and strong emphasis using * and _ delimiters.
3
+ */
4
+ import { isUnicodeWhitespace, isUnicodePunctuation } from '../utils.js';
5
/**
 * Reads the run of identical `*` or `_` characters starting at `pos` and
 * classifies it as a potential emphasis opener and/or closer.
 *
 * The characters on either side of the run decide flanking; the start and
 * end of `text` are treated as a newline. `*` runs open when left-flanking
 * and close when right-flanking. `_` runs additionally require that the run
 * is not flanking on the other side, unless the adjacent character on that
 * side is punctuation.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index of the first delimiter character.
 * @returns {{char: string, length: number, canOpen: boolean, canClose: boolean,
 *   position: number, origLength: number}|null} Run description, or null when
 *   `text[pos]` is neither `*` nor `_`.
 */
export function parseDelimiterRun(text, pos) {
  const char = text[pos];
  if (!(char === '*' || char === '_')) {
    return null;
  }

  let runEnd = pos;
  while (runEnd < text.length && text[runEnd] === char) {
    runEnd += 1;
  }
  const length = runEnd - pos;

  const charBefore = pos > 0 ? text[pos - 1] : '\n';
  const charAfter = runEnd < text.length ? text[runEnd] : '\n';

  const beforeIsWhitespace = isUnicodeWhitespace(charBefore) || charBefore === '\n';
  const afterIsWhitespace = isUnicodeWhitespace(charAfter) || charAfter === '\n';
  const beforeIsPunctuation = isUnicodePunctuation(charBefore);
  const afterIsPunctuation = isUnicodePunctuation(charAfter);

  const leftFlanking = !afterIsWhitespace &&
    (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation);
  const rightFlanking = !beforeIsWhitespace &&
    (!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation);

  // Underscore runs carry the extra "not flanking on the other side" rule.
  const isAsterisk = char === '*';
  const canOpen = isAsterisk
    ? leftFlanking
    : leftFlanking && (!rightFlanking || beforeIsPunctuation);
  const canClose = isAsterisk
    ? rightFlanking
    : rightFlanking && (!leftFlanking || afterIsPunctuation);

  return {
    char,
    length,
    canOpen,
    canClose,
    position: pos,
    origLength: length,
  };
}
44
/**
 * Cheap pre-check: reports whether the character at `pos` is `*` or `_`.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index to inspect.
 * @returns {boolean} True when `text[pos]` is an emphasis delimiter character.
 */
export function isEmphasisDelimiter(text, pos) {
  switch (text[pos]) {
    case '*':
    case '_':
      return true;
    default:
      return false;
  }
}
48
/**
 * Decides whether an opener and a closer delimiter run may pair up.
 *
 * They must use the same character. In addition, when either run can both
 * open and close, a pair whose original lengths sum to a multiple of 3 is
 * rejected unless both original lengths are themselves multiples of 3.
 *
 * @param {{char: string, canOpen: boolean, canClose: boolean, origLength: number}} opener
 * @param {{char: string, canOpen: boolean, canClose: boolean, origLength: number}} closer
 * @returns {boolean} True when the two runs may be matched.
 */
export function canDelimitersMatch(opener, closer) {
  if (opener.char !== closer.char) {
    return false;
  }
  const eitherIsAmbiguous =
    (opener.canOpen && opener.canClose) || (closer.canOpen && closer.canClose);
  if (!eitherIsAmbiguous) {
    return true;
  }
  const sumIsMultipleOfThree = (opener.origLength + closer.origLength) % 3 === 0;
  const bothAreMultiplesOfThree =
    opener.origLength % 3 === 0 && closer.origLength % 3 === 0;
  return !(sumIsMultipleOfThree && !bothAreMultiplesOfThree);
}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Parser for HTML character entities in inline content.
3
+ */
4
+ import { decodeHtmlEntities } from '../utils.js';
5
// `&#xHHHHHH;` (1–6 hex digits), `&#DDDDDDD;` (1–7 digits) or `&name;` (letter + up to 31 alphanumerics).
const ENTITY_REGEX = /^&(?:#[xX][0-9a-fA-F]{1,6}|#[0-9]{1,7}|[a-zA-Z][a-zA-Z0-9]{0,31});/;

/**
 * Parses an HTML character reference at `pos`.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index of the expected `&`.
 * @returns {{char: string, length: number}|null} The decoded text and the
 *   length of the reference, or null when `pos` is not `&`, the text does
 *   not have entity syntax, or `decodeHtmlEntities` returns it unchanged.
 */
export function parseEntity(text, pos) {
  if (text[pos] !== '&') {
    return null;
  }
  const hit = ENTITY_REGEX.exec(text.slice(pos));
  if (hit === null) {
    return null;
  }
  const [entity] = hit;
  const decoded = decodeHtmlEntities(entity);
  // An unchanged result means the decoder did not recognise the reference.
  return decoded === entity ? null : { char: decoded, length: entity.length };
}
19
/**
 * Cheap pre-check: reports whether the character at `pos` is `&`.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index to inspect.
 * @returns {boolean} True when `text[pos]` is `&`.
 */
export function isEntityStart(text, pos) {
  const ampersand = '&';
  return ampersand === text[pos];
}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Parser for backslash escape sequences and escaped hard breaks.
3
+ */
4
+ import { isAsciiPunctuation } from '../utils.js';
5
/**
 * Parses a backslash escape at `pos`.
 *
 * A backslash followed by a newline, or by a character accepted by
 * `isAsciiPunctuation`, yields that following character.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index of the expected backslash.
 * @returns {{char: string, length: number}|null} The escaped character with
 *   a consumed length of 2, or null when there is no escape at `pos`
 *   (including a backslash that is the last character).
 */
export function parseEscape(text, pos) {
  const hasFollower = pos + 1 < text.length;
  if (text[pos] !== '\\' || !hasFollower) {
    return null;
  }
  const escaped = text[pos + 1];
  if (escaped === '\n' || isAsciiPunctuation(escaped)) {
    return { char: escaped, length: 2 };
  }
  return null;
}
19
/**
 * Reports whether a backslash escape starts at `pos`, i.e. whether
 * `parseEscape` returns a result there.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index to inspect.
 * @returns {boolean} True when `parseEscape(text, pos)` is not null.
 */
export function isEscapedChar(text, pos) {
  const parsed = parseEscape(text, pos);
  return parsed !== null;
}
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Parser for GFM footnote labels.
3
+ */
4
+ import { normalizeLabel } from '../utils.js';
5
/**
 * Parses a footnote label of the form `[^label]` at `pos`.
 *
 * Inside the label, `\[`, `\]` and `\\` contribute the escaped character;
 * any other backslash is kept literally. The label ends at the first
 * unescaped `]`.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index of the expected `[`.
 * @returns {{label: string, normalized: string, length: number}|null} The
 *   unescaped label, the result of `normalizeLabel(label)`, and the number
 *   of characters consumed including brackets; null when the `[^` prefix or
 *   the closing `]` is missing, or the label is empty or longer than 999
 *   characters.
 */
export function parseFootnoteLabel(text, pos) {
  if (!(text[pos] === '[' && text[pos + 1] === '^')) {
    return null;
  }

  let cursor = pos + 2;
  let label = '';
  while (cursor < text.length && text[cursor] !== ']') {
    const current = text[cursor];
    if (current === '\\' && cursor + 1 < text.length) {
      const escaped = text[cursor + 1];
      const isEscapable = escaped === '[' || escaped === ']' || escaped === '\\';
      // Recognised escapes consume both characters; otherwise only the
      // backslash is taken and the next character is handled normally.
      label += isEscapable ? escaped : '\\';
      cursor += isEscapable ? 2 : 1;
      continue;
    }
    label += current;
    cursor += 1;
  }

  const isClosed = cursor < text.length && text[cursor] === ']';
  if (!isClosed) {
    return null;
  }
  if (label.length === 0 || label.length > 999) {
    return null;
  }
  return {
    label,
    normalized: normalizeLabel(label),
    length: cursor - pos + 1,
  };
}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Parser for hard line breaks created by backslash-newline or trailing spaces.
3
+ */
4
/**
 * Parses a hard line break at `pos`: either a backslash directly followed by
 * a newline, or a run of two or more spaces directly followed by a newline.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index of the backslash or of the first space.
 * @returns {{node: {type: 'hardbreak'}, length: number}|null} The node and
 *   the number of characters consumed (newline included), or null when no
 *   hard break starts at `pos`.
 */
export function parseHardBreak(text, pos) {
  const first = text[pos];

  if (first === '\\') {
    return text[pos + 1] === '\n'
      ? { node: { type: 'hardbreak' }, length: 2 }
      : null;
  }
  if (first !== ' ') {
    return null;
  }

  let runEnd = pos;
  while (runEnd < text.length && text[runEnd] === ' ') {
    runEnd += 1;
  }
  const spaces = runEnd - pos;
  if (spaces < 2 || text[runEnd] !== '\n') {
    return null;
  }
  return {
    node: { type: 'hardbreak' },
    length: spaces + 1,
  };
}
27
/**
 * Reports whether a hard line break starts at `pos`: a backslash directly
 * followed by a newline, or two or more spaces directly followed by a
 * newline.
 *
 * @param {string} text - Text being scanned.
 * @param {number} pos - Index to inspect.
 * @returns {boolean} True when a hard break begins at `pos`.
 */
export function isHardBreakStart(text, pos) {
  const first = text[pos];

  if (first === '\\') {
    return text[pos + 1] === '\n';
  }
  if (first !== ' ') {
    return false;
  }

  let runEnd = pos;
  while (runEnd < text.length && text[runEnd] === ' ') {
    runEnd += 1;
  }
  return runEnd - pos >= 2 && text[runEnd] === '\n';
}
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Image parsing helpers that reuse link logic and create image nodes.
3
+ */
4
+ export { createImageNode, isImageStart } from './link.js';