@fuzdev/fuz_ui 0.185.2 → 0.187.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/ApiModule.svelte +22 -6
  2. package/dist/ApiModule.svelte.d.ts.map +1 -1
  3. package/dist/DeclarationLink.svelte +1 -1
  4. package/dist/DocsLink.svelte +2 -2
  5. package/dist/DocsTertiaryNav.svelte +2 -2
  6. package/dist/Mdz.svelte +5 -0
  7. package/dist/Mdz.svelte.d.ts +1 -0
  8. package/dist/Mdz.svelte.d.ts.map +1 -1
  9. package/dist/MdzNodeView.svelte +19 -8
  10. package/dist/MdzNodeView.svelte.d.ts +1 -1
  11. package/dist/MdzNodeView.svelte.d.ts.map +1 -1
  12. package/dist/ModuleLink.svelte +1 -1
  13. package/dist/TypeLink.svelte +1 -1
  14. package/dist/library.svelte.d.ts +24 -27
  15. package/dist/library.svelte.d.ts.map +1 -1
  16. package/dist/library.svelte.js +16 -16
  17. package/dist/mdz.d.ts +13 -0
  18. package/dist/mdz.d.ts.map +1 -1
  19. package/dist/mdz.js +73 -280
  20. package/dist/mdz_components.d.ts +12 -0
  21. package/dist/mdz_components.d.ts.map +1 -1
  22. package/dist/mdz_components.js +8 -0
  23. package/dist/mdz_helpers.d.ts +108 -0
  24. package/dist/mdz_helpers.d.ts.map +1 -0
  25. package/dist/mdz_helpers.js +237 -0
  26. package/dist/mdz_lexer.d.ts +93 -0
  27. package/dist/mdz_lexer.d.ts.map +1 -0
  28. package/dist/mdz_lexer.js +727 -0
  29. package/dist/mdz_to_svelte.d.ts +5 -2
  30. package/dist/mdz_to_svelte.d.ts.map +1 -1
  31. package/dist/mdz_to_svelte.js +13 -2
  32. package/dist/mdz_token_parser.d.ts +14 -0
  33. package/dist/mdz_token_parser.d.ts.map +1 -0
  34. package/dist/mdz_token_parser.js +374 -0
  35. package/dist/svelte_preprocess_mdz.js +23 -7
  36. package/package.json +10 -9
  37. package/src/lib/library.svelte.ts +36 -35
  38. package/src/lib/mdz.ts +106 -302
  39. package/src/lib/mdz_components.ts +9 -0
  40. package/src/lib/mdz_helpers.ts +251 -0
  41. package/src/lib/mdz_lexer.ts +1003 -0
  42. package/src/lib/mdz_to_svelte.ts +15 -2
  43. package/src/lib/mdz_token_parser.ts +460 -0
  44. package/src/lib/svelte_preprocess_mdz.ts +23 -7
@@ -0,0 +1,727 @@
1
+ /**
2
+ * mdz lexer — tokenizes input into a flat `MdzToken[]` stream.
3
+ *
4
+ * Phase 1 of the two-phase lexer+parser alternative to the single-pass parser
5
+ * in `mdz.ts`. Phase 2 is in `mdz_token_parser.ts`.
6
+ *
7
+ * @module
8
+ */
9
+ import { mdz_is_url } from './mdz.js';
10
+ import { is_letter, is_tag_name_char, is_word_char, is_valid_path_char, trim_trailing_punctuation, BACKTICK, ASTERISK, UNDERSCORE, TILDE, NEWLINE, HYPHEN, HASH, SPACE, TAB, LEFT_ANGLE, RIGHT_ANGLE, SLASH, PERIOD, LEFT_BRACKET, LEFT_PAREN, RIGHT_PAREN, RIGHT_BRACKET, A_UPPER, Z_UPPER, HR_HYPHEN_COUNT, MIN_CODEBLOCK_BACKTICKS, MAX_HEADING_LEVEL, HTTPS_PREFIX_LENGTH, HTTP_PREFIX_LENGTH, is_at_absolute_path, is_at_relative_path, } from './mdz_helpers.js';
11
+ // ============================================================================
12
+ // Lexer
13
+ // ============================================================================
14
/**
 * Tokenizes mdz text into a flat array of tokens.
 *
 * Every token carries `type`, `start` and `end` (offsets into the input, `end` exclusive).
 * Token types emitted by this class: `heading_start`, `heading_end`, `hr`, `codeblock`,
 * `paragraph_break`, `text`, `code`, `bold_open`/`bold_close`, `italic_open`/`italic_close`,
 * `strikethrough_open`/`strikethrough_close`, `link_text_open`, `link_text_close`,
 * `link_ref`, `autolink`, `tag_open`, `tag_close` and `tag_self_close`.
 *
 * Constructs that turn out to be malformed (unclosed delimiters, links without a valid
 * reference, tags without a closing tag, ...) are emitted as plain `text` tokens.
 */
export class MdzLexer {
	/** Full input being tokenized. */
	#text;
	/** Offset of the next unread code unit in `#text`. */
	#index = 0;
	/** Tokens emitted so far, in emission order. */
	#tokens = [];
	/**
	 * Exclusive upper bound used when searching for closing delimiters.
	 * Narrowed to the enclosing closer while the children of bold/italic/strikethrough
	 * are tokenized, so that nested delimiters cannot pair across the parent's closer.
	 */
	#max_search_index = Number.MAX_SAFE_INTEGER;
	/**
	 * Memo of `[` offsets that already failed to form a markdown link, mapped to the
	 * `#max_search_index` that was active for the failed attempt.
	 *
	 * A failed link attempt discards its children and rewinds to `start + 1`, so the
	 * caller lexes the same region again. Without this memo, `d` unmatched `[` cost
	 * O(2^d) link attempts. The lexer is deterministic for a given (offset, search bound),
	 * so replaying a known failure as a literal `[` produces the same tokens.
	 */
	#failed_link_opens = new Map();

	/**
	 * @param {string} text - the mdz source to tokenize
	 */
	constructor(text) {
		this.#text = text;
	}

	/**
	 * Runs the lexer over the whole input.
	 *
	 * @returns {Array<object>} the token array (the same array instance the lexer appends to)
	 */
	tokenize() {
		// Leading newlines produce no tokens.
		this.#skip_newlines();
		while (this.#index < this.#text.length) {
			// Block constructs are only recognized at the start of a line.
			if (
				this.#is_at_column_0() &&
				(this.#tokenize_heading() || this.#tokenize_hr() || this.#tokenize_codeblock())
			) {
				continue;
			}
			if (this.#is_at_paragraph_break()) {
				const start = this.#index;
				this.#skip_newlines();
				this.#tokens.push({type: 'paragraph_break', start, end: this.#index});
				continue;
			}
			this.#tokenize_inline();
		}
		return this.#tokens;
	}

	/** True at offset 0 or directly after a newline. */
	#is_at_column_0() {
		return this.#index === 0 || this.#text.charCodeAt(this.#index - 1) === NEWLINE;
	}

	/** True when the current and the next character are both newlines. */
	#is_at_paragraph_break() {
		return (
			this.#index + 1 < this.#text.length &&
			this.#text.charCodeAt(this.#index) === NEWLINE &&
			this.#text.charCodeAt(this.#index + 1) === NEWLINE
		);
	}

	/** True when `str` occurs at the current offset. */
	#match(str) {
		return this.#text.startsWith(str, this.#index);
	}

	/** Advances past any run of newline characters. */
	#skip_newlines() {
		while (this.#index < this.#text.length && this.#text.charCodeAt(this.#index) === NEWLINE) {
			this.#index++;
		}
	}

	// -- Block tokenizers --

	/**
	 * Tries to tokenize a heading (`#`..., a space, then non-blank content) at the current
	 * offset. Emits `heading_start`, the inline tokens of the heading line, `heading_end`,
	 * and skips the newlines that follow.
	 *
	 * @returns {boolean} `true` if a heading was consumed; `false` with no state change otherwise
	 */
	#tokenize_heading() {
		const text = this.#text;
		const start = this.#index;
		let i = start;

		// Count one hash beyond the maximum so that too many hashes can be rejected.
		let hash_count = 0;
		while (i < text.length && text.charCodeAt(i) === HASH && hash_count <= MAX_HEADING_LEVEL) {
			hash_count++;
			i++;
		}
		if (hash_count === 0 || hash_count > MAX_HEADING_LEVEL) return false;

		// A single space must follow the hashes.
		if (i >= text.length || text.charCodeAt(i) !== SPACE) return false;
		i++;

		// The rest of the line must contain something other than spaces and tabs.
		let has_content = false;
		for (let j = i; j < text.length; j++) {
			const char_code = text.charCodeAt(j);
			if (char_code === NEWLINE) break;
			if (char_code !== SPACE && char_code !== TAB) {
				has_content = true;
				break;
			}
		}
		if (!has_content) return false;

		// `i` now sits just past the "## " prefix.
		this.#index = i;
		this.#tokens.push({type: 'heading_start', level: hash_count, start, end: this.#index});

		// Tokenize inline content up to the end of the line. The text tokenizer can run past
		// a newline, so a trailing text token is cut back to the newline when that happens.
		while (this.#index < text.length) {
			if (text.charCodeAt(this.#index) === NEWLINE) break;
			const token_count_before = this.#tokens.length;
			this.#tokenize_inline();
			if (this.#tokens.length <= token_count_before) continue;
			const last_token = this.#tokens[this.#tokens.length - 1];
			if (last_token.type !== 'text') continue;
			const newline_idx = last_token.content.indexOf('\n');
			if (newline_idx === -1) continue;
			const trimmed = last_token.content.slice(0, newline_idx);
			if (trimmed) {
				last_token.content = trimmed;
				last_token.end = last_token.start + trimmed.length;
			} else {
				// Nothing left before the newline: drop the token entirely.
				this.#tokens.pop();
			}
			// Rewind so the newline itself is handled by the code below.
			this.#index = last_token.start + newline_idx;
			break;
		}

		// Zero-width marker telling the token parser where the heading content stops.
		this.#tokens.push({type: 'heading_end', start: this.#index, end: this.#index});
		this.#skip_newlines();
		return true;
	}

	/**
	 * Tries to tokenize a horizontal rule: three hyphens followed only by spaces until
	 * the end of the line (or input). Skips the newlines that follow.
	 *
	 * @returns {boolean} `true` if an `hr` token was emitted; `false` with no state change otherwise
	 */
	#tokenize_hr() {
		const text = this.#text;
		const start = this.#index;
		if (
			start + HR_HYPHEN_COUNT > text.length ||
			text.charCodeAt(start) !== HYPHEN ||
			text.charCodeAt(start + 1) !== HYPHEN ||
			text.charCodeAt(start + 2) !== HYPHEN
		) {
			return false;
		}

		// Anything other than spaces after the hyphens (including a fourth hyphen) disqualifies.
		let i = start + HR_HYPHEN_COUNT;
		while (i < text.length) {
			const char_code = text.charCodeAt(i);
			if (char_code === NEWLINE) break;
			if (char_code !== SPACE) return false;
			i++;
		}

		this.#index = i; // up to, but not including, the newline
		this.#tokens.push({type: 'hr', start, end: this.#index});
		this.#skip_newlines();
		return true;
	}

	/**
	 * Tries to tokenize a fenced code block: an opening fence of at least
	 * `MIN_CODEBLOCK_BACKTICKS` backticks with an optional language hint, a newline,
	 * non-empty content, and a closing fence of the same length at the start of a line.
	 * Skips the newlines that follow.
	 *
	 * @returns {boolean} `true` if a `codeblock` token was emitted; `false` with no state change otherwise
	 */
	#tokenize_codeblock() {
		const text = this.#text;
		const start = this.#index;

		let i = start;
		while (i < text.length && text.charCodeAt(i) === BACKTICK) i++;
		const backtick_count = i - start;
		if (backtick_count < MIN_CODEBLOCK_BACKTICKS) return false;

		// Optional language hint: everything up to the first space or newline.
		const lang_start = i;
		while (i < text.length) {
			const char_code = text.charCodeAt(i);
			if (char_code === SPACE || char_code === NEWLINE) break;
			i++;
		}
		const lang = i > lang_start ? text.slice(lang_start, i) : null;

		// Only spaces may follow the hint, and the opening line must end with a newline.
		while (i < text.length && text.charCodeAt(i) === SPACE) i++;
		if (i >= text.length || text.charCodeAt(i) !== NEWLINE) return false;
		const content_start = i + 1;

		// The first fence candidate that starts a line decides the outcome.
		const closing_fence = '`'.repeat(backtick_count);
		for (
			let fence_at = text.indexOf(closing_fence, content_start);
			fence_at !== -1;
			fence_at = text.indexOf(closing_fence, fence_at + 1)
		) {
			// `fence_at >= content_start > 0`, so the previous character always exists.
			if (text.charCodeAt(fence_at - 1) !== NEWLINE) continue;

			const content = text.slice(content_start, fence_at);
			const final_content = content.endsWith('\n') ? content.slice(0, -1) : content;
			if (final_content.length === 0) return false; // empty code block

			// The closing fence line may only carry trailing spaces.
			let j = fence_at + backtick_count;
			while (j < text.length && text.charCodeAt(j) === SPACE) j++;
			if (j < text.length && text.charCodeAt(j) !== NEWLINE) return false;

			this.#index = j;
			this.#tokens.push({
				type: 'codeblock',
				lang,
				content: final_content,
				start,
				end: this.#index,
			});
			this.#skip_newlines();
			return true;
		}
		return false; // no closing fence
	}

	// -- Inline tokenizers --

	/**
	 * Dispatches on the current character to the matching inline tokenizer.
	 * Every path consumes at least one character while input remains.
	 */
	#tokenize_inline() {
		switch (this.#text.charCodeAt(this.#index)) {
			case BACKTICK:
				this.#tokenize_code();
				return;
			case ASTERISK:
				this.#tokenize_bold();
				return;
			case UNDERSCORE:
				this.#tokenize_single_delimiter('_', 'italic');
				return;
			case TILDE:
				this.#tokenize_single_delimiter('~', 'strikethrough');
				return;
			case LEFT_BRACKET:
				this.#tokenize_markdown_link();
				return;
			case LEFT_ANGLE:
				this.#tokenize_tag();
				return;
			default:
				this.#tokenize_text();
				return;
		}
	}

	/**
	 * Tokenizes inline code starting at a backtick. The closing backtick must be on the
	 * same line and inside the current search bound. Unclosed or empty code is emitted as text.
	 */
	#tokenize_code() {
		const text = this.#text;
		const start = this.#index;
		const content_start = start + 1;

		let content_end = -1;
		const search_limit = Math.min(this.#max_search_index, text.length);
		for (let i = content_start; i < search_limit; i++) {
			const char_code = text.charCodeAt(i);
			if (char_code === BACKTICK) {
				content_end = i;
				break;
			}
			if (char_code === NEWLINE) break;
		}

		if (content_end === -1) {
			this.#emit_literal('`', start); // unclosed
			return;
		}
		if (content_end === content_start) {
			this.#emit_literal('``', start); // empty
			return;
		}

		this.#index = content_end + 1;
		this.#tokens.push({
			type: 'code',
			content: text.slice(content_start, content_end),
			start,
			end: this.#index,
		});
	}

	/**
	 * Tokenizes `**bold**` starting at an asterisk. A lone `*`, an unclosed `**`, a `**`
	 * whose closer lies beyond a paragraph break or the search bound, and an empty `****`
	 * are all emitted as text.
	 */
	#tokenize_bold() {
		const start = this.#index;
		if (!this.#match('**')) {
			this.#emit_literal('*', start);
			return;
		}

		const search_end = Math.min(this.#max_search_index, this.#text.length);
		const close_index = this.#text.indexOf('**', start + 2);
		if (
			close_index === -1 ||
			close_index >= search_end ||
			this.#has_paragraph_break_between(start + 2, close_index)
		) {
			this.#emit_literal('**', start);
			return;
		}

		this.#index = start + 2;
		const open_token_index = this.#tokens.length;
		this.#tokens.push({type: 'bold_open', start, end: this.#index});

		// Children may not search past this closer.
		const saved_max = this.#max_search_index;
		this.#max_search_index = close_index;
		while (this.#index < close_index) {
			if (this.#is_at_paragraph_break()) break;
			this.#tokenize_inline();
		}
		this.#max_search_index = saved_max;

		if (this.#index !== close_index || !this.#match('**')) {
			// A child ran past the closer: keep the children, demote the opener to text.
			this.#tokens[open_token_index] = {type: 'text', content: '**', start, end: start + 2};
			return;
		}

		const has_children = open_token_index < this.#tokens.length - 1;
		if (!has_children) {
			// `****`: drop the opener and emit the whole run as text.
			this.#tokens.splice(open_token_index, 1);
			this.#emit_literal('****', start);
			return;
		}

		this.#tokens.push({type: 'bold_close', start: close_index, end: close_index + 2});
		this.#index = close_index + 2;
	}

	/**
	 * Tokenizes a single-character delimited span (`_italic_` or `~strikethrough~`).
	 * The opener must not follow a word character and the closer must not precede one.
	 * Any failure emits the opening delimiter as text.
	 *
	 * @param {string} delimiter - the one-character delimiter
	 * @param {string} kind - `'italic'` selects italic tokens; anything else selects strikethrough
	 */
	#tokenize_single_delimiter(delimiter, kind) {
		const start = this.#index;

		if (!this.#is_at_word_boundary(start, true, false)) {
			this.#emit_literal(delimiter, start);
			return;
		}

		const search_end = Math.min(this.#max_search_index, this.#text.length);
		const close_index = this.#text.indexOf(delimiter, start + 1);
		if (
			close_index === -1 ||
			close_index >= search_end ||
			!this.#is_at_word_boundary(close_index + 1, false, true) ||
			this.#has_paragraph_break_between(start + 1, close_index)
		) {
			this.#emit_literal(delimiter, start);
			return;
		}

		const is_italic = kind === 'italic';
		this.#index = start + 1;
		const open_token_index = this.#tokens.length;
		this.#tokens.push({
			type: is_italic ? 'italic_open' : 'strikethrough_open',
			start,
			end: this.#index,
		});

		// Children may not search past this closer.
		const saved_max = this.#max_search_index;
		this.#max_search_index = close_index;
		while (this.#index < close_index) {
			if (this.#is_at_paragraph_break()) break;
			this.#tokenize_inline();
		}
		this.#max_search_index = saved_max;

		if (this.#index !== close_index || !this.#match(delimiter)) {
			// A child ran past the closer: keep the children, demote the opener to text.
			this.#tokens[open_token_index] = {type: 'text', content: delimiter, start, end: start + 1};
			return;
		}

		const has_children = open_token_index < this.#tokens.length - 1;
		if (!has_children) {
			// Empty span: drop the opener and emit both delimiters as text.
			this.#tokens.splice(open_token_index, 1);
			this.#emit_literal(delimiter + delimiter, start);
			return;
		}

		this.#tokens.push({
			type: is_italic ? 'italic_close' : 'strikethrough_close',
			start: close_index,
			end: close_index + 1,
		});
		this.#index = close_index + 1;
	}

	/**
	 * Tokenizes `[text](reference)`. Must be called with the current character being `[`.
	 * Emits `link_text_open`, the inline children, `link_text_close` and `link_ref`.
	 * If any part is missing or invalid, the attempt is reverted and `[` is emitted as text.
	 */
	#tokenize_markdown_link() {
		const text = this.#text;
		const start = this.#index;

		// This `[` already failed under the same search bound: replay the result of the
		// revert directly instead of lexing (and discarding) the children again.
		if (this.#failed_link_opens.get(start) === this.#max_search_index) {
			this.#emit_literal('[', start);
			return;
		}

		this.#index = start + 1;
		this.#tokens.push({type: 'link_text_open', start, end: this.#index});

		// Children run until `]`; a `)` or a paragraph break also ends the attempt.
		while (this.#index < text.length) {
			const char_code = text.charCodeAt(this.#index);
			if (char_code === RIGHT_BRACKET || char_code === RIGHT_PAREN) break;
			if (this.#is_at_paragraph_break()) break;
			this.#tokenize_inline();
		}

		if (this.#index >= text.length || text.charCodeAt(this.#index) !== RIGHT_BRACKET) {
			this.#revert_tokens_from_link_open(start);
			return;
		}
		const bracket_close_start = this.#index;
		this.#index++; // consume ]
		this.#tokens.push({type: 'link_text_close', start: bracket_close_start, end: this.#index});

		if (this.#index >= text.length || text.charCodeAt(this.#index) !== LEFT_PAREN) {
			this.#revert_tokens_from_link_open(start);
			return;
		}
		this.#index++; // consume (

		const close_paren = text.indexOf(')', this.#index);
		if (close_paren === -1) {
			this.#revert_tokens_from_link_open(start);
			return;
		}

		// The reference must be non-blank and consist only of valid path characters.
		const reference = text.slice(this.#index, close_paren);
		if (!reference.trim()) {
			this.#revert_tokens_from_link_open(start);
			return;
		}
		for (let i = 0; i < reference.length; i++) {
			if (!is_valid_path_char(reference.charCodeAt(i))) {
				this.#revert_tokens_from_link_open(start);
				return;
			}
		}

		this.#index = close_paren + 1;
		this.#tokens.push({
			type: 'link_ref',
			reference,
			link_type: mdz_is_url(reference) ? 'external' : 'internal',
			start: bracket_close_start + 1, // the `(`
			end: this.#index,
		});
	}

	/**
	 * Undoes a failed link attempt: removes the `link_text_open` token that starts at
	 * `start` together with every token after it, rewinds to just past the `[`, and emits
	 * the `[` as text. The failure is recorded so the same `[` is not attempted again
	 * under the same search bound.
	 *
	 * @param {number} start - offset of the `[` that opened the failed link
	 */
	#revert_tokens_from_link_open(start) {
		this.#failed_link_opens.set(start, this.#max_search_index);

		for (let i = this.#tokens.length - 1; i >= 0; i--) {
			const token = this.#tokens[i];
			if (token.type === 'link_text_open' && token.start === start) {
				this.#tokens.splice(i);
				break;
			}
		}
		this.#emit_literal('[', start);
	}

	/**
	 * Tokenizes `<name />` (self-closing) or `<name>children</name>`. Attributes are not
	 * supported: only spaces may sit between the name and `>` or `/>`. A name starting with
	 * an uppercase ASCII letter marks a component. Anything else emits `<` as text.
	 */
	#tokenize_tag() {
		const text = this.#text;
		const start = this.#index;
		this.#index = start + 1; // consume <

		if (this.#index >= text.length || !is_letter(text.charCodeAt(this.#index))) {
			this.#emit_literal('<', start);
			return;
		}

		const tag_name_start = this.#index;
		while (this.#index < text.length && is_tag_name_char(text.charCodeAt(this.#index))) {
			this.#index++;
		}
		const tag_name = text.slice(tag_name_start, this.#index);
		if (tag_name.length === 0) {
			this.#emit_literal('<', start);
			return;
		}

		const first_char_code = tag_name.charCodeAt(0);
		const is_component = first_char_code >= A_UPPER && first_char_code <= Z_UPPER;

		while (this.#index < text.length && text.charCodeAt(this.#index) === SPACE) {
			this.#index++;
		}

		// Self-closing: `/>`
		if (
			this.#index + 1 < text.length &&
			text.charCodeAt(this.#index) === SLASH &&
			text.charCodeAt(this.#index + 1) === RIGHT_ANGLE
		) {
			this.#index += 2;
			this.#tokens.push({
				type: 'tag_self_close',
				name: tag_name,
				is_component,
				start,
				end: this.#index,
			});
			return;
		}

		if (this.#index >= text.length || text.charCodeAt(this.#index) !== RIGHT_ANGLE) {
			this.#emit_literal('<', start);
			return;
		}
		this.#index++; // consume >

		// Only commit to the tag when a matching closing tag exists somewhere ahead.
		const closing_tag = `</${tag_name}>`;
		if (text.indexOf(closing_tag, this.#index) === -1) {
			this.#emit_literal('<', start);
			return;
		}

		this.#tokens.push({type: 'tag_open', name: tag_name, is_component, start, end: this.#index});

		while (this.#index < text.length) {
			if (this.#match(closing_tag)) {
				const close_start = this.#index;
				this.#index += closing_tag.length;
				this.#tokens.push({
					type: 'tag_close',
					name: tag_name,
					start: close_start,
					end: this.#index,
				});
				return;
			}
			this.#tokenize_inline();
		}
		// A child (e.g. inline code) consumed the closing tag. The `tag_open` token stays
		// and it is left to the token parser to deal with the missing `tag_close`.
	}

	/**
	 * Tokenizes a run of plain text, or an autolink if one starts at the current offset.
	 * The run ends before any inline delimiter, paragraph break, or mid-text autolink.
	 * When a newline is followed by `#`, `-` or a backtick, the newline is included in
	 * the text and the run stops so the next line can be tried as a block element.
	 */
	#tokenize_text() {
		const text = this.#text;
		const start = this.#index;

		if (this.#is_at_url()) {
			this.#tokenize_auto_link_url();
			return;
		}
		if (is_at_absolute_path(text, this.#index) || is_at_relative_path(text, this.#index)) {
			this.#tokenize_auto_link_internal();
			return;
		}

		while (this.#index < text.length) {
			const char_code = text.charCodeAt(this.#index);

			if (
				char_code === BACKTICK ||
				char_code === ASTERISK ||
				char_code === UNDERSCORE ||
				char_code === TILDE ||
				char_code === LEFT_BRACKET ||
				char_code === RIGHT_BRACKET ||
				char_code === RIGHT_PAREN ||
				char_code === LEFT_ANGLE
			) {
				break;
			}

			if (this.#is_at_paragraph_break()) break;

			if (char_code === NEWLINE) {
				const next_i = this.#index + 1;
				if (next_i < text.length) {
					const next_char = text.charCodeAt(next_i);
					if (next_char === HASH || next_char === HYPHEN || next_char === BACKTICK) {
						this.#index++; // the newline belongs to this text token
						break;
					}
				}
			}

			// The char-code guards avoid running the string checks on every character.
			if (
				(char_code === 104 /* h */ && this.#is_at_url()) ||
				(char_code === SLASH && is_at_absolute_path(text, this.#index)) ||
				(char_code === PERIOD && is_at_relative_path(text, this.#index))
			) {
				break;
			}

			this.#index++;
		}

		// Guarantee progress when the run starts on a character that would end it.
		if (this.#index === start && this.#index < text.length) {
			this.#index++;
		}

		this.#emit_text(text.slice(start, this.#index), start);
	}

	// -- Auto-link tokenizers --

	/** Emits an external `autolink` for the `http://` or `https://` URL at the current offset. */
	#tokenize_auto_link_url() {
		const start = this.#index;
		if (this.#match('https://')) {
			this.#index += HTTPS_PREFIX_LENGTH;
		} else if (this.#match('http://')) {
			this.#index += HTTP_PREFIX_LENGTH;
		}
		this.#emit_autolink(start, 'external');
	}

	/** Emits an internal `autolink` for the path at the current offset. */
	#tokenize_auto_link_internal() {
		this.#emit_autolink(this.#index, 'internal');
	}

	/**
	 * Consumes valid path characters from the current offset, trims trailing punctuation
	 * from the collected reference, and emits an `autolink` token spanning the result.
	 *
	 * @param {number} start - offset where the reference begins
	 * @param {string} link_type - `'external'` or `'internal'`
	 */
	#emit_autolink(start, link_type) {
		const text = this.#text;
		while (this.#index < text.length) {
			const char_code = text.charCodeAt(this.#index);
			if (char_code === SPACE || char_code === NEWLINE || !is_valid_path_char(char_code)) {
				break;
			}
			this.#index++;
		}
		const reference = trim_trailing_punctuation(text.slice(start, this.#index));
		// Hand trimmed punctuation back to the text tokenizer.
		this.#index = start + reference.length;
		this.#tokens.push({type: 'autolink', reference, link_type, start, end: this.#index});
	}

	// -- Helper methods --

	/**
	 * Appends a `text` token; does not move the read position.
	 *
	 * @param {string} content - the token's text
	 * @param {number} start - offset of the first character of `content`
	 */
	#emit_text(content, start) {
		this.#tokens.push({type: 'text', content, start, end: start + content.length});
	}

	/**
	 * Emits `literal` (the source text found at `start`) as a `text` token and places the
	 * read position directly after it.
	 *
	 * @param {string} literal - source text to emit verbatim
	 * @param {number} start - offset of `literal` in the input
	 */
	#emit_literal(literal, start) {
		this.#index = start + literal.length;
		this.#emit_text(literal, start);
	}

	/**
	 * @param {number} from - first offset to inspect (inclusive)
	 * @param {number} to - end of the inspected range (exclusive)
	 * @returns {boolean} `true` if two consecutive newlines lie entirely within `[from, to)`
	 */
	#has_paragraph_break_between(from, to) {
		for (let i = from; i < to - 1; i++) {
			if (this.#text.charCodeAt(i) === NEWLINE && this.#text.charCodeAt(i + 1) === NEWLINE) {
				return true;
			}
		}
		return false;
	}

	/**
	 * @returns {boolean} `true` if `https://` or `http://` starts at the current offset and is
	 * followed by at least one character that is neither a space nor a newline
	 */
	#is_at_url() {
		let prefix_length;
		if (this.#match('https://')) {
			prefix_length = HTTPS_PREFIX_LENGTH;
		} else if (this.#match('http://')) {
			prefix_length = HTTP_PREFIX_LENGTH;
		} else {
			return false;
		}
		const after_prefix = this.#index + prefix_length;
		if (after_prefix >= this.#text.length) return false;
		const next_char = this.#text.charCodeAt(after_prefix);
		return next_char !== SPACE && next_char !== NEWLINE;
	}

	/**
	 * Checks that no word character touches the position `index`.
	 *
	 * @param {number} index - position between two characters
	 * @param {boolean} check_before - require the character before `index` to be a non-word character
	 * @param {boolean} check_after - require the character at `index` to be a non-word character
	 * @returns {boolean} `true` if every requested side is a non-word character or the edge of the input
	 */
	#is_at_word_boundary(index, check_before, check_after) {
		if (check_before && index > 0 && is_word_char(this.#text.charCodeAt(index - 1))) {
			return false;
		}
		if (check_after && index < this.#text.length && is_word_char(this.#text.charCodeAt(index))) {
			return false;
		}
		return true;
	}
}