@fuzdev/fuz_ui 0.185.2 → 0.187.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/ApiModule.svelte +22 -6
  2. package/dist/ApiModule.svelte.d.ts.map +1 -1
  3. package/dist/DeclarationLink.svelte +1 -1
  4. package/dist/DocsLink.svelte +2 -2
  5. package/dist/DocsTertiaryNav.svelte +2 -2
  6. package/dist/Mdz.svelte +5 -0
  7. package/dist/Mdz.svelte.d.ts +1 -0
  8. package/dist/Mdz.svelte.d.ts.map +1 -1
  9. package/dist/MdzNodeView.svelte +19 -8
  10. package/dist/MdzNodeView.svelte.d.ts +1 -1
  11. package/dist/MdzNodeView.svelte.d.ts.map +1 -1
  12. package/dist/ModuleLink.svelte +1 -1
  13. package/dist/TypeLink.svelte +1 -1
  14. package/dist/library.svelte.d.ts +24 -27
  15. package/dist/library.svelte.d.ts.map +1 -1
  16. package/dist/library.svelte.js +16 -16
  17. package/dist/mdz.d.ts +13 -0
  18. package/dist/mdz.d.ts.map +1 -1
  19. package/dist/mdz.js +73 -280
  20. package/dist/mdz_components.d.ts +12 -0
  21. package/dist/mdz_components.d.ts.map +1 -1
  22. package/dist/mdz_components.js +8 -0
  23. package/dist/mdz_helpers.d.ts +108 -0
  24. package/dist/mdz_helpers.d.ts.map +1 -0
  25. package/dist/mdz_helpers.js +237 -0
  26. package/dist/mdz_lexer.d.ts +93 -0
  27. package/dist/mdz_lexer.d.ts.map +1 -0
  28. package/dist/mdz_lexer.js +727 -0
  29. package/dist/mdz_to_svelte.d.ts +5 -2
  30. package/dist/mdz_to_svelte.d.ts.map +1 -1
  31. package/dist/mdz_to_svelte.js +13 -2
  32. package/dist/mdz_token_parser.d.ts +14 -0
  33. package/dist/mdz_token_parser.d.ts.map +1 -0
  34. package/dist/mdz_token_parser.js +374 -0
  35. package/dist/svelte_preprocess_mdz.js +23 -7
  36. package/package.json +10 -9
  37. package/src/lib/library.svelte.ts +36 -35
  38. package/src/lib/mdz.ts +106 -302
  39. package/src/lib/mdz_components.ts +9 -0
  40. package/src/lib/mdz_helpers.ts +251 -0
  41. package/src/lib/mdz_lexer.ts +1003 -0
  42. package/src/lib/mdz_to_svelte.ts +15 -2
  43. package/src/lib/mdz_token_parser.ts +460 -0
  44. package/src/lib/svelte_preprocess_mdz.ts +23 -7
@@ -0,0 +1,1003 @@
1
+ /**
2
+ * mdz lexer — tokenizes input into a flat `MdzToken[]` stream.
3
+ *
4
+ * Phase 1 of the two-phase lexer+parser alternative to the single-pass parser
5
+ * in `mdz.ts`. Phase 2 is in `mdz_token_parser.ts`.
6
+ *
7
+ * @module
8
+ */
9
+
10
+ import {mdz_is_url} from './mdz.js';
11
+ import {
12
+ is_letter,
13
+ is_tag_name_char,
14
+ is_word_char,
15
+ is_valid_path_char,
16
+ trim_trailing_punctuation,
17
+ BACKTICK,
18
+ ASTERISK,
19
+ UNDERSCORE,
20
+ TILDE,
21
+ NEWLINE,
22
+ HYPHEN,
23
+ HASH,
24
+ SPACE,
25
+ TAB,
26
+ LEFT_ANGLE,
27
+ RIGHT_ANGLE,
28
+ SLASH,
29
+ PERIOD,
30
+ LEFT_BRACKET,
31
+ LEFT_PAREN,
32
+ RIGHT_PAREN,
33
+ RIGHT_BRACKET,
34
+ A_UPPER,
35
+ Z_UPPER,
36
+ HR_HYPHEN_COUNT,
37
+ MIN_CODEBLOCK_BACKTICKS,
38
+ MAX_HEADING_LEVEL,
39
+ HTTPS_PREFIX_LENGTH,
40
+ HTTP_PREFIX_LENGTH,
41
+ is_at_absolute_path,
42
+ is_at_relative_path,
43
+ } from './mdz_helpers.js';
44
+
45
+ // ============================================================================
46
+ // Token types
47
+ // ============================================================================
48
+
49
/**
 * Source span shared by every lexer token.
 *
 * Both offsets index into the text given to the lexer;
 * `start` is inclusive and `end` is exclusive.
 */
export interface MdzTokenBase {
	/** Offset of the first character covered by the token. */
	start: number;
	/** Offset just past the last character covered by the token. */
	end: number;
}
53
+
54
/**
 * Every token the lexer can emit, discriminated by the `type` field.
 */
export type MdzToken =
	// leaf content
	| MdzTokenText
	| MdzTokenCode
	| MdzTokenCodeblock
	| MdzTokenAutolink
	// paired inline formatting markers
	| MdzTokenBoldOpen
	| MdzTokenBoldClose
	| MdzTokenItalicOpen
	| MdzTokenItalicClose
	| MdzTokenStrikethroughOpen
	| MdzTokenStrikethroughClose
	// markdown links: `[text](reference)`
	| MdzTokenLinkTextOpen
	| MdzTokenLinkTextClose
	| MdzTokenLinkRef
	// tags and components
	| MdzTokenTagOpen
	| MdzTokenTagSelfClose
	| MdzTokenTagClose
	// block structure
	| MdzTokenHeadingStart
	| MdzTokenHeadingEnd
	| MdzTokenHr
	| MdzTokenParagraphBreak;
75
+
76
/**
 * Literal text, including markup characters that turned out not to form
 * a valid construct (for example an unclosed `**` or a lone `[`).
 */
export interface MdzTokenText extends MdzTokenBase {
	type: 'text';
	/** The literal characters of this run. */
	content: string;
}
80
+
81
/**
 * Inline code span delimited by single backticks on one line.
 */
export interface MdzTokenCode extends MdzTokenBase {
	type: 'code';
	/** Text between the backticks; the backticks themselves are excluded. */
	content: string;
}
85
+
86
/**
 * Fenced code block opened and closed by the same number of backticks.
 */
export interface MdzTokenCodeblock extends MdzTokenBase {
	type: 'codeblock';
	/** Characters directly after the opening fence up to a space or newline, or `null` if none. */
	lang: string | null;
	/** Text between the fence lines, without the newline that precedes the closing fence. */
	content: string;
}
91
+
92
/** Opening `**` of a bold span; covers the two asterisks. */
export interface MdzTokenBoldOpen extends MdzTokenBase {
	type: 'bold_open';
}
95
+
96
/** Closing `**` of a bold span; covers the two asterisks. */
export interface MdzTokenBoldClose extends MdzTokenBase {
	type: 'bold_close';
}
99
+
100
/** Opening `_` of an italic span. */
export interface MdzTokenItalicOpen extends MdzTokenBase {
	type: 'italic_open';
}
103
+
104
/** Closing `_` of an italic span. */
export interface MdzTokenItalicClose extends MdzTokenBase {
	type: 'italic_close';
}
107
+
108
/** Opening `~` of a strikethrough span. */
export interface MdzTokenStrikethroughOpen extends MdzTokenBase {
	type: 'strikethrough_open';
}
111
+
112
/** Closing `~` of a strikethrough span. */
export interface MdzTokenStrikethroughClose extends MdzTokenBase {
	type: 'strikethrough_close';
}
115
+
116
/** The `[` that begins the text part of a markdown link. */
export interface MdzTokenLinkTextOpen extends MdzTokenBase {
	type: 'link_text_open';
}
119
+
120
/** The `]` that ends the text part of a markdown link. */
export interface MdzTokenLinkTextClose extends MdzTokenBase {
	type: 'link_text_close';
}
123
+
124
/**
 * The `(reference)` part of a markdown link.
 * Its span starts right after the `]` and ends after the `)`.
 */
export interface MdzTokenLinkRef extends MdzTokenBase {
	type: 'link_ref';
	/** Text between the parentheses. */
	reference: string;
	/** `'external'` when the lexer classifies the reference as a URL, otherwise `'internal'`. */
	link_type: 'external' | 'internal';
}
129
+
130
/**
 * A bare URL or path found in running text and linked automatically.
 */
export interface MdzTokenAutolink extends MdzTokenBase {
	type: 'autolink';
	/** The linked text after trailing punctuation has been trimmed. */
	reference: string;
	/** `'external'` for `http://` / `https://` URLs, `'internal'` for absolute or relative paths. */
	link_type: 'external' | 'internal';
}
135
+
136
/**
 * Heading prefix: the run of `#` characters plus the single space after it.
 * The heading's inline content follows, up to the matching `heading_end` token.
 */
export interface MdzTokenHeadingStart extends MdzTokenBase {
	type: 'heading_start';
	/** Number of `#` characters in the prefix. */
	level: 1 | 2 | 3 | 4 | 5 | 6;
}
140
+
141
/** Horizontal rule: a `---` line, including any trailing spaces but not the newline. */
export interface MdzTokenHr extends MdzTokenBase {
	type: 'hr';
}
144
+
145
/**
 * Opening tag such as `<name>`. The lexer only emits it when a matching
 * `</name>` occurs somewhere later in the text.
 */
export interface MdzTokenTagOpen extends MdzTokenBase {
	type: 'tag_open';
	/** Tag name without the angle brackets. */
	name: string;
	/** `true` when the name starts with an uppercase ASCII letter. */
	is_component: boolean;
}
150
+
151
/** Self-closing tag such as `<name />`. */
export interface MdzTokenTagSelfClose extends MdzTokenBase {
	type: 'tag_self_close';
	/** Tag name without the angle brackets. */
	name: string;
	/** `true` when the name starts with an uppercase ASCII letter. */
	is_component: boolean;
}
156
+
157
/** Closing tag such as `</name>`. */
export interface MdzTokenTagClose extends MdzTokenBase {
	type: 'tag_close';
	/** Tag name; matches the `name` of the corresponding `tag_open` token. */
	name: string;
}
161
+
162
/**
 * Zero-width marker (`start === end`) emitted where a heading's inline content stops,
 * so the token parser knows where the heading ends.
 */
export interface MdzTokenHeadingEnd extends MdzTokenBase {
	type: 'heading_end';
}
165
+
166
/** A run of two or more consecutive newlines; covers the whole run. */
export interface MdzTokenParagraphBreak extends MdzTokenBase {
	type: 'paragraph_break';
}
169
+
170
+ // ============================================================================
171
+ // Lexer
172
+ // ============================================================================
173
+
174
/**
 * Lexer for mdz: turns source text into a flat `MdzToken[]` stream.
 *
 * Create one instance per input and call `tokenize()`. The instance keeps a
 * cursor into the text, so it is not meant to be reused for other inputs.
 *
 * Constructs that turn out to be invalid (an unclosed `**`, a `[` without a
 * well-formed `](reference)`, a tag without a closing tag, ...) are never an
 * error: the offending marker is emitted as a `text` token and lexing
 * continues right after it.
 */
export class MdzLexer {
	/** The complete input being tokenized. */
	#text: string;
	/** Cursor: offset of the next unread character in `#text`. */
	#index: number = 0;
	/** Tokens emitted so far, in source order. */
	#tokens: Array<MdzToken> = [];
	/**
	 * Exclusive upper bound for closing-delimiter searches. While the children
	 * of a bold/italic/strikethrough span are tokenized it is set to the
	 * position of that span's closing delimiter so nested spans cannot claim
	 * a delimiter beyond it.
	 */
	#max_search_index: number = Number.MAX_SAFE_INTEGER;
	/**
	 * Link attempts that are known to fail, keyed by `"start:max_search_index"`.
	 *
	 * A failed link attempt rewinds to just after its `[` and everything behind
	 * it is lexed again. With nested unmatched brackets every attempt would
	 * otherwise re-run all attempts after it, which is exponential in the
	 * nesting depth. The outcome of an attempt depends only on the (immutable)
	 * text, the start offset and the search boundary, so a recorded failure can
	 * be replayed without rescanning.
	 */
	#failed_link_attempts: Set<string> = new Set();

	/**
	 * @param text - the mdz source to tokenize
	 */
	constructor(text: string) {
		this.#text = text;
	}

	/**
	 * Tokenizes the whole input.
	 *
	 * @returns the token stream in source order
	 */
	tokenize(): Array<MdzToken> {
		// Leading newlines produce no token.
		this.#skip_newlines();

		while (this.#index < this.#text.length) {
			// Block elements are only recognized at the start of a line.
			if (this.#is_at_column_0()) {
				if (this.#tokenize_heading()) continue;
				if (this.#tokenize_hr()) continue;
				if (this.#tokenize_codeblock()) continue;
			}

			if (this.#is_at_paragraph_break()) {
				const start = this.#index;
				this.#skip_newlines();
				this.#tokens.push({type: 'paragraph_break', start, end: this.#index});
				continue;
			}

			this.#tokenize_inline();
		}

		return this.#tokens;
	}

	/** Whether the cursor is at the start of the input or directly after a newline. */
	#is_at_column_0(): boolean {
		return this.#index === 0 || this.#text.charCodeAt(this.#index - 1) === NEWLINE;
	}

	/** Whether the cursor is on the first of two consecutive newlines. */
	#is_at_paragraph_break(): boolean {
		return (
			this.#index + 1 < this.#text.length &&
			this.#text.charCodeAt(this.#index) === NEWLINE &&
			this.#text.charCodeAt(this.#index + 1) === NEWLINE
		);
	}

	/** Whether the text at the cursor starts with `str`. */
	#match(str: string): boolean {
		return this.#text.startsWith(str, this.#index);
	}

	/** Advances the cursor past any run of newlines. */
	#skip_newlines(): void {
		while (this.#index < this.#text.length && this.#text.charCodeAt(this.#index) === NEWLINE) {
			this.#index++;
		}
	}

	// -- Block tokenizers --

	/**
	 * Tries to tokenize a heading (`#` x 1..MAX_HEADING_LEVEL, one space, content)
	 * at the cursor. Emits `heading_start`, the inline content tokens and a
	 * zero-width `heading_end`, then skips the newlines after the heading.
	 *
	 * @returns `true` if a heading was consumed; `false` with the cursor untouched otherwise
	 */
	#tokenize_heading(): boolean {
		let i = this.#index;

		// Count hashes; one more than the maximum is enough to reject.
		let hash_count = 0;
		while (
			i < this.#text.length &&
			this.#text.charCodeAt(i) === HASH &&
			hash_count <= MAX_HEADING_LEVEL
		) {
			hash_count++;
			i++;
		}

		if (hash_count === 0 || hash_count > MAX_HEADING_LEVEL) return false;

		// Must have a space after the hashes.
		if (i >= this.#text.length || this.#text.charCodeAt(i) !== SPACE) return false;
		i++; // consume the space

		// The rest of the line must contain at least one non-whitespace character.
		let has_content = false;
		for (let j = i; j < this.#text.length; j++) {
			const char_code = this.#text.charCodeAt(j);
			if (char_code === NEWLINE) break;
			if (char_code !== SPACE && char_code !== TAB) {
				has_content = true;
				break;
			}
		}

		if (!has_content) return false;

		const start = this.#index;
		// Advance past the hashes and the space.
		this.#index = start + hash_count + 1;

		this.#tokens.push({
			type: 'heading_start',
			level: hash_count as 1 | 2 | 3 | 4 | 5 | 6, // range-checked above
			start,
			end: this.#index,
		});

		// Tokenize inline content until newline or EOF.
		// `#tokenize_text` may consume a newline as part of its block-element
		// lookahead, so a trailing text token is checked for an embedded newline
		// and cut back to it.
		while (this.#index < this.#text.length) {
			if (this.#text.charCodeAt(this.#index) === NEWLINE) {
				break;
			}

			const token_count_before = this.#tokens.length;
			this.#tokenize_inline();

			if (this.#tokens.length > token_count_before) {
				const last_token = this.#tokens[this.#tokens.length - 1]!;
				if (last_token.type === 'text') {
					const newline_idx = last_token.content.indexOf('\n');
					if (newline_idx !== -1) {
						const trimmed = last_token.content.slice(0, newline_idx);
						if (trimmed) {
							last_token.content = trimmed;
							last_token.end = last_token.start + trimmed.length;
						} else {
							this.#tokens.pop();
						}
						// Rewind the cursor onto the newline that ends the heading.
						this.#index = last_token.start + newline_idx;
						break;
					}
				}
			}
		}

		// Marker so the token parser knows where the heading content stops.
		this.#tokens.push({type: 'heading_end', start: this.#index, end: this.#index});

		this.#skip_newlines();

		return true;
	}

	/**
	 * Tries to tokenize a horizontal rule: exactly `HR_HYPHEN_COUNT` hyphens,
	 * optionally followed by spaces, then a newline or EOF.
	 *
	 * @returns `true` if an `hr` token was emitted; `false` with the cursor untouched otherwise
	 */
	#tokenize_hr(): boolean {
		let i = this.#index;

		if (i + HR_HYPHEN_COUNT > this.#text.length) return false;
		for (let k = 0; k < HR_HYPHEN_COUNT; k++) {
			if (this.#text.charCodeAt(i + k) !== HYPHEN) return false;
		}
		i += HR_HYPHEN_COUNT;

		// Only spaces may follow on the line; this also rejects a further hyphen.
		while (i < this.#text.length) {
			const char_code = this.#text.charCodeAt(i);
			if (char_code === NEWLINE) break;
			if (char_code !== SPACE) return false;
			i++;
		}

		const start = this.#index;
		this.#index = i; // up to but not including the newline

		this.#tokens.push({type: 'hr', start, end: this.#index});

		this.#skip_newlines();

		return true;
	}

	/**
	 * Tries to tokenize a fenced code block: an opening fence of at least
	 * `MIN_CODEBLOCK_BACKTICKS` backticks with an optional language hint,
	 * non-empty content, and a closing fence of the same backtick count at the
	 * start of a line followed only by spaces.
	 *
	 * @returns `true` if a `codeblock` token was emitted; `false` with the cursor untouched otherwise
	 */
	#tokenize_codeblock(): boolean {
		let i = this.#index;

		let backtick_count = 0;
		while (i < this.#text.length && this.#text.charCodeAt(i) === BACKTICK) {
			backtick_count++;
			i++;
		}

		if (backtick_count < MIN_CODEBLOCK_BACKTICKS) return false;

		// Optional language hint: everything up to the first space or newline.
		const lang_start = i;
		while (i < this.#text.length) {
			const char_code = this.#text.charCodeAt(i);
			if (char_code === SPACE || char_code === NEWLINE) break;
			i++;
		}
		const lang = i > lang_start ? this.#text.slice(lang_start, i) : null;

		// Trailing spaces on the opening fence line.
		while (i < this.#text.length && this.#text.charCodeAt(i) === SPACE) {
			i++;
		}

		// The opening fence line must end with a newline.
		if (i >= this.#text.length || this.#text.charCodeAt(i) !== NEWLINE) return false;
		i++; // consume the newline

		const content_start = i;

		const closing_fence = '`'.repeat(backtick_count);
		while (i < this.#text.length) {
			// `i` is always past the opening fence here, so `i - 1` is a valid offset.
			if (
				this.#text.startsWith(closing_fence, i) &&
				this.#text.charCodeAt(i - 1) === NEWLINE
			) {
				const content = this.#text.slice(content_start, i);
				const final_content = content.endsWith('\n') ? content.slice(0, -1) : content;
				if (final_content.length === 0) return false; // empty code block

				// Only spaces may follow the closing fence on its line.
				let j = i + backtick_count;
				while (j < this.#text.length && this.#text.charCodeAt(j) === SPACE) {
					j++;
				}
				if (j < this.#text.length && this.#text.charCodeAt(j) !== NEWLINE) {
					return false;
				}

				const start = this.#index;
				this.#index = j; // past the closing fence and trailing spaces

				this.#tokens.push({
					type: 'codeblock',
					lang,
					content: final_content,
					start,
					end: this.#index,
				});

				this.#skip_newlines();

				return true;
			}
			i++;
		}

		return false; // no closing fence found
	}

	// -- Inline tokenizers --

	/**
	 * Dispatches on the character at the cursor to the matching inline tokenizer.
	 * Every call emits at least one token and advances the cursor.
	 */
	#tokenize_inline(): void {
		const char_code = this.#text.charCodeAt(this.#index);

		switch (char_code) {
			case BACKTICK:
				this.#tokenize_code();
				return;
			case ASTERISK:
				this.#tokenize_bold();
				return;
			case UNDERSCORE:
				this.#tokenize_single_delimiter('_', 'italic');
				return;
			case TILDE:
				this.#tokenize_single_delimiter('~', 'strikethrough');
				return;
			case LEFT_BRACKET:
				this.#tokenize_markdown_link();
				return;
			case LEFT_ANGLE:
				this.#tokenize_tag();
				return;
			default:
				this.#tokenize_text();
				return;
		}
	}

	/**
	 * Tokenizes an inline code span starting at a backtick. The closing
	 * backtick must be on the same line and before the current search boundary.
	 * An unclosed backtick or an empty span is emitted as text.
	 */
	#tokenize_code(): void {
		const start = this.#index;
		this.#index++; // consume `

		let content_end = -1;
		const search_limit = Math.min(this.#max_search_index, this.#text.length);
		for (let i = this.#index; i < search_limit; i++) {
			const char_code = this.#text.charCodeAt(i);
			if (char_code === BACKTICK) {
				content_end = i;
				break;
			}
			if (char_code === NEWLINE) break;
		}

		if (content_end === -1) {
			// Unclosed backtick: only the backtick itself was consumed.
			this.#emit_text('`', start);
			return;
		}

		const content = this.#text.slice(this.#index, content_end);
		this.#index = content_end + 1;

		if (content.length === 0) {
			// Empty inline code stays literal.
			this.#emit_text('``', start);
			return;
		}

		this.#tokens.push({type: 'code', content, start, end: this.#index});
	}

	/**
	 * Tokenizes a bold span starting at an asterisk. Emits `bold_open`, the
	 * child tokens and `bold_close`; a lone `*`, an unclosed `**`, a span
	 * containing a paragraph break, or an empty span is emitted as text.
	 */
	#tokenize_bold(): void {
		const start = this.#index;

		if (!this.#match('**')) {
			// Single asterisk is plain text.
			this.#index++;
			this.#emit_text('*', start);
			return;
		}

		// Find the closing `**` within the current search boundary.
		const search_end = Math.min(this.#max_search_index, this.#text.length);
		let close_index = this.#text.indexOf('**', start + 2);
		if (close_index !== -1 && close_index >= search_end) {
			close_index = -1;
		}

		if (close_index === -1 || this.#has_paragraph_break_between(start + 2, close_index)) {
			// Unclosed, or the span would cross a paragraph break.
			this.#index += 2;
			this.#emit_text('**', start);
			return;
		}

		this.#index += 2;
		const open_token_index = this.#tokens.length;
		this.#tokens.push({type: 'bold_open', start, end: this.#index});

		// Nested spans must not search past our closing delimiter.
		const saved_max = this.#max_search_index;
		this.#max_search_index = close_index;

		while (this.#index < close_index) {
			if (this.#is_at_paragraph_break()) break;
			this.#tokenize_inline();
		}

		this.#max_search_index = saved_max;

		if (this.#index !== close_index || !this.#match('**')) {
			// A child consumed past the closing delimiter: demote the opener to text
			// and keep the children that were already emitted.
			this.#tokens[open_token_index] = {type: 'text', content: '**', start, end: start + 2};
			return;
		}

		const has_children = open_token_index < this.#tokens.length - 1;
		if (!has_children) {
			// Empty bold (`****`) stays literal.
			this.#tokens.splice(open_token_index, 1);
			this.#index = start + 4;
			this.#emit_text('****', start);
			return;
		}

		this.#tokens.push({type: 'bold_close', start: close_index, end: close_index + 2});
		this.#index = close_index + 2;
	}

	/**
	 * Tokenizes an italic (`_`) or strikethrough (`~`) span. The opening
	 * delimiter must not directly follow a word character and the closing one
	 * must not be directly followed by a word character. Anything that does not
	 * form a valid, non-empty span is emitted as text.
	 *
	 * @param delimiter - the delimiter character at the cursor
	 * @param kind - which open/close token pair to emit
	 */
	#tokenize_single_delimiter(delimiter: '_' | '~', kind: 'italic' | 'strikethrough'): void {
		const start = this.#index;

		// Locate a usable closing delimiter; -1 means "treat the opener as text".
		let close_index = -1;
		if (this.#is_at_word_boundary(start, true, false)) {
			const search_end = Math.min(this.#max_search_index, this.#text.length);
			const candidate = this.#text.indexOf(delimiter, start + 1);
			if (
				candidate !== -1 &&
				candidate < search_end &&
				this.#is_at_word_boundary(candidate + 1, false, true) &&
				!this.#has_paragraph_break_between(start + 1, candidate)
			) {
				close_index = candidate;
			}
		}

		if (close_index === -1) {
			this.#index++;
			this.#emit_text(delimiter, start);
			return;
		}

		this.#index++;
		const open_end = this.#index;
		const open_token_index = this.#tokens.length;
		const open_token: MdzToken =
			kind === 'italic'
				? {type: 'italic_open', start, end: open_end}
				: {type: 'strikethrough_open', start, end: open_end};
		this.#tokens.push(open_token);

		// Nested spans must not search past our closing delimiter.
		const saved_max = this.#max_search_index;
		this.#max_search_index = close_index;

		while (this.#index < close_index) {
			if (this.#is_at_paragraph_break()) break;
			this.#tokenize_inline();
		}

		this.#max_search_index = saved_max;

		if (this.#index !== close_index || !this.#match(delimiter)) {
			// A child consumed past the closing delimiter: demote the opener to text.
			this.#tokens[open_token_index] = {
				type: 'text',
				content: delimiter,
				start,
				end: start + 1,
			};
			return;
		}

		const has_children = open_token_index < this.#tokens.length - 1;
		if (!has_children) {
			// Empty span (`__` / `~~`) stays literal.
			this.#tokens.splice(open_token_index, 1);
			this.#index = start + 2;
			this.#emit_text(delimiter + delimiter, start);
			return;
		}

		const close_token: MdzToken =
			kind === 'italic'
				? {type: 'italic_close', start: close_index, end: close_index + 1}
				: {type: 'strikethrough_close', start: close_index, end: close_index + 1};
		this.#tokens.push(close_token);
		this.#index = close_index + 1;
	}

	/**
	 * Tokenizes a markdown link `[text](reference)` starting at `[`.
	 * On success emits `link_text_open`, the text children, `link_text_close`
	 * and `link_ref`. On failure everything emitted for the attempt is removed,
	 * the `[` becomes a text token and the cursor is placed right after it.
	 *
	 * Failed attempts are memoized so that nested unmatched brackets are not
	 * rescanned over and over (see `#failed_link_attempts`).
	 */
	#tokenize_markdown_link(): void {
		const start = this.#index;
		const attempt_key = `${start}:${this.#max_search_index}`;

		if (this.#failed_link_attempts.has(attempt_key)) {
			// Same outcome as a failed scan, without repeating it.
			this.#index = start + 1;
			this.#emit_text('[', start);
			return;
		}

		if (!this.#scan_markdown_link(start)) {
			this.#failed_link_attempts.add(attempt_key);
			this.#revert_tokens_from_link_open(start);
		}
	}

	/**
	 * Scans one markdown link whose `[` is at `start`, emitting tokens as it goes.
	 *
	 * @param start - offset of the opening `[`
	 * @returns `true` if the complete link was emitted; `false` if the attempt
	 * failed, in which case the caller must revert the tokens and the cursor
	 */
	#scan_markdown_link(start: number): boolean {
		this.#index = start + 1; // consume [
		this.#tokens.push({type: 'link_text_open', start, end: this.#index});

		// Tokenize the link text until `]`; a `)` or a paragraph break also stops it.
		while (this.#index < this.#text.length) {
			const char_code = this.#text.charCodeAt(this.#index);
			if (char_code === RIGHT_BRACKET || char_code === RIGHT_PAREN) break;
			if (this.#is_at_paragraph_break()) break;
			this.#tokenize_inline();
		}

		if (this.#index >= this.#text.length || this.#text.charCodeAt(this.#index) !== RIGHT_BRACKET) {
			return false;
		}

		const bracket_close_start = this.#index;
		this.#index++; // consume ]
		this.#tokens.push({
			type: 'link_text_close',
			start: bracket_close_start,
			end: this.#index,
		});

		// `(` must follow immediately.
		if (this.#index >= this.#text.length || this.#text.charCodeAt(this.#index) !== LEFT_PAREN) {
			return false;
		}
		this.#index++; // consume (

		const close_paren = this.#text.indexOf(')', this.#index);
		if (close_paren === -1) return false;

		const reference = this.#text.slice(this.#index, close_paren);

		// The reference must be non-blank and consist of valid path characters only.
		if (!reference.trim()) return false;
		for (let i = 0; i < reference.length; i++) {
			if (!is_valid_path_char(reference.charCodeAt(i))) return false;
		}

		this.#index = close_paren + 1;

		this.#tokens.push({
			type: 'link_ref',
			reference,
			link_type: mdz_is_url(reference) ? 'external' : 'internal',
			start: bracket_close_start + 1, // after ]
			end: this.#index,
		});
		return true;
	}

	/**
	 * Undoes a failed link attempt: removes the `link_text_open` token that
	 * starts at `start` and every token after it, emits `[` as text and places
	 * the cursor right after the bracket.
	 *
	 * @param start - offset of the `[` that opened the failed attempt
	 */
	#revert_tokens_from_link_open(start: number): void {
		let open_idx = -1;
		for (let i = this.#tokens.length - 1; i >= 0; i--) {
			const token = this.#tokens[i]!;
			if (token.type === 'link_text_open' && token.start === start) {
				open_idx = i;
				break;
			}
		}
		if (open_idx !== -1) {
			this.#tokens.splice(open_idx);
		}
		this.#index = start + 1;
		this.#emit_text('[', start);
	}

	/**
	 * Tokenizes a tag starting at `<`: either a self-closing `<name />` or an
	 * opening `<name>` whose children are tokenized up to the first `</name>`.
	 * If the tag is malformed or no closing tag exists anywhere later in the
	 * text, the `<` is emitted as text and lexing resumes right after it.
	 */
	#tokenize_tag(): void {
		const start = this.#index;
		this.#index++; // consume <

		// Tag name must start with a letter.
		if (this.#index >= this.#text.length || !is_letter(this.#text.charCodeAt(this.#index))) {
			this.#emit_text('<', start);
			return;
		}

		const tag_name_start = this.#index;
		while (
			this.#index < this.#text.length &&
			is_tag_name_char(this.#text.charCodeAt(this.#index))
		) {
			this.#index++;
		}
		const tag_name = this.#text.slice(tag_name_start, this.#index);

		if (tag_name.length === 0) {
			this.#emit_text('<', start);
			return;
		}

		// Names starting with an uppercase ASCII letter are components.
		const first_char_code = tag_name.charCodeAt(0);
		const is_component = first_char_code >= A_UPPER && first_char_code <= Z_UPPER;

		// Spaces between the name and the end of the tag.
		while (this.#index < this.#text.length && this.#text.charCodeAt(this.#index) === SPACE) {
			this.#index++;
		}

		// Self-closing form.
		if (
			this.#index + 1 < this.#text.length &&
			this.#text.charCodeAt(this.#index) === SLASH &&
			this.#text.charCodeAt(this.#index + 1) === RIGHT_ANGLE
		) {
			this.#index += 2;
			this.#tokens.push({
				type: 'tag_self_close',
				name: tag_name,
				is_component,
				start,
				end: this.#index,
			});
			return;
		}

		if (this.#index >= this.#text.length || this.#text.charCodeAt(this.#index) !== RIGHT_ANGLE) {
			this.#index = start + 1;
			this.#emit_text('<', start);
			return;
		}
		this.#index++; // consume >

		// Only commit to the tag if a closing tag exists later in the text.
		const closing_tag = `</${tag_name}>`;
		if (this.#text.indexOf(closing_tag, this.#index) === -1) {
			this.#index = start + 1;
			this.#emit_text('<', start);
			return;
		}

		this.#tokens.push({type: 'tag_open', name: tag_name, is_component, start, end: this.#index});

		// Tokenize children until the cursor lands on the closing tag.
		while (this.#index < this.#text.length) {
			if (this.#match(closing_tag)) {
				const close_start = this.#index;
				this.#index += closing_tag.length;
				this.#tokens.push({
					type: 'tag_close',
					name: tag_name,
					start: close_start,
					end: this.#index,
				});
				return;
			}
			this.#tokenize_inline();
		}

		// A child consumed the closing tag's characters before the cursor reached
		// them; the tag_open token stays and the parser has to deal with it.
	}

	/**
	 * Tokenizes plain text starting at the cursor, or an autolink if the
	 * cursor is at a URL or path. A text run ends before any character that
	 * may start or end markup, before a paragraph break, before a mid-text
	 * URL/path, or after a newline whose next line may start a block element.
	 * Always consumes at least one character.
	 */
	#tokenize_text(): void {
		const start = this.#index;

		if (this.#is_at_url()) {
			this.#tokenize_auto_link_url();
			return;
		}
		if (
			is_at_absolute_path(this.#text, this.#index) ||
			is_at_relative_path(this.#text, this.#index)
		) {
			this.#tokenize_auto_link_internal();
			return;
		}

		while (this.#index < this.#text.length) {
			const char_code = this.#text.charCodeAt(this.#index);

			// Stop at characters that may start or end markup.
			if (
				char_code === BACKTICK ||
				char_code === ASTERISK ||
				char_code === UNDERSCORE ||
				char_code === TILDE ||
				char_code === LEFT_BRACKET ||
				char_code === RIGHT_BRACKET ||
				char_code === RIGHT_PAREN ||
				char_code === LEFT_ANGLE
			) {
				break;
			}

			if (this.#is_at_paragraph_break()) break;

			// When the next line could start a block element, consume the newline
			// and stop so the main loop sees the block at column 0.
			if (char_code === NEWLINE) {
				const next_i = this.#index + 1;
				if (next_i < this.#text.length) {
					const next_char = this.#text.charCodeAt(next_i);
					if (next_char === HASH || next_char === HYPHEN || next_char === BACKTICK) {
						this.#index++; // consume the newline
						break;
					}
				}
			}

			// URL or path in the middle of text; the char code guard avoids a
			// string comparison on every character.
			if (
				(char_code === 104 /* h */ && this.#is_at_url()) ||
				(char_code === SLASH && is_at_absolute_path(this.#text, this.#index)) ||
				(char_code === PERIOD && is_at_relative_path(this.#text, this.#index))
			) {
				break;
			}

			this.#index++;
		}

		// Guarantee progress even when the very first character is a stop character.
		if (this.#index === start && this.#index < this.#text.length) {
			this.#index++;
		}

		this.#emit_text(this.#text.slice(start, this.#index), start);
	}

	// -- Auto-link tokenizers --

	/** Tokenizes a bare `http://` / `https://` URL at the cursor as an external autolink. */
	#tokenize_auto_link_url(): void {
		const start = this.#index;

		// Consume the protocol prefix.
		if (this.#match('https://')) {
			this.#index += HTTPS_PREFIX_LENGTH;
		} else if (this.#match('http://')) {
			this.#index += HTTP_PREFIX_LENGTH;
		}

		this.#finish_autolink(start, 'external');
	}

	/** Tokenizes a bare absolute or relative path at the cursor as an internal autolink. */
	#tokenize_auto_link_internal(): void {
		this.#finish_autolink(this.#index, 'internal');
	}

	/**
	 * Consumes valid path characters from the cursor, trims trailing
	 * punctuation off the collected reference and emits the `autolink` token.
	 * The cursor ends right after the trimmed reference, so the trimmed
	 * punctuation is lexed as ordinary text afterwards.
	 *
	 * @param start - offset where the autolink begins
	 * @param link_type - classification to record on the token
	 */
	#finish_autolink(start: number, link_type: 'external' | 'internal'): void {
		while (this.#index < this.#text.length) {
			const char_code = this.#text.charCodeAt(this.#index);
			if (char_code === SPACE || char_code === NEWLINE || !is_valid_path_char(char_code)) {
				break;
			}
			this.#index++;
		}

		const reference = trim_trailing_punctuation(this.#text.slice(start, this.#index));
		this.#index = start + reference.length;

		this.#tokens.push({
			type: 'autolink',
			reference,
			link_type,
			start,
			end: this.#index,
		});
	}

	// -- Helper methods --

	/**
	 * Emits a text token. The cursor is not moved; callers advance it themselves.
	 *
	 * @param content - literal text of the token
	 * @param start - offset of the first character of `content` in the input
	 */
	#emit_text(content: string, start: number): void {
		this.#tokens.push({type: 'text', content, start, end: start + content.length});
	}

	/**
	 * Whether two consecutive newlines occur entirely within `[from, to)`.
	 */
	#has_paragraph_break_between(from: number, to: number): boolean {
		for (let i = from; i < to - 1; i++) {
			if (this.#text.charCodeAt(i) === NEWLINE && this.#text.charCodeAt(i + 1) === NEWLINE) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Whether the cursor is at an `http://` or `https://` prefix that is
	 * followed by at least one character other than a space or newline.
	 */
	#is_at_url(): boolean {
		let prefix_length: number;
		if (this.#match('https://')) {
			prefix_length = HTTPS_PREFIX_LENGTH;
		} else if (this.#match('http://')) {
			prefix_length = HTTP_PREFIX_LENGTH;
		} else {
			return false;
		}

		const after_prefix = this.#index + prefix_length;
		if (after_prefix >= this.#text.length) return false;
		const next_char = this.#text.charCodeAt(after_prefix);
		return next_char !== SPACE && next_char !== NEWLINE;
	}

	/**
	 * Checks the characters around `index` for word characters.
	 *
	 * @param index - offset to check around
	 * @param check_before - if set, the character before `index` must not be a word character
	 * @param check_after - if set, the character at `index` must not be a word character
	 * @returns `false` if a requested check finds a word character; start and end of input pass
	 */
	#is_at_word_boundary(index: number, check_before: boolean, check_after: boolean): boolean {
		if (check_before && index > 0 && is_word_char(this.#text.charCodeAt(index - 1))) {
			return false;
		}
		if (check_after && index < this.#text.length && is_word_char(this.#text.charCodeAt(index))) {
			return false;
		}
		return true;
	}
}