@mlightcad/mtext-parser 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/parser.ts ADDED
@@ -0,0 +1,1906 @@
1
/**
 * Kinds of tokens produced while parsing MText content.
 * The comment on each member states the payload that accompanies it.
 */
export enum TokenType {
  /** Placeholder meaning "no token". */
  NONE = 0,
  /** A word; payload is the word's string. */
  WORD = 1,
  /** A stacked expression; payload is [numerator, denominator, type]. */
  STACK = 2,
  /** A space; carries no payload. */
  SPACE = 3,
  /** A non-breaking space; carries no payload. */
  NBSP = 4,
  /** A tab; carries no payload. */
  TABULATOR = 5,
  /** Start of a new paragraph; carries no payload. */
  NEW_PARAGRAPH = 6,
  /** Start of a new column; carries no payload. */
  NEW_COLUMN = 7,
  /** Wrap at dimension line; carries no payload. */
  WRAP_AT_DIMLINE = 8,
  /** Formatting properties changed; payload describes the change. */
  PROPERTIES_CHANGED = 9,
}
26
+
27
/**
 * A numeric factor tagged as either absolute or relative.
 * Used by the height, width and character-tracking formatting properties.
 */
export interface FactorValue {
  /** Numeric magnitude of the factor. */
  value: number;
  /** `true` when the value is relative, `false` when it is absolute. */
  isRelative: boolean;
}
37
+
38
/**
 * Formatting properties that can apply to MText word tokens.
 * Every field is optional so that an instance can describe just a subset
 * (text decoration, colour, alignment, font, sizing, paragraph layout).
 */
export interface Properties {
  /** Text is underlined. */
  underline?: boolean;
  /** Text has a line drawn above it. */
  overline?: boolean;
  /** Text is struck through. */
  strikeThrough?: boolean;
  /** AutoCAD Color Index (0-256); `null` when not set. */
  aci?: number | null;
  /** Colour as an [r, g, b] tuple; `null` when not set. */
  rgb?: RGB | null;
  /** Line alignment of the text. */
  align?: MTextLineAlignment;
  /** Font family, style and weight. */
  fontFace?: FontFace;
  /** Capital-letter height factor (relative or absolute). */
  capHeight?: FactorValue;
  /** Character width factor (relative or absolute). */
  widthFactor?: FactorValue;
  /** Spacing factor between characters (relative or absolute). */
  charTrackingFactor?: FactorValue;
  /** Slant angle of the text, in degrees. */
  oblique?: number;
  /** Paragraph formatting; partial so only some fields may be present. */
  paragraph?: Partial<ParagraphProperties>;
}
69
+
70
/**
 * Describes one change of MText properties: which command caused it,
 * what changed, and how deeply nested the parser was at that point.
 */
export interface ChangedProperties {
  /**
   * Property command that caused the change (for example 'L', 'C' or 'f').
   * `undefined` when the change restores a previous context.
   */
  command: string | undefined;
  /** Properties whose values changed because of the command. */
  changes: Properties;
  /**
   * Context stack depth at the moment of the change:
   * 0 means global (outside any `{}` block), greater than 0 means
   * local to one or more nested `{}` blocks.
   */
  depth: number;
}
90
+
91
/**
 * Maps each token type to the type of the data it carries.
 * Token types without a payload map to `null`.
 */
export type TokenData = {
  [TokenType.NONE]: null;
  /** The word text. */
  [TokenType.WORD]: string;
  /** [numerator, denominator, type]. */
  [TokenType.STACK]: [string, string, string];
  [TokenType.SPACE]: null;
  [TokenType.NBSP]: null;
  [TokenType.TABULATOR]: null;
  [TokenType.NEW_PARAGRAPH]: null;
  [TokenType.NEW_COLUMN]: null;
  [TokenType.WRAP_AT_DIMLINE]: null;
  /** Description of the property change. */
  [TokenType.PROPERTIES_CHANGED]: ChangedProperties;
};
106
+
107
/**
 * Vertical alignment choices for a line of MText.
 */
export enum MTextLineAlignment {
  /** Text sits at the bottom. */
  BOTTOM = 0,
  /** Text sits in the middle. */
  MIDDLE = 1,
  /** Text sits at the top. */
  TOP = 2,
}
118
+
119
/**
 * Horizontal alignment choices for an MText paragraph.
 */
export enum MTextParagraphAlignment {
  /** No explicit alignment; use the default. */
  DEFAULT = 0,
  /** Flush left. */
  LEFT = 1,
  /** Flush right. */
  RIGHT = 2,
  /** Centered. */
  CENTER = 3,
  /** Justified. */
  JUSTIFIED = 4,
  /** Distributed. */
  DISTRIBUTED = 5,
}
136
+
137
/**
 * Stroke decorations for MText.
 * The non-zero members are distinct powers of two (1, 2, 4).
 */
export enum MTextStroke {
  /** No stroke at all. */
  NONE = 0,
  /** Line below the text. */
  UNDERLINE = 1 << 0,
  /** Line above the text. */
  OVERLINE = 1 << 1,
  /** Line through the text. */
  STRIKE_THROUGH = 1 << 2,
}
150
+
151
/**
 * A colour expressed as a three-element tuple: [red, green, blue].
 */
export type RGB = [number, number, number];
155
+
156
/**
 * Allowed font style names.
 */
export type FontStyle = 'Regular' | 'Italic';
160
+
161
/**
 * Describes a font: its family, style and weight.
 */
export interface FontFace {
  /** Name of the font family. */
  family: string;
  /** Style of the font, e.g. 'Regular' or 'Italic'. */
  style: FontStyle;
  /** Numeric weight, e.g. 400 for normal and 700 for bold. */
  weight: number;
}
172
+
173
/**
 * Layout settings of a paragraph.
 */
export interface ParagraphProperties {
  /** Indentation amount. */
  indent: number;
  /** Left margin. */
  left: number;
  /** Right margin. */
  right: number;
  /** Alignment of the paragraph. */
  align: MTextParagraphAlignment;
  /** Tab stops; entries are positions (numbers) or typed stops (strings). */
  tabs: (number | string)[];
}
188
+
189
/**
 * Lookup table from a special-character code letter to the character it stands for.
 */
const SPECIAL_CHAR_ENCODING: Record<string, string> = {
  // diameter sign
  c: 'Ø',
  // degree sign
  d: '°',
  // plus/minus sign
  p: '±',
  // literal percent sign
  '%': '%',
};
198
+
199
/**
 * Lookup table from an alignment code letter to its paragraph alignment.
 * Letters that are not listed have no entry.
 */
const CHAR_TO_ALIGN: Record<string, MTextParagraphAlignment> = {
  // left
  l: MTextParagraphAlignment.LEFT,
  // right
  r: MTextParagraphAlignment.RIGHT,
  // center
  c: MTextParagraphAlignment.CENTER,
  // justified
  j: MTextParagraphAlignment.JUSTIFIED,
  // distributed
  d: MTextParagraphAlignment.DISTRIBUTED,
};
209
+
210
/**
 * Packs an RGB tuple into one integer laid out as 0xRRGGBB.
 * @param rgb - Colour as [r, g, b]
 * @returns The packed integer colour
 */
export function rgb2int(rgb: RGB): number {
  const red = rgb[0];
  const green = rgb[1];
  const blue = rgb[2];
  return (red << 16) | (green << 8) | blue;
}
219
+
220
/**
 * Unpacks an integer colour (0xRRGGBB layout) into an RGB tuple.
 * Only the lowest 24 bits are read.
 * @param value - Packed integer colour
 * @returns Colour as [r, g, b]
 */
export function int2rgb(value: number): RGB {
  return [(value >> 16) & 0xff, (value >> 8) & 0xff, value & 0xff];
}
231
+
232
/**
 * Replaces every line ending (CRLF, lone CR or lone LF) with the two-character
 * sequence `\P`.
 * @param text - Text whose line endings should be escaped
 * @returns The text with line endings replaced
 */
export function escapeDxfLineEndings(text: string): string {
  const lineEnding = /\r\n|\r|\n/g;
  const paragraphMarker = '\\P';
  return text.replace(lineEnding, paragraphMarker);
}
240
+
241
/**
 * Tells whether the text still contains a backslash once every `\P` and `\~`
 * sequence has been removed.
 * @param text - Text to inspect
 * @returns `true` if a backslash remains, otherwise `false`
 */
export function hasInlineFormattingCodes(text: string): boolean {
  const withoutParagraphs = text.replace(/\\P/g, '');
  const withoutNbsp = withoutParagraphs.replace(/\\~/g, '');
  return withoutNbsp.includes('\\');
}
249
+
250
/**
 * Collects the distinct font names referenced by font commands in an MText string.
 * A font command is `\f` or `\F` followed by the name and ended by `;` or `|`.
 * Names are lower-cased before being stored, so duplicates differing only in case collapse.
 *
 * @param mtext - MText string to scan
 * @param removeExtension - When true, a trailing `.ttf`, `.otf`, `.woff` or `.shx` is stripped from each name. Defaults to false.
 * @returns Set of lower-cased font names found in the string
 * @example
 * ```ts
 * const mtext = "\\fArial.ttf|Hello\\fTimes New Roman.otf|World";
 * const fonts = getFonts(mtext, true);
 * // Returns: Set(2) { "arial", "times new roman" }
 * ```
 */
export function getFonts(mtext: string, removeExtension: boolean = false) {
  const found: Set<string> = new Set();
  const fontCommand = /\\[fF](.*?)[;|]/g;

  for (const hit of mtext.matchAll(fontCommand)) {
    const lowered = hit[1].toLowerCase();
    found.add(removeExtension ? lowered.replace(/\.(ttf|otf|woff|shx)$/, '') : lowered);
  }

  return found;
}
279
+
280
/**
 * A stack of MTextContext objects that tracks formatting state across nested `{}` blocks.
 *
 * - The stack always holds at least one context (the one passed to the constructor).
 * - `push` stores the context it is given by reference; it does not copy it.
 * - `pop` carries the popped context's paragraph properties over to the new top
 *   context whenever the two differ, so paragraph settings survive the end of a block.
 */
class ContextStack {
  private stack: MTextContext[] = [];

  /**
   * Builds a stack whose bottom entry is the given context.
   * @param initial Context used as the base (root) of the stack.
   */
  constructor(initial: MTextContext) {
    this.stack.push(initial);
  }

  /**
   * Puts the given context on top of the stack (same object, no copy is made).
   * @param ctx Context to place on top.
   */
  push(ctx: MTextContext) {
    this.stack.push(ctx);
  }

  /**
   * Removes the top context unless it is the only one left.
   * When the removed context's paragraph properties differ (by JSON comparison)
   * from those of the new top, the new top receives a shallow copy of them.
   * @returns The removed context, or undefined when only the root remains.
   */
  pop(): MTextContext | undefined {
    if (this.stack.length <= 1) {
      return undefined;
    }
    const removed = this.stack.pop()!;
    const parent = this.stack[this.stack.length - 1];
    const sameParagraph = JSON.stringify(parent.paragraph) === JSON.stringify(removed.paragraph);
    if (!sameParagraph) {
      parent.paragraph = { ...removed.paragraph };
    }
    return removed;
  }

  /**
   * The context currently on top of the stack.
   */
  get current(): MTextContext {
    const topIndex = this.stack.length - 1;
    return this.stack[topIndex];
  }

  /**
   * Number of contexts above the root, i.e. the current nesting level.
   */
  get depth(): number {
    return this.stack.length - 1;
  }

  /**
   * The bottom context of the stack.
   */
  get root(): MTextContext {
    return this.stack[0];
  }

  /**
   * Overwrites the top entry of the stack with the given context.
   * @param ctx Context that becomes the new top.
   */
  setCurrent(ctx: MTextContext) {
    const topIndex = this.stack.length - 1;
    this.stack[topIndex] = ctx;
  }
}
352
+
353
/**
 * Configuration options for the MText parser.
 * These options control how the parser behaves during tokenization and property handling.
 */
export interface MTextParserOptions {
  /**
   * Whether to yield PROPERTIES_CHANGED tokens when formatting properties change.
   * When true, the parser emits a token whenever properties like color, font, or alignment change.
   * When false, property changes are applied to the context without generating tokens.
   * @default false
   */
  yieldPropertyCommands?: boolean;
  /**
   * Whether to reset paragraph parameters when encountering a new paragraph token.
   * When true, paragraph properties (indent, margins, alignment, tab stops) are reset to defaults
   * at the start of each new paragraph.
   * @default false
   */
  resetParagraphParameters?: boolean;
  /**
   * Custom decoder function for MIF (Multibyte Interchange Format) codes.
   * If provided, it is used instead of the parser's built-in decoder.
   * The function receives the hex code string (e.g. "C4E3" or "1A2B3") and should return
   * the decoded character, or an empty square (▯) if the code is invalid.
   * @default undefined (the built-in decoder is used)
   */
  mifDecoder?: (hex: string) => string;
  /**
   * The length of MIF hex codes to parse. MIF codes in AutoCAD can vary in length
   * depending on the specific SHX big font used (typically 4 or 5 digits).
   * With 'auto' (also used when the option is omitted), the parser first tries to
   * match 5 hex digits and falls back to 4 digits if fewer than 5 are available.
   * @default 'auto'
   */
  mifCodeLength?: 4 | 5 | 'auto';
}
390
+
391
+ /**
392
+ * Main parser class for MText content
393
+ */
394
+ export class MTextParser {
395
+ private scanner: TextScanner;
396
+ private ctxStack: ContextStack;
397
+ private continueStroke: boolean = false;
398
+ private yieldPropertyCommands: boolean;
399
+ private resetParagraphParameters: boolean;
400
+ private inStackContext: boolean = false;
401
+ private mifDecoder: (hex: string) => string;
402
+ private mifCodeLength: 4 | 5 | 'auto';
403
+
404
/**
 * Sets up a parser for the given MText content.
 * @param content - MText content to parse
 * @param ctx - Initial context; a fresh MTextContext is created when omitted
 * @param options - Parser options; missing (null/undefined) entries fall back to
 *   `false` for both flags, the built-in MIF decoder, and 'auto' for the MIF code length
 */
constructor(content: string, ctx?: MTextContext, options: MTextParserOptions = {}) {
  // Option handling first; none of these depend on the scanner or the context stack.
  this.mifCodeLength = options.mifCodeLength ?? 'auto';
  this.mifDecoder = options.mifDecoder ?? this.decodeMultiByteChar.bind(this);
  this.resetParagraphParameters = options.resetParagraphParameters ?? false;
  this.yieldPropertyCommands = options.yieldPropertyCommands ?? false;

  this.scanner = new TextScanner(content);
  this.ctxStack = new ContextStack(ctx ?? new MTextContext());
}
419
+
420
/**
 * Decode a multi-byte character from its MIF hex code.
 *
 * - 5-digit codes: the first digit selects the encoding ('1' = Shift-JIS, '2' = Big5,
 *   anything else = GBK) and the remaining four digits are the two bytes to decode.
 * - 4-digit codes: the two bytes are decoded as GBK; if that fails, as Big5.
 *
 * Decoding is strict: malformed byte sequences, unsupported encodings, non-hex input
 * and codes of any other length all yield the placeholder '▯'.
 *
 * @param hex - Hex code string (e.g. "C4E3" or "1A2B3")
 * @returns Decoded character or empty square if invalid
 */
private decodeMultiByteChar(hex: string): string {
  const placeholder = '▯';

  // Strictly decodes two bytes given as four hex digits; null signals failure.
  // `fatal: true` is required: by default TextDecoder silently substitutes U+FFFD
  // for malformed input, so a failure could never be detected or fall through
  // to the next candidate encoding.
  const decodePair = (encoding: string, digits: string): string | null => {
    try {
      const bytes = new Uint8Array([
        parseInt(digits.slice(0, 2), 16),
        parseInt(digits.slice(2, 4), 16),
      ]);
      return new TextDecoder(encoding, { fatal: true }).decode(bytes);
    } catch {
      // Malformed sequence or encoding not available in this runtime.
      return null;
    }
  };

  if (!/^[0-9A-Fa-f]+$/.test(hex)) {
    return placeholder;
  }

  if (hex.length === 5) {
    // Notes:
    // AutoCAD is known to use prefix 1 for Shift-JIS, 2 for big5, and 5 for gbk.
    // Whether other prefixes exist, and what they mean, is not known; they fall back to gbk.
    const prefix = hex[0];
    const encoding = prefix === '1' ? 'shift-jis' : prefix === '2' ? 'big5' : 'gbk';
    return decodePair(encoding, hex.slice(1)) ?? placeholder;
  }

  if (hex.length === 4) {
    // Try GBK first, then BIG5 if GBK cannot decode the bytes.
    return decodePair('gbk', hex) ?? decodePair('big5', hex) ?? placeholder;
  }

  return placeholder;
}
474
+
475
/**
 * Reads a MIF hex code at the scanner's current position without consuming it.
 * @param length - Number of hex digits to match (4 or 5); 'auto' tries 5 digits first and then 4
 * @returns The matched hex digits, or null when nothing matches
 */
private extractMifCode(length: 4 | 5 | 'auto'): string | null {
  // Candidate lengths in the order they are attempted.
  const attempts: Array<4 | 5> = length === 'auto' ? [5, 4] : [length];
  for (const digits of attempts) {
    const found = this.scanner.tail.match(new RegExp(`^[0-9A-Fa-f]{${digits}}`));
    if (found) {
      return found[0];
    }
  }
  return null;
}
497
+
498
/**
 * Pushes the context that is currently on top of the stack onto the stack again
 * (the same object is passed; no copy is made here).
 */
private pushCtx(): void {
  const active = this.ctxStack.current;
  this.ctxStack.push(active);
}
504
+
505
/**
 * Removes the top context from the context stack; the removed context is discarded.
 */
private popCtx(): void {
  void this.ctxStack.pop();
}
511
+
512
/**
 * Parses a stacking expression (numerator, divider, denominator).
 *
 * The expression text is taken from the main scanner up to the next unescaped `;`
 * and then scanned on its own. The first unescaped `/`, `#` or `^` is the divider;
 * everything before it is the numerator and everything after it (up to an unescaped
 * `;`) is the denominator. Without a divider, denominator and type are empty strings.
 *
 * @returns Tuple of [TokenType.STACK, [numerator, denominator, type]]
 */
private parseStacking(): [TokenType, [string, string, string]] {
  const exprScanner = new TextScanner(this.extractExpression(true));

  // Reads one character and reports whether it was preceded by a backslash.
  // Characters with a code below 32 are replaced by a space.
  const readChar = (): [string, boolean] => {
    let ch = exprScanner.peek();
    if (ch.charCodeAt(0) < 32) {
      ch = ' ';
    }
    const escaped = ch === '\\';
    if (escaped) {
      // Drop the backslash and take the character after it verbatim.
      exprScanner.consume(1);
      ch = exprScanner.peek();
    }
    exprScanner.consume(1);
    return [ch, escaped];
  };

  // Collects text up to the first unescaped divider; returns [text, divider].
  const readNumerator = (): [string, string] => {
    let text = '';
    while (exprScanner.hasData) {
      const [ch, escaped] = readChar();
      const isDivider = ch === '/' || ch === '#' || ch === '^';
      if (isDivider && !escaped) {
        return [text, ch];
      }
      text += ch;
    }
    return [text, ''];
  };

  // Collects text up to an unescaped ';', optionally dropping leading spaces.
  const readDenominator = (dropLeadingSpaces: boolean): string => {
    let text = '';
    let dropping = dropLeadingSpaces;
    while (exprScanner.hasData) {
      const [ch, escaped] = readChar();
      if (dropping && ch === ' ') {
        continue;
      }
      dropping = false;
      if (ch === ';' && !escaped) {
        break;
      }
      text += ch;
    }
    return text;
  };

  const [numerator, divider] = readNumerator();
  // Leading spaces are skipped only after a caret divider.
  const denominator = divider ? readDenominator(divider === '^') : '';

  // Special case for \S^!/^?;
  if (numerator === '' && denominator.includes('I/')) {
    return [TokenType.STACK, [' ', ' ', '/']];
  }

  return [TokenType.STACK, [numerator, denominator, divider]];
}
586
+
587
/**
 * Applies one property command to a copy of the current context and makes that
 * copy the current context.
 *
 * Handled commands: L/l (underline on/off), O/o (overline on/off),
 * K/k (strike-through on/off), A (alignment), C (ACI color), c (RGB color),
 * H (height), W (width), Q (oblique), T (character tracking),
 * p (paragraph properties), f/F (font).
 *
 * @param cmd - The property command to parse
 * @returns The property changes when `yieldPropertyCommands` is enabled and the
 *   command changed at least one property; otherwise nothing
 * @throws Error if `cmd` is not one of the handled commands
 */
private parseProperties(cmd: string): TokenData[TokenType.PROPERTIES_CHANGED] | void {
  const prevCtx = this.ctxStack.current.copy();
  const newCtx = this.ctxStack.current.copy();
  switch (cmd) {
    case 'L':
      newCtx.underline = true;
      break;
    case 'l':
      newCtx.underline = false;
      break;
    case 'O':
      newCtx.overline = true;
      break;
    case 'o':
      newCtx.overline = false;
      break;
    case 'K':
      newCtx.strikeThrough = true;
      break;
    case 'k':
      newCtx.strikeThrough = false;
      break;
    case 'A':
      this.parseAlign(newCtx);
      break;
    case 'C':
      this.parseAciColor(newCtx);
      break;
    case 'c':
      this.parseRgbColor(newCtx);
      break;
    case 'H':
      this.parseHeight(newCtx);
      break;
    case 'W':
      this.parseWidth(newCtx);
      break;
    case 'Q':
      this.parseOblique(newCtx);
      break;
    case 'T':
      this.parseCharTracking(newCtx);
      break;
    case 'p':
      this.parseParagraphProperties(newCtx);
      break;
    case 'f':
    case 'F':
      this.parseFontProperties(newCtx);
      break;
    default:
      throw new Error(`Unknown command: ${cmd}`);
  }

  // The stroke-continuation flag is derived in one place from the resulting
  // stroke state; setting it inside the individual cases would be overwritten here.
  this.continueStroke = newCtx.hasAnyStroke;
  newCtx.continueStroke = this.continueStroke;
  // Use setCurrent to replace the current context
  this.ctxStack.setCurrent(newCtx);

  if (!this.yieldPropertyCommands) {
    return;
  }
  const changes = this.getPropertyChanges(prevCtx, newCtx);
  if (Object.keys(changes).length === 0) {
    return;
  }
  return {
    command: cmd,
    changes,
    depth: this.ctxStack.depth,
  };
}
675
+
676
/**
 * Computes which properties differ between two contexts.
 * Only differing properties appear in the result, each with its value from `newCtx`.
 * For paragraph properties only the individual fields that differ are included.
 * @param oldCtx - Context before the change
 * @param newCtx - Context after the change
 * @returns Object holding the changed properties (empty when nothing differs)
 */
private getPropertyChanges(
  oldCtx: MTextContext,
  newCtx: MTextContext
): TokenData[TokenType.PROPERTIES_CHANGED]['changes'] {
  const diff: TokenData[TokenType.PROPERTIES_CHANGED]['changes'] = {};

  // Structural comparison through JSON serialisation.
  const jsonDiffers = (a: unknown, b: unknown): boolean =>
    JSON.stringify(a) !== JSON.stringify(b);
  // Factor values differ when either the number or the relative flag differs.
  const factorDiffers = (
    a: { value: number; isRelative: boolean },
    b: { value: number; isRelative: boolean }
  ): boolean => a.value !== b.value || a.isRelative !== b.isRelative;

  if (oldCtx.underline !== newCtx.underline) diff.underline = newCtx.underline;
  if (oldCtx.overline !== newCtx.overline) diff.overline = newCtx.overline;
  if (oldCtx.strikeThrough !== newCtx.strikeThrough) diff.strikeThrough = newCtx.strikeThrough;
  if (oldCtx.color.aci !== newCtx.color.aci) diff.aci = newCtx.color.aci;
  if (oldCtx.color.rgbValue !== newCtx.color.rgbValue) diff.rgb = newCtx.color.rgb;
  if (oldCtx.align !== newCtx.align) diff.align = newCtx.align;
  if (jsonDiffers(oldCtx.fontFace, newCtx.fontFace)) diff.fontFace = newCtx.fontFace;
  if (factorDiffers(oldCtx.capHeight, newCtx.capHeight)) diff.capHeight = newCtx.capHeight;
  if (factorDiffers(oldCtx.widthFactor, newCtx.widthFactor)) diff.widthFactor = newCtx.widthFactor;
  if (factorDiffers(oldCtx.charTrackingFactor, newCtx.charTrackingFactor)) {
    diff.charTrackingFactor = newCtx.charTrackingFactor;
  }
  if (oldCtx.oblique !== newCtx.oblique) diff.oblique = newCtx.oblique;

  if (jsonDiffers(oldCtx.paragraph, newCtx.paragraph)) {
    const before = oldCtx.paragraph;
    const after = newCtx.paragraph;
    // Report only the paragraph fields that actually changed.
    const paragraphDiff: Partial<ParagraphProperties> = {};
    if (before.indent !== after.indent) paragraphDiff.indent = after.indent;
    if (before.align !== after.align) paragraphDiff.align = after.align;
    if (before.left !== after.left) paragraphDiff.left = after.left;
    if (before.right !== after.right) paragraphDiff.right = after.right;
    if (jsonDiffers(before.tabs, after.tabs)) paragraphDiff.tabs = after.tabs;
    if (Object.keys(paragraphDiff).length > 0) {
      diff.paragraph = paragraphDiff;
    }
  }

  return diff;
}
755
+
756
/**
 * Parse alignment property.
 * Reads one character from the scanner: '0', '1' or '2' selects the matching
 * line alignment; anything else (including an empty read) falls back to BOTTOM.
 * An optional terminator is consumed afterwards.
 * @param ctx - The context to update
 */
private parseAlign(ctx: MTextContext): void {
  const char = this.scanner.get();
  // Compare against each digit explicitly: `'012'.includes('')` is true, so an
  // empty read would otherwise reach the numeric conversion and yield NaN.
  if (char === '0' || char === '1' || char === '2') {
    ctx.align = parseInt(char, 10) as MTextLineAlignment;
  } else {
    ctx.align = MTextLineAlignment.BOTTOM;
  }
  this.consumeOptionalTerminator();
}
769
+
770
/**
 * Parse height property.
 * Reads a float expression; a trailing `x` marks the value as relative.
 * When no expression is present the context is left unchanged.
 * An optional terminator is consumed in either case.
 * @param ctx - The context to update
 */
private parseHeight(ctx: MTextContext): void {
  const expr = this.extractFloatExpression(true);
  if (expr) {
    // parseFloat never throws, so no error handling is needed around it.
    const isRelative = expr.endsWith('x');
    ctx.capHeight = {
      value: parseFloat(isRelative ? expr.slice(0, -1) : expr),
      isRelative,
    };
  }
  this.consumeOptionalTerminator();
}
798
+
799
/**
 * Parse width property.
 * Reads a float expression; a trailing `x` marks the value as relative.
 * When no expression is present the context is left unchanged.
 * An optional terminator is consumed in either case.
 * @param ctx - The context to update
 */
private parseWidth(ctx: MTextContext): void {
  const expr = this.extractFloatExpression(true);
  if (expr) {
    // parseFloat never throws, so no error handling is needed around it.
    const isRelative = expr.endsWith('x');
    ctx.widthFactor = {
      value: parseFloat(isRelative ? expr.slice(0, -1) : expr),
      isRelative,
    };
  }
  this.consumeOptionalTerminator();
}
827
+
828
/**
 * Parse character tracking property.
 * Reads a float expression; a trailing `x` marks the value as relative.
 * The stored value is the absolute value of the parsed number.
 * When no expression is present the context is left unchanged.
 * An optional terminator is consumed in either case.
 * @param ctx - The context to update
 */
private parseCharTracking(ctx: MTextContext): void {
  const expr = this.extractFloatExpression(true);
  if (expr) {
    // parseFloat never throws, so no error handling is needed around it.
    const isRelative = expr.endsWith('x');
    ctx.charTrackingFactor = {
      value: Math.abs(parseFloat(isRelative ? expr.slice(0, -1) : expr)),
      isRelative,
    };
  }
  this.consumeOptionalTerminator();
}
856
+
857
/**
 * Reads a float expression and turns it into a new value.
 * An expression ending in `x` is a factor multiplied onto `value`; any other
 * expression replaces `value`. Negative numbers are allowed in both forms.
 * @param value - Current value, returned unchanged when no expression is found
 * @returns The resulting value
 */
private parseFloatValueOrFactor(value: number): number {
  const expr = this.extractFloatExpression(true);
  if (!expr) {
    return value;
  }
  if (expr.endsWith('x')) {
    const factor = parseFloat(expr.slice(0, -1));
    return value * factor;
  }
  return parseFloat(expr);
}
874
+
875
/**
 * Reads the oblique angle (a plain float, no `x` suffix) and stores it on the context.
 * Without a number the context keeps its value. An optional terminator is consumed afterwards.
 * @param ctx - The context to update
 */
private parseOblique(ctx: MTextContext): void {
  const angleText = this.extractFloatExpression(false);
  if (angleText !== '') {
    ctx.oblique = parseFloat(angleText);
  }
  this.consumeOptionalTerminator();
}
886
+
887
/**
 * Reads an ACI color index and stores it on the context when it is below 257.
 * Larger numbers, or a missing number, leave the color untouched.
 * An optional terminator is consumed afterwards.
 * @param ctx - The context to update
 */
private parseAciColor(ctx: MTextContext): void {
  const digits = this.extractIntExpression();
  if (digits) {
    const index = parseInt(digits);
    if (index < 257) {
      ctx.color.aci = index;
    }
  }
  this.consumeOptionalTerminator();
}
901
+
902
/**
 * Reads an integer color value, keeps its lowest 24 bits and stores the result
 * as the context's RGB value. A missing number leaves the color untouched.
 * An optional terminator is consumed afterwards.
 * @param ctx - The context to update
 */
private parseRgbColor(ctx: MTextContext): void {
  const digits = this.extractIntExpression();
  if (digits) {
    const masked = parseInt(digits) & 0xffffff;
    ctx.color.rgbValue = masked;
  }
  this.consumeOptionalTerminator();
}
914
+
915
/**
 * Matches a float literal at the scanner's current position and consumes it.
 * Accepted form: optional sign, digits with optional fraction (or a leading-dot
 * fraction), optional exponent, and an optional trailing `x` when `relative` is true.
 * @param relative - Whether a trailing `x` (relative value marker) is accepted
 * @returns The matched text, or an empty string when nothing matches (nothing is consumed then)
 */
private extractFloatExpression(relative: boolean = false): string {
  const found = this.scanner.tail.match(
    relative
      ? /^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?x?/
      : /^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?/
  );
  if (!found) {
    return '';
  }
  const text = found[0];
  this.scanner.consume(text.length);
  return text;
}
932
+
933
/**
 * Matches a run of decimal digits at the scanner's current position and consumes it.
 * @returns The matched digits, or an empty string when there are none (nothing is consumed then)
 */
private extractIntExpression(): string {
  const found = this.scanner.tail.match(/^\d+/);
  if (!found) {
    return '';
  }
  const digits = found[0];
  this.scanner.consume(digits.length);
  return digits;
}
946
+
947
/**
 * Takes the text from the scanner's current position up to the next `;`
 * (as located by the scanner's `find`) and consumes it.
 * When no `;` is found, the whole remaining text is taken and consumed.
 * Otherwise the taken text plus one more character is consumed; if the character
 * just before the found `;` is a backslash, the `;` itself is included in the result.
 * @param escape - Passed through to the scanner's `find` to control handling of escaped semicolons
 * @returns The extracted expression
 */
private extractExpression(escape: boolean = false): string {
  const semicolonAt = this.scanner.find(';', escape);
  if (semicolonAt < 0) {
    const rest = this.scanner.tail;
    this.scanner.consume(rest.length);
    return rest;
  }
  // Distance from the current position to the semicolon.
  const offset = semicolonAt - this.scanner.currentIndex;
  // The semicolon counts as escaped when the character before it is a backslash.
  const precededByBackslash = this.scanner.peek(offset - 1) === '\\';
  const take = precededByBackslash ? offset + 1 : offset;
  const expr = this.scanner.tail.slice(0, take);
  this.scanner.consume(expr.length + 1);
  return expr;
}
966
+
967
/**
 * Reads a font expression of the form `name|flag|flag...` and stores the resulting
 * font face on the context.
 *
 * - A flag starting with `b1` selects weight 700; otherwise the weight is 400.
 * - A flag equal to `i` or starting with `i1` selects 'Italic'.
 * - A flag starting with `i0` selects 'Regular' (also the initial style).
 *
 * When the name part is empty the context is left unchanged.
 * @param ctx - The context to update
 */
private parseFontProperties(ctx: MTextContext): void {
  const [family, ...flags] = this.extractExpression().split('|');
  if (!family) {
    return;
  }

  let style: FontStyle = 'Regular';
  let weight = 400;
  for (const flag of flags) {
    if (flag.startsWith('b1')) {
      weight = 700;
    } else if (flag === 'i' || flag.startsWith('i1')) {
      style = 'Italic';
    } else if (flag.startsWith('i0')) {
      style = 'Regular';
    }
  }

  ctx.fontFace = { family, style, weight };
}
995
+
/**
 * Parse paragraph properties from the MText content and store them in `ctx.paragraph`.
 *
 * The expression up to the next semicolon is scanned command by command:
 * - `i<number>`: indentation
 * - `l<number>`: left margin
 * - `r<number>`: right margin
 * - `q<char>`: alignment, looked up in `CHAR_TO_ALIGN` (unknown characters give DEFAULT)
 * - `t<stops>`: tab stops; consumes the rest of the expression
 * - `x` and any other character: ignored
 *
 * Indentation, margins and alignment start from the current context values; the tab stop
 * list always starts empty.
 *
 * @param ctx - The context to update
 */
private parseParagraphProperties(ctx: MTextContext): void {
  const scanner = new TextScanner(this.extractExpression());
  const floatPattern = /^[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?/;

  /** Current indentation value */
  let indent = ctx.paragraph.indent;
  /** Left margin value */
  let left = ctx.paragraph.left;
  /** Right margin value */
  let right = ctx.paragraph.right;
  /** Current paragraph alignment */
  let align = ctx.paragraph.align;
  /** Array of tab stop positions and types */
  let tabStops: (number | string)[] = [];

  /** Skip any run of commas at the scanner position. */
  const skipCommas = (): void => {
    while (scanner.peek() === ',') {
      scanner.consume(1);
    }
  };

  /**
   * Read a floating point number (optional sign, decimals, exponent) at the scanner
   * position and skip the commas that follow it.
   * @returns The parsed value, or null when no number starts here (nothing is consumed)
   */
  const readNumber = (): number | null => {
    const match = scanner.tail.match(floatPattern);
    if (!match) {
      return null;
    }
    scanner.consume(match[0].length);
    skipCommas();
    return parseFloat(match[0]);
  };

  /**
   * Same as `readNumber`, but a missing number is reported as 0.
   * @returns The parsed float value, or 0 if no valid number is found
   */
  const parseFloatValue = (): number => readNumber() ?? 0;

  while (scanner.hasData) {
    const cmd = scanner.get();
    switch (cmd) {
      case 'i': // Indentation
        indent = parseFloatValue();
        break;
      case 'l': // Left margin
        left = parseFloatValue();
        break;
      case 'r': // Right margin
        right = parseFloatValue();
        break;
      case 'x': // Skip
        break;
      case 'q': {
        // Alignment
        const adjustment = scanner.get();
        align = CHAR_TO_ALIGN[adjustment] || MTextParagraphAlignment.DEFAULT;
        skipCommas();
        break;
      }
      case 't': // Tab stops
        tabStops = [];
        while (scanner.hasData) {
          const type = scanner.peek();
          if (type === 'r' || type === 'c') {
            // Typed tab stop: stored as the type letter followed by the position.
            scanner.consume(1);
            tabStops.push(type + parseFloatValue().toString());
          } else {
            const value = readNumber();
            if (value !== null) {
              tabStops.push(value);
            } else {
              // Not a number: drop the offending character so the loop always advances.
              // (Treating "no number" as 0 here would never consume it and loop forever.)
              scanner.consume(1);
            }
          }
        }
        break;
    }
  }

  ctx.paragraph = {
    indent,
    left,
    right,
    align,
    tabs: tabStops,
  };
}
1084
+
/**
 * Skip a single semicolon if one is present at the scanner position; otherwise leave
 * the scanner untouched.
 */
private consumeOptionalTerminator(): void {
  const upcoming = this.scanner.peek();
  if (upcoming !== ';') {
    return;
  }
  this.scanner.consume(1);
}
1093
+
/**
 * Parse MText content into tokens.
 *
 * Tokens are produced lazily from the scanner. Every token carries a copy of the context
 * that is current when the token is emitted. After a NEW_PARAGRAPH token, and only when
 * `resetParagraphParameters` is set, the paragraph properties are reset to their
 * defaults; if `yieldPropertyCommands` is also set and something actually changed, a
 * PROPERTIES_CHANGED token describing the reset follows.
 *
 * @yields MTextToken objects
 */
*parse(): Generator<MTextToken> {
  const wordToken = TokenType.WORD;
  const spaceToken = TokenType.SPACE;
  // Token to emit directly after the current one (the space that terminated a word).
  let followupToken: TokenType | null = null;

  /**
   * Reset the paragraph properties of `ctx` to their defaults.
   * @returns Only those properties whose previous value differed from the default
   */
  function resetParagraph(ctx: MTextContext): Partial<ParagraphProperties> {
    const prev = { ...ctx.paragraph };
    ctx.paragraph = {
      indent: 0,
      left: 0,
      right: 0,
      align: MTextParagraphAlignment.DEFAULT,
      tabs: [],
    };
    const changed: Partial<ParagraphProperties> = {};
    if (prev.indent !== 0) changed.indent = 0;
    if (prev.left !== 0) changed.left = 0;
    if (prev.right !== 0) changed.right = 0;
    if (prev.align !== MTextParagraphAlignment.DEFAULT)
      changed.align = MTextParagraphAlignment.DEFAULT;
    if (JSON.stringify(prev.tabs) !== JSON.stringify([])) changed.tabs = [];
    return changed;
  }

  /**
   * Pick the hex digits of a `\U+` escape from the start of `text`.
   * Eight digits are taken only when they form a valid code point (<= 0x10FFFF);
   * otherwise exactly four are taken, so that text such as `\U+00B0C` decodes as
   * U+00B0 followed by a literal "C" instead of swallowing the trailing letter.
   * @returns The hex digits, or null when fewer than four hex digits are present
   */
  const readUnicodeHex = (text: string): string | null => {
    const long = /^[0-9A-Fa-f]{8}/.exec(text);
    if (long && parseInt(long[0], 16) <= 0x10ffff) {
      return long[0];
    }
    const short = /^[0-9A-Fa-f]{4}/.exec(text);
    return short ? short[0] : null;
  };

  /**
   * Scan the next token.
   * @returns The token type and its data; `TokenType.NONE` once the input is exhausted
   */
  const nextToken = (): [TokenType, TokenData[TokenType]] => {
    let word = '';
    while (this.scanner.hasData) {
      let escape = false;
      let letter = this.scanner.peek();
      const cmdStartIndex = this.scanner.currentIndex;

      // Handle control characters first
      if (letter.charCodeAt(0) < 32) {
        if (letter === '\t' || letter === '\n') {
          // Flush pending text first; the control character stays unconsumed and is
          // turned into its own token on the next call.
          if (word) {
            return [wordToken, word];
          }
          this.scanner.consume(1);
          return [letter === '\t' ? TokenType.TABULATOR : TokenType.NEW_PARAGRAPH, null];
        }
        // Any other control character behaves like a space; the space branch below
        // consumes it (exactly once).
        letter = ' ';
      }

      if (letter === '\\') {
        if ('\\{}'.includes(this.scanner.peek(1))) {
          // Escaped backslash or brace: drop the backslash, keep the next char as text.
          escape = true;
          this.scanner.consume(1);
          letter = this.scanner.peek();
        } else {
          // A command starts here: emit pending text before touching the scanner.
          if (word) {
            return [wordToken, word];
          }
          this.scanner.consume(1);
          const cmd = this.scanner.get();
          switch (cmd) {
            case '~':
              return [TokenType.NBSP, null];
            case 'P':
              return [TokenType.NEW_PARAGRAPH, null];
            case 'N':
              return [TokenType.NEW_COLUMN, null];
            case 'X':
              return [TokenType.WRAP_AT_DIMLINE, null];
            case 'S': {
              this.inStackContext = true;
              const result = this.parseStacking();
              this.inStackContext = false;
              return result;
            }
            case 'm':
            case 'M':
              // Handle multi-byte character encoding (MIF)
              if (this.scanner.peek() === '+') {
                this.scanner.consume(1); // Consume the '+'
                const hexCode = this.extractMifCode(this.mifCodeLength);
                if (hexCode) {
                  this.scanner.consume(hexCode.length);
                  // `word` is always empty here (it was flushed above).
                  return [wordToken, this.mifDecoder(hexCode)];
                }
                // If no valid hex code found, rewind the '+' character
                this.scanner.consume(-1);
              }
              // If not a valid multi-byte code, treat as literal text (case preserved)
              word += '\\' + cmd;
              continue;
            case 'U':
              // Handle Unicode escape: \U+XXXX or \U+XXXXXXXX
              if (this.scanner.peek() === '+') {
                this.scanner.consume(1); // Consume the '+'
                const hexCode = readUnicodeHex(this.scanner.tail);
                if (hexCode) {
                  this.scanner.consume(hexCode.length);
                  // readUnicodeHex guarantees a code point <= 0x10FFFF, so this
                  // conversion cannot throw.
                  return [wordToken, String.fromCodePoint(parseInt(hexCode, 16))];
                }
                // If no valid hex code found, rewind the '+' character
                this.scanner.consume(-1);
              }
              // If not a valid Unicode code, treat as literal text
              word += '\\U';
              continue;
            default:
              if (cmd) {
                try {
                  const propertyChanges = this.parseProperties(cmd);
                  if (this.yieldPropertyCommands && propertyChanges) {
                    return [TokenType.PROPERTIES_CHANGED, propertyChanges];
                  }
                  // After processing a property command, continue with normal parsing
                  continue;
                } catch {
                  // The command could not be applied: emit the raw text consumed since
                  // the backslash verbatim. It lies behind the cursor, so it is read
                  // back with negative peek offsets (`tail` only covers text ahead).
                  const consumed = this.scanner.currentIndex - cmdStartIndex;
                  let commandText = '';
                  for (let offset = -consumed; offset < 0; offset++) {
                    commandText += this.scanner.peek(offset);
                  }
                  word += commandText;
                }
              }
          }
          continue;
        }
      }

      if (letter === '%' && this.scanner.peek(1) === '%') {
        const code = this.scanner.peek(2).toLowerCase();
        const specialChar = SPECIAL_CHAR_ENCODING[code];
        if (specialChar) {
          this.scanner.consume(3);
          word += specialChar;
          continue;
        } else {
          /**
           * Supports Control Codes: `%%ddd`, where ddd is a three-digit decimal number representing the ASCII code value of the character.
           *
           * Reference: https://help.autodesk.com/view/ACD/2026/ENU/?guid=GUID-968CBC1D-BA99-4519-ABDD-88419EB2BF92
           */
          const digits = [code, this.scanner.peek(3), this.scanner.peek(4)];

          if (digits.every(d => d >= '0' && d <= '9')) {
            const charCode = Number.parseInt(digits.join(''), 10);
            this.scanner.consume(5);
            word += String.fromCharCode(charCode);
          } else {
            // Skip invalid special character codes
            this.scanner.consume(3);
          }

          continue;
        }
      }

      if (letter === ' ') {
        if (word) {
          // The space ends the word; it is emitted as a follow-up token.
          this.scanner.consume(1);
          followupToken = spaceToken;
          return [wordToken, word];
        }
        this.scanner.consume(1);
        return [spaceToken, null];
      }

      if (!escape) {
        if (letter === '{') {
          if (word) {
            return [wordToken, word];
          }
          this.scanner.consume(1);
          this.pushCtx();
          continue;
        } else if (letter === '}') {
          if (word) {
            return [wordToken, word];
          }
          this.scanner.consume(1);
          // Context restoration with yieldPropertyCommands
          if (this.yieldPropertyCommands) {
            const prevCtx = this.ctxStack.current;
            this.popCtx();
            const changes = this.getPropertyChanges(prevCtx, this.ctxStack.current);
            if (Object.keys(changes).length > 0) {
              return [
                TokenType.PROPERTIES_CHANGED,
                { command: undefined, changes, depth: this.ctxStack.depth },
              ];
            }
          } else {
            this.popCtx();
          }
          continue;
        }
      }

      // Handle caret-encoded characters only when not in stack context
      if (!this.inStackContext && letter === '^') {
        const nextChar = this.scanner.peek(1);
        if (nextChar) {
          const code = nextChar.charCodeAt(0);
          if (code === 73 || code === 74) {
            // ^I (tab) / ^J (line feed): flush pending text first and leave the caret
            // sequence in place so the control token is produced on the next call.
            if (word) {
              return [wordToken, word];
            }
            this.scanner.consume(2);
            return [code === 73 ? TokenType.TABULATOR : TokenType.NEW_PARAGRAPH, null];
          }
          this.scanner.consume(2); // Consume both ^ and the next character
          if (code === 32) {
            // Caret followed by a space yields a literal caret
            word += '^';
          } else if (code !== 77) {
            // ^M (carriage return) is ignored; anything else becomes a placeholder
            word += '▯';
          }
          continue;
        }
      }

      this.scanner.consume(1);
      if (letter.charCodeAt(0) >= 32) {
        word += letter;
      }
    }

    if (word) {
      return [wordToken, word];
    }
    return [TokenType.NONE, null];
  };

  while (true) {
    const [type, data] = nextToken();
    if (type) {
      yield new MTextToken(type, this.ctxStack.current.copy(), data);
      if (type === TokenType.NEW_PARAGRAPH && this.resetParagraphParameters) {
        // Reset paragraph properties and emit PROPERTIES_CHANGED if needed
        const ctx = this.ctxStack.current;
        const changed = resetParagraph(ctx);
        if (this.yieldPropertyCommands && Object.keys(changed).length > 0) {
          yield new MTextToken(TokenType.PROPERTIES_CHANGED, ctx.copy(), {
            command: undefined,
            changes: { paragraph: changed },
            depth: this.ctxStack.depth,
          });
        }
      }
      if (followupToken) {
        yield new MTextToken(followupToken, this.ctxStack.current.copy(), null);
        followupToken = null;
      }
    } else {
      // TokenType.NONE (0) marks the end of the input.
      break;
    }
  }
}
1373
+ }
1374
+
/**
 * Cursor over an immutable string, used for parsing MText content.
 *
 * The cursor position always stays within `0..text.length`.
 */
export class TextScanner {
  private readonly text: string;
  private _index = 0;

  /**
   * Create a new text scanner positioned at the start of the text
   * @param text - The text to scan
   */
  constructor(text: string) {
    this.text = text;
  }

  /** Current cursor position within the text */
  get currentIndex(): number {
    return this._index;
  }

  /** True once the cursor has reached the end of the text */
  get isEmpty(): boolean {
    return this._index >= this.text.length;
  }

  /** True while there are characters left at or after the cursor */
  get hasData(): boolean {
    return this._index < this.text.length;
  }

  /**
   * Read the character at the cursor and move the cursor one step forward
   * @returns The character read, or empty string if at end
   */
  get(): string {
    if (this.isEmpty) {
      return '';
    }
    return this.text[this._index++];
  }

  /**
   * Move the cursor by `count` characters. Negative counts move backwards; the
   * resulting position is clamped to the bounds of the text.
   * @param count - Number of characters to advance
   */
  consume(count: number = 1): void {
    const target = this._index + count;
    this._index = Math.max(0, Math.min(target, this.text.length));
  }

  /**
   * Look at a character relative to the cursor without moving it
   * @param offset - Offset from current position (may be negative)
   * @returns The character at the offset position, or empty string if out of bounds
   */
  peek(offset: number = 0): string {
    const at = this._index + offset;
    const outOfBounds = at >= this.text.length || at < 0;
    return outOfBounds ? '' : this.text[at];
  }

  /**
   * Search forward from the cursor for a character.
   *
   * With `escape` enabled, a backslash is treated specially: when the character right
   * after it is `char`, the index of that character is returned; otherwise the
   * backslash and the character after it are both skipped.
   *
   * @param char - The character to find
   * @param escape - Whether to handle backslash sequences as described above
   * @returns Absolute index of the character, or -1 if not found
   */
  find(char: string, escape: boolean = false): number {
    const end = this.text.length;
    let pos = this._index;
    while (pos < end) {
      const current = this.text[pos];
      if (escape && current === '\\') {
        const hasFollower = pos + 1 < end;
        if (hasFollower && this.text[pos + 1] === char) {
          return pos + 1;
        }
        pos += hasFollower ? 2 : 1;
        continue;
      }
      if (current === char) {
        return pos;
      }
      pos++;
    }
    return -1;
  }

  /** The text from the cursor to the end */
  get tail(): string {
    return this.text.slice(this._index);
  }

  /** Whether the character at the cursor is a space */
  isNextSpace(): boolean {
    return this.peek() === ' ';
  }

  /**
   * Advance the cursor over a run of spaces
   * @returns Number of spaces consumed
   */
  consumeSpaces(): number {
    const start = this._index;
    while (this.isNextSpace()) {
      this.consume();
    }
    return this._index - start;
  }
}
1503
+
/**
 * Color state for MText, expressed either as an AutoCAD Color Index (ACI) or as RGB.
 *
 * Assigning an RGB color clears the ACI value and assigning an ACI value clears the RGB
 * color. RGB is kept internally as one 24-bit integer (0xRRGGBB), which makes comparison
 * and serialization cheap.
 *
 * Example usage:
 * ```ts
 * const color1 = new MTextColor(1); // ACI color
 * const color2 = new MTextColor([255, 0, 0]); // RGB color
 * const color3 = new MTextColor(); // Default (ACI=256, "by layer")
 * ```
 */
export class MTextColor {
  /**
   * The AutoCAD Color Index (ACI) value, or null when an RGB color has been assigned.
   * @default 256 ("by layer")
   */
  private _aci: number | null = 256;

  /**
   * The RGB color packed as 0xRRGGBB, or null if not set.
   * @default null
   */
  private _rgbValue: number | null = null;

  /**
   * Create a new MTextColor instance.
   * @param color The initial color: number for ACI, [r,g,b] for RGB, or null/undefined for default (ACI=256).
   * @throws Error if a numeric ACI value is out of range.
   */
  constructor(color?: number | RGB | null) {
    if (Array.isArray(color)) {
      this.rgb = color;
      return;
    }
    this.aci = typeof color === 'number' ? color : 256;
  }

  /**
   * Get the current ACI color value.
   * @returns The ACI color (0-256), or null if using RGB.
   */
  get aci(): number | null {
    return this._aci;
  }

  /**
   * Set the ACI color value. A non-null value also clears any RGB color; null only
   * clears the ACI value.
   * @param value The ACI color (0-256), or null to unset.
   * @throws Error if value is out of range.
   */
  set aci(value: number | null) {
    if (value === null) {
      this._aci = null;
      return;
    }
    const inRange = value >= 0 && value <= 256;
    if (!inRange) {
      throw new Error('ACI not in range [0, 256]');
    }
    this._aci = value;
    this._rgbValue = null;
  }

  /**
   * Get the current RGB color as a tuple [r, g, b], or null if not set.
   * @returns The RGB color tuple, or null if no RGB color is set.
   */
  get rgb(): RGB | null {
    const packed = this._rgbValue;
    if (packed === null) {
      return null;
    }
    // Unpack 0xRRGGBB into its three channels.
    return [(packed >> 16) & 0xff, (packed >> 8) & 0xff, packed & 0xff];
  }

  /**
   * Set the RGB color. A tuple also clears the ACI value; null only clears the RGB color.
   * Each channel is masked to its low 8 bits.
   * @param value The RGB color tuple [r, g, b], or null to clear the RGB color.
   */
  set rgb(value: RGB | null) {
    if (!value) {
      this._rgbValue = null;
      return;
    }
    const [red, green, blue] = value;
    this._rgbValue = ((red & 0xff) << 16) | ((green & 0xff) << 8) | (blue & 0xff);
    this._aci = null;
  }

  /**
   * Returns true if an RGB color is set.
   */
  get isRgb(): boolean {
    return this._rgbValue !== null;
  }

  /**
   * Returns true if no RGB color is set and an ACI value is present.
   */
  get isAci(): boolean {
    return this._rgbValue === null && this._aci !== null;
  }

  /**
   * Get the internal RGB value as a number (0xRRGGBB), or null if not set.
   */
  get rgbValue(): number | null {
    return this._rgbValue;
  }

  /**
   * Set the internal RGB value. A number is masked to 24 bits and clears the ACI value;
   * null only clears the RGB color.
   */
  set rgbValue(val: number | null) {
    if (val === null) {
      this._rgbValue = null;
      return;
    }
    this._rgbValue = val & 0xffffff;
    this._aci = null;
  }

  /**
   * Returns a copy of this color.
   * @returns A new MTextColor instance with the same color state.
   */
  copy(): MTextColor {
    const duplicate = new MTextColor();
    duplicate._aci = this._aci;
    duplicate._rgbValue = this._rgbValue;
    return duplicate;
  }

  /**
   * Returns a plain object for serialization.
   * @returns An object with aci, rgb (tuple), and rgbValue (number or null).
   */
  toObject(): { aci: number | null; rgb: RGB | null; rgbValue: number | null } {
    const { _aci: aci, _rgbValue: rgbValue } = this;
    return { aci, rgb: this.rgb, rgbValue };
  }

  /**
   * Equality check for color.
   * @param other The other MTextColor to compare.
   * @returns True if both ACI and RGB values are equal.
   */
  equals(other: MTextColor): boolean {
    const sameAci = this._aci === other._aci;
    const sameRgb = this._rgbValue === other._rgbValue;
    return sameAci && sameRgb;
  }
}
1654
+
/**
 * Formatting state that applies to MText content: strokes, color, alignment, font,
 * size factors, oblique angle and paragraph properties.
 */
export class MTextContext {
  /** Bit set of the active `MTextStroke` flags */
  private _stroke = 0;
  /** Whether to continue stroke formatting */
  continueStroke = false;
  /** Color (ACI or RGB) */
  color: MTextColor = new MTextColor();
  /** Line alignment */
  align: MTextLineAlignment = MTextLineAlignment.BOTTOM;
  /** Font face properties */
  fontFace: FontFace = { family: '', style: 'Regular', weight: 400 };
  /** Capital letter height */
  private _capHeight: FactorValue = { value: 1.0, isRelative: false };
  /** Character width factor */
  private _widthFactor: FactorValue = { value: 1.0, isRelative: false };
  /**
   * Character tracking factor: a multiplier applied to the default spacing between
   * characters in the MText object.
   * - Value = 1.0 → Normal spacing.
   * - Value < 1.0 → Characters are closer together.
   * - Value > 1.0 → Characters are spaced farther apart.
   */
  private _charTrackingFactor: FactorValue = { value: 1.0, isRelative: false };
  /** Oblique angle */
  oblique = 0.0;
  /** Paragraph properties */
  paragraph: ParagraphProperties = {
    indent: 0,
    left: 0,
    right: 0,
    align: MTextParagraphAlignment.DEFAULT,
    tabs: [],
  };

  /**
   * Build a new factor with the magnitude of `factor.value` and the same relativity.
   */
  private static withAbsoluteValue(factor: FactorValue): FactorValue {
    return { value: Math.abs(factor.value), isRelative: factor.isRelative };
  }

  /** The capital letter height */
  get capHeight(): FactorValue {
    return this._capHeight;
  }

  /**
   * Store the capital letter height; the sign of the value is discarded.
   * @param value - Height value
   */
  set capHeight(value: FactorValue) {
    this._capHeight = MTextContext.withAbsoluteValue(value);
  }

  /** The character width factor */
  get widthFactor(): FactorValue {
    return this._widthFactor;
  }

  /**
   * Store the character width factor; the sign of the value is discarded.
   * @param value - Width factor value
   */
  set widthFactor(value: FactorValue) {
    this._widthFactor = MTextContext.withAbsoluteValue(value);
  }

  /** The character tracking factor */
  get charTrackingFactor(): FactorValue {
    return this._charTrackingFactor;
  }

  /**
   * Store the character tracking factor; the sign of the value is discarded.
   * @param value - Tracking factor value
   */
  set charTrackingFactor(value: FactorValue) {
    this._charTrackingFactor = MTextContext.withAbsoluteValue(value);
  }

  /** The ACI value of the current color (delegates to `color`) */
  get aci(): number | null {
    return this.color.aci;
  }

  /**
   * Set the ACI value of the current color (delegates to `color`)
   * @param value - ACI color value (0-256)
   * @throws Error if value is out of range
   */
  set aci(value: number) {
    this.color.aci = value;
  }

  /** The RGB value of the current color (delegates to `color`) */
  get rgb(): RGB | null {
    return this.color.rgb;
  }

  /** Set the RGB value of the current color (delegates to `color`) */
  set rgb(value: RGB | null) {
    this.color.rgb = value;
  }

  /**
   * Whether the current text should be rendered in italic style.
   * @returns True if the font style is 'Italic', otherwise false.
   */
  get italic(): boolean {
    return this.fontFace.style === 'Italic';
  }

  /**
   * Switch the font style between italic and regular.
   * @param value - If true, sets the font style to 'Italic'; if false, sets it to 'Regular'.
   */
  set italic(value: boolean) {
    this.fontFace.style = value ? 'Italic' : 'Regular';
  }

  /**
   * Whether the current text should be rendered in bold style.
   * A missing (or zero) weight is treated as 400.
   * @returns True if the font weight is 700 or higher, otherwise false.
   */
  get bold(): boolean {
    const weight = this.fontFace.weight || 400;
    return weight >= 700;
  }

  /**
   * Switch the font weight between bold and normal.
   * @param value - If true, sets the font weight to 700; if false, sets it to 400.
   */
  set bold(value: boolean) {
    this.fontFace.weight = value ? 700 : 400;
  }

  /** Whether text is underlined */
  get underline(): boolean {
    return Boolean(this._stroke & MTextStroke.UNDERLINE);
  }

  /**
   * Turn underlining on or off
   * @param value - Whether to underline
   */
  set underline(value: boolean) {
    this._setStrokeState(MTextStroke.UNDERLINE, value);
  }

  /** Whether text has strike-through */
  get strikeThrough(): boolean {
    return Boolean(this._stroke & MTextStroke.STRIKE_THROUGH);
  }

  /**
   * Turn strike-through on or off
   * @param value - Whether to strike through
   */
  set strikeThrough(value: boolean) {
    this._setStrokeState(MTextStroke.STRIKE_THROUGH, value);
  }

  /** Whether text has an overline */
  get overline(): boolean {
    return Boolean(this._stroke & MTextStroke.OVERLINE);
  }

  /**
   * Turn the overline on or off
   * @param value - Whether to overline
   */
  set overline(value: boolean) {
    this._setStrokeState(MTextStroke.OVERLINE, value);
  }

  /** True when at least one stroke flag is set */
  get hasAnyStroke(): boolean {
    return Boolean(this._stroke);
  }

  /**
   * Set or clear one stroke flag in the stroke bit set
   * @param stroke - The stroke type to set
   * @param state - Whether to enable or disable the stroke
   */
  private _setStrokeState(stroke: MTextStroke, state: boolean = true): void {
    this._stroke = state ? this._stroke | stroke : this._stroke & ~stroke;
  }

  /**
   * Create a copy of this context.
   *
   * The color, font face, factor objects and the paragraph object are duplicated one
   * level deep; the paragraph's `tabs` array itself is shared with the original.
   *
   * @returns A new context with the same properties
   */
  copy(): MTextContext {
    const clone = new MTextContext();
    clone._stroke = this._stroke;
    clone.continueStroke = this.continueStroke;
    clone.color = this.color.copy();
    clone.align = this.align;
    clone.fontFace = { ...this.fontFace };
    clone._capHeight = { ...this._capHeight };
    clone._widthFactor = { ...this._widthFactor };
    clone._charTrackingFactor = { ...this._charTrackingFactor };
    clone.oblique = this.oblique;
    clone.paragraph = { ...this.paragraph };
    return clone;
  }
}
1890
+
/**
 * A single token produced while parsing MText, together with the formatting context
 * it was emitted with.
 */
export class MTextToken {
  /** The token type */
  public type: TokenType;
  /** The text context at this token */
  public ctx: MTextContext;
  /** Token data; its shape depends on the token type */
  public data: TokenData[TokenType];

  /**
   * Create a new MText token
   * @param type - The token type
   * @param ctx - The text context at this token
   * @param data - Optional token data
   */
  constructor(type: TokenType, ctx: MTextContext, data: TokenData[TokenType]) {
    this.type = type;
    this.ctx = ctx;
    this.data = data;
  }
}