@mlightcad/mtext-parser 1.3.3 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/parser.ts ADDED
@@ -0,0 +1,1998 @@
1
/**
 * Kinds of tokens produced while parsing MText content.
 *
 * Members are numbered consecutively from 0; the payload carried by each kind
 * is described by `TokenData`.
 */
export enum TokenType {
  /** Placeholder for "no token"; carries no data. */
  NONE = 0,
  /** A word; the data is the word text (string). */
  WORD,
  /** A stacked expression; the data is a [numerator, denominator, type] tuple. */
  STACK,
  /** A space; carries no data. */
  SPACE,
  /** A non-breaking space; carries no data. */
  NBSP,
  /** A tabulator; carries no data. */
  TABULATOR,
  /** Start of a new paragraph; carries no data. */
  NEW_PARAGRAPH,
  /** Start of a new column; carries no data. */
  NEW_COLUMN,
  /** Wrap at dimension line; carries no data. */
  WRAP_AT_DIMLINE,
  /** Formatting properties changed; the data is a `ChangedProperties` object. */
  PROPERTIES_CHANGED,
}
26
+
27
/**
 * A numeric factor that is either an absolute value or a value relative to
 * the current one. Used for height, width and character tracking.
 */
export interface FactorValue {
  /** Numeric magnitude of the factor. */
  value: number;
  /** `true` when `value` is relative, `false` when it is absolute. */
  isRelative: boolean;
}
37
+
38
/**
 * Formatting properties that can apply to MText word tokens: strokes, colors,
 * line alignment, font, size/spacing factors, slant and paragraph settings.
 * Every member is optional so the interface can also describe a partial change set.
 */
export interface Properties {
  /** Text is underlined. */
  underline?: boolean;
  /** Text has an overline. */
  overline?: boolean;
  /** Text is struck through. */
  strikeThrough?: boolean;
  /** AutoCAD Color Index (ACI) value (0-256), or null when not set. */
  aci?: number | null;
  /** RGB color as an [r, g, b] tuple, or null when not set. */
  rgb?: RGB | null;
  /** Line alignment of the text. */
  align?: MTextLineAlignment;
  /** Font face: family, style and weight. */
  fontFace?: FontFace;
  /** Capital letter height factor (relative or absolute). */
  capHeight?: FactorValue;
  /** Character width factor (relative or absolute). */
  widthFactor?: FactorValue;
  /** Character tracking (inter-character spacing) factor (relative or absolute). */
  charTrackingFactor?: FactorValue;
  /** Oblique (slant) angle in degrees. */
  oblique?: number;
  /** Paragraph formatting; partial so individual settings can be updated selectively. */
  paragraph?: Partial<ParagraphProperties>;
}
69
+
70
/**
 * Describes one change of MText properties: which command caused it, what
 * changed, and at which `{}` nesting depth it happened.
 */
export interface ChangedProperties {
  /**
   * Property command that triggered the change (e.g. 'L', 'C', 'f'),
   * or undefined when the change restores a previous context.
   */
  command: string | undefined;
  /** Properties that differ as a result of the command. */
  changes: Properties;
  /**
   * Context stack depth at the time of the change:
   * 0 means global (outside any `{}` block), a value above 0 means local
   * (inside one or more nested `{}` blocks).
   */
  depth: number;
}
90
+
91
/**
 * Maps each `TokenType` to the type of the data payload a token of that kind carries.
 */
export type TokenData = {
  // Tokens without a payload.
  [TokenType.NONE]: null;
  // Word text.
  [TokenType.WORD]: string;
  // Three strings describing a stacked expression.
  [TokenType.STACK]: [string, string, string];
  // Whitespace and layout tokens carry no payload.
  [TokenType.SPACE]: null;
  [TokenType.NBSP]: null;
  [TokenType.TABULATOR]: null;
  [TokenType.NEW_PARAGRAPH]: null;
  [TokenType.NEW_COLUMN]: null;
  [TokenType.WRAP_AT_DIMLINE]: null;
  // Description of a formatting change.
  [TokenType.PROPERTIES_CHANGED]: ChangedProperties;
};
106
+
107
/**
 * Line alignment of MText content (numbered 0..2).
 */
export enum MTextLineAlignment {
  /** Text is aligned to the bottom. */
  BOTTOM = 0,
  /** Text is aligned to the middle. */
  MIDDLE,
  /** Text is aligned to the top. */
  TOP,
}
118
+
119
/**
 * Paragraph alignment of MText content (numbered 0..5).
 */
export enum MTextParagraphAlignment {
  /** Default alignment. */
  DEFAULT = 0,
  /** Left-aligned. */
  LEFT,
  /** Right-aligned. */
  RIGHT,
  /** Centered. */
  CENTER,
  /** Justified. */
  JUSTIFIED,
  /** Distributed. */
  DISTRIBUTED,
}
136
+
137
/**
 * Stroke decorations for MText.
 *
 * The non-zero values are distinct powers of two (1, 2, 4), so they can
 * presumably be combined as bit flags — confirm against the consumers.
 */
export enum MTextStroke {
  /** No stroke. */
  NONE = 0,
  /** Underline stroke. */
  UNDERLINE = 1 << 0,
  /** Overline stroke. */
  OVERLINE = 1 << 1,
  /** Strike-through stroke. */
  STRIKE_THROUGH = 1 << 2,
}
150
+
151
/**
 * RGB color as a three-element tuple in red, green, blue order.
 */
export type RGB = [red: number, green: number, blue: number];
155
+
156
/**
 * Allowed font style names.
 */
export type FontStyle = 'Italic' | 'Regular';
160
+
161
/**
 * Describes a font face by family, style and weight.
 */
export interface FontFace {
  /** Name of the font family. */
  family: string;
  /** Style of the face ('Regular' or 'Italic'). */
  style: FontStyle;
  /** Numeric weight, e.g. 400 for normal or 700 for bold. */
  weight: number;
}
172
+
173
/**
 * Paragraph-level formatting settings.
 */
export interface ParagraphProperties {
  /** Indentation value. */
  indent: number;
  /** Left margin value. */
  left: number;
  /** Right margin value. */
  right: number;
  /** Alignment of the paragraph. */
  align: MTextParagraphAlignment;
  /** Tab stops; each entry is a position (number) or a typed stop (string). */
  tabs: (number | string)[];
}
188
+
189
/**
 * Lookup table from a special-character code letter to the character it stands for.
 * NOTE(review): presumably keyed by the character that follows a `%%` escape — confirm at the call site.
 */
const SPECIAL_CHAR_ENCODING: Record<string, string> = {
  // diameter sign
  c: 'Ø',
  // degree sign
  d: '°',
  // plus-minus sign
  p: '±',
  // literal percent sign
  '%': '%',
};
198
+
199
/**
 * Lookup table from a single lowercase letter to the paragraph alignment it selects.
 * There is no entry for the default alignment.
 */
const CHAR_TO_ALIGN: Record<string, MTextParagraphAlignment> = {
  l: MTextParagraphAlignment.LEFT, // left
  r: MTextParagraphAlignment.RIGHT, // right
  c: MTextParagraphAlignment.CENTER, // center
  j: MTextParagraphAlignment.JUSTIFIED, // justified
  d: MTextParagraphAlignment.DISTRIBUTED, // distributed
};
209
+
210
/**
 * Pack an RGB tuple into a single integer laid out as 0xRRGGBB.
 * Channels are combined with shifts and bitwise OR; they are not range-checked.
 * @param rgb - RGB color tuple
 * @returns Integer color value
 */
export function rgb2int(rgb: RGB): number {
  const red = rgb[0] << 16;
  const green = rgb[1] << 8;
  const blue = rgb[2];
  return red | green | blue;
}
219
+
220
/**
 * Unpack an integer laid out as 0xRRGGBB into an RGB tuple.
 * Only the low 24 bits of the input are used.
 * @param value - Integer color value
 * @returns RGB color tuple
 */
export function int2rgb(value: number): RGB {
  const channelAt = (shift: number): number => (value >> shift) & 0xff;
  return [channelAt(16), channelAt(8), channelAt(0)];
}
231
+
232
/**
 * Round a channel value to the nearest integer and limit it to 0..255.
 * A NaN input is returned as NaN.
 */
function clampColorChannel(value: number): number {
  const rounded = Math.round(value);
  if (rounded <= 0) {
    // Also maps -0 to +0, as Math.max(0, -0) does.
    return 0;
  }
  if (rounded >= 255) {
    return 255;
  }
  return rounded;
}
235
+
236
/**
 * Round a color number to the nearest integer and limit it to 0..0xffffff.
 *
 * NaN is mapped to 0: `Math.max`/`Math.min` propagate NaN, which would otherwise
 * leak into callers (for example producing "#000NaN" when formatted as hex).
 * @param color - Color number to normalize
 * @returns Integer in the range 0..0xffffff
 */
function normalizeColorNumber(color: number): number {
  if (Number.isNaN(color)) {
    return 0;
  }
  return Math.max(0, Math.min(0xffffff, Math.round(color)));
}
239
+
240
/**
 * Format a color number as a lowercase `#rrggbb` string.
 * The number is normalized with `normalizeColorNumber` first.
 * @param color - Color number, or null
 * @returns Hex color string, or null when `color` is null
 */
function colorNumberToHex(color: number | null): string | null {
  if (color === null) {
    return null;
  }
  const hexDigits = normalizeColorNumber(color).toString(16);
  return '#' + hexDigits.padStart(6, '0');
}
244
+
245
/**
 * Normalize a hex color string to lowercase `#rrggbb` form.
 *
 * Accepts 6-digit and 3-digit hex, with or without a leading `#`, ignoring
 * surrounding whitespace and letter case. 3-digit input is expanded by
 * doubling each digit.
 * @param value - Candidate hex color
 * @returns Normalized color, or null for empty or unrecognized input
 */
function normalizeHexColor(value: string | null | undefined): string | null {
  if (!value) return null;
  const candidate = value.trim().toLowerCase();

  const sixDigits = /^#?([0-9a-f]{6})$/.exec(candidate);
  if (sixDigits) {
    return `#${sixDigits[1]}`;
  }

  const threeDigits = /^#?([0-9a-f]{3})$/.exec(candidate);
  if (threeDigits) {
    const expanded = Array.from(threeDigits[1], digit => digit + digit).join('');
    return `#${expanded}`;
  }

  return null;
}
264
+
265
/**
 * Convert a CSS color string to a 0xRRGGBB integer.
 *
 * Supported forms: hex colors (anything `normalizeHexColor` accepts) and
 * `rgb(...)` / `rgba(...)` with comma- or space-separated channels, each a
 * number or a percentage. Any alpha component is ignored.
 *
 * @param value - CSS color string
 * @returns The color as an integer, or null when the input is empty,
 *          `transparent`, not a supported form, or has a channel that is not a number
 */
function cssColorToRgbValue(value: string | null | undefined): number | null {
  if (!value) return null;
  const raw = value.trim().toLowerCase();
  if (raw === 'transparent') return null;

  const hex = normalizeHexColor(raw);
  if (hex) {
    return normalizeColorNumber(Number.parseInt(hex.slice(1), 16));
  }

  const fnMatch = raw.match(/^rgba?\((.*)\)$/);
  if (!fnMatch) return null;

  // Treat the "/" before an alpha value as a separator, then split on commas
  // and whitespace; splitting on whitespace leaves nothing to trim.
  const parts = fnMatch[1]
    .replace(/\s*\/\s*/g, ' ')
    .split(/[,\s]+/)
    .filter(Boolean);

  if (parts.length < 3) return null;

  const channels: number[] = [];
  for (const token of parts.slice(0, 3)) {
    const isPercent = token.endsWith('%');
    const num = Number.parseFloat(isPercent ? token.slice(0, -1) : token);
    // An unparsable channel used to become NaN and silently collapse to 0
    // (black) in the bitwise packing; report it as "not a color" instead.
    if (Number.isNaN(num)) return null;
    channels.push(clampColorChannel(isPercent ? (num / 100) * 255 : num));
  }

  const [r, g, b] = channels;
  return rgb2int([r, g, b]);
}
300
+
301
/**
 * Replace every line ending (`\r\n`, `\r` or `\n`) with the two-character
 * sequence backslash + `P`.
 * @param text - Text to escape
 * @returns Escaped text
 */
export function escapeDxfLineEndings(text: string): string {
  const lines = text.split(/\r\n|\r|\n/);
  return lines.join('\\P');
}
309
+
310
/**
 * Report whether the text still contains a backslash after all `\P` sequences
 * and then all `\~` sequences have been removed.
 * @param text - Text to check
 * @returns True if a backslash remains, i.e. the text contains other formatting codes
 */
export function hasInlineFormattingCodes(text: string): boolean {
  // Removal is done in two passes, in this order, so that a `\~` formed by
  // removing a `\P` is removed as well.
  const withoutParagraphBreaks = text.split('\\P').join('');
  const withoutNbsp = withoutParagraphBreaks.split('\\~').join('');
  return withoutNbsp.indexOf('\\') !== -1;
}
318
+
319
/**
 * Collect the unique font names referenced by font commands in an MText string.
 *
 * A font command is `\f` or `\F` followed by the font name, which ends at the
 * first `;` or `|`. Names are lowercased so that uniqueness is case-insensitive.
 *
 * @param mtext - The MText string to scan
 * @param removeExtension - When true, a trailing `.ttf`, `.otf`, `.woff` or `.shx` is stripped from each name. Defaults to false.
 * @returns A Set of the lowercase font names found
 * @example
 * ```ts
 * const mtext = "\\fArial.ttf|Hello\\fTimes New Roman.otf|World";
 * const fonts = getFonts(mtext, true);
 * // Returns: Set(2) { "arial", "times new roman" }
 * ```
 */
export function getFonts(mtext: string, removeExtension: boolean = false) {
  const fontCommand = /\\[fF](.*?)[;|]/g;
  const names = new Set<string>();

  for (const [, rawName] of mtext.matchAll(fontCommand)) {
    const lowered = rawName.toLowerCase();
    names.add(removeExtension ? lowered.replace(/\.(ttf|otf|woff|shx)$/, '') : lowered);
  }

  return names;
}
348
+
349
/**
 * Stack of MTextContext objects that tracks formatting state across nested `{}` blocks.
 *
 * - The bottom entry is the root context; it is never removed by `pop`.
 * - Character-level formatting is scoped to whichever context is on top.
 * - Paragraph-level formatting is not scoped: when a context is popped, its
 *   paragraph properties are carried over to the context below it.
 */
class ContextStack {
  private stack: MTextContext[];

  /**
   * Creates a stack whose root is the given context.
   * @param initial The MTextContext used as the base of the stack.
   */
  constructor(initial: MTextContext) {
    this.stack = [initial];
  }

  /**
   * Places the given context on top of the stack.
   * The object is stored as passed in; no copy is made here.
   * @param ctx The MTextContext to push.
   */
  push(ctx: MTextContext) {
    this.stack.push(ctx);
  }

  /**
   * Removes the top context and, if its paragraph properties differ from those
   * of the new top context, assigns a shallow copy of them to the new top.
   * The root context is never removed.
   * @returns The removed MTextContext, or undefined if only the root context is left.
   */
  pop(): MTextContext | undefined {
    const lastIndex = this.stack.length - 1;
    if (lastIndex < 1) {
      return undefined;
    }
    const [removed] = this.stack.splice(lastIndex, 1);
    const parent = this.stack[this.stack.length - 1];
    // Paragraph settings outlive the block they were set in.
    const parentParagraph = JSON.stringify(parent.paragraph);
    if (parentParagraph !== JSON.stringify(removed.paragraph)) {
      parent.paragraph = { ...removed.paragraph };
    }
    return removed;
  }

  /**
   * The context currently on top of the stack.
   */
  get current(): MTextContext {
    return this.stack[this.stack.length - 1];
  }

  /**
   * Number of contexts above the root, i.e. the current block nesting depth.
   */
  get depth(): number {
    return this.stack.length - 1;
  }

  /**
   * The bottom context of the stack, representing the global formatting state.
   */
  get root(): MTextContext {
    return this.stack[0];
  }

  /**
   * Overwrites the top entry of the stack with the given context.
   * @param ctx The context that becomes the current context.
   */
  setCurrent(ctx: MTextContext) {
    this.stack[this.stack.length - 1] = ctx;
  }
}
421
+
422
/**
 * Options accepted by the MText parser.
 * They control which tokens are emitted and how paragraph state and MIF codes are handled.
 */
export interface MTextParserOptions {
  /**
   * Emit a PROPERTIES_CHANGED token whenever a formatting command changes properties
   * such as color, font or alignment. When false, changes are applied to the
   * context silently and no token is produced.
   * @default false
   */
  yieldPropertyCommands?: boolean;
  /**
   * Reset paragraph properties (indent, margins, alignment, tab stops) to their
   * defaults at the start of each new paragraph.
   * @default false
   */
  resetParagraphParameters?: boolean;
  /**
   * Custom decoder for MIF (Multibyte Interchange Format) codes. When given, it
   * replaces the parser's built-in decoder. It receives the hex code string and
   * returns the decoded character.
   * @param hex - Hex code string (e.g., "C4E3" or "1A2B3")
   * @returns Decoded character or empty square (▯) if invalid
   * @default undefined (the built-in decoder is used)
   */
  mifDecoder?: (hex: string) => string;
  /**
   * Number of hex digits in a MIF code. MIF codes in AutoCAD can vary in length
   * depending on the SHX big font used (typically 4 or 5 digits).
   * With 'auto' (also used when the option is omitted) the parser first tries to
   * match 5 hex digits and falls back to 4.
   * @default 'auto'
   */
  mifCodeLength?: 4 | 5 | 'auto';
}
459
+
460
+ /**
461
+ * Main parser class for MText content
462
+ */
463
+ export class MTextParser {
464
+ private scanner: TextScanner;
465
+ private ctxStack: ContextStack;
466
+ private continueStroke: boolean = false;
467
+ private yieldPropertyCommands: boolean;
468
+ private resetParagraphParameters: boolean;
469
+ private inStackContext: boolean = false;
470
+ private mifDecoder: (hex: string) => string;
471
+ private mifCodeLength: 4 | 5 | 'auto';
472
+
473
/**
 * Creates a new MTextParser instance.
 * @param content - The MText content to parse
 * @param ctx - Initial MText context; a new MTextContext is created when omitted
 * @param options - Parser options; missing entries fall back to
 *   `false`, `false`, the built-in MIF decoder and `'auto'` respectively
 */
constructor(content: string, ctx?: MTextContext, options: MTextParserOptions = {}) {
  const { yieldPropertyCommands, resetParagraphParameters, mifDecoder, mifCodeLength } = options;

  this.scanner = new TextScanner(content);
  this.ctxStack = new ContextStack(ctx ?? new MTextContext());

  this.yieldPropertyCommands = yieldPropertyCommands ?? false;
  this.resetParagraphParameters = resetParagraphParameters ?? false;
  // Bind so the built-in decoder can be stored and called like a user-supplied one.
  this.mifDecoder = mifDecoder ?? this.decodeMultiByteChar.bind(this);
  this.mifCodeLength = mifCodeLength ?? 'auto';
}
488
+
489
/**
 * Built-in MIF decoder: turns a 4- or 5-digit hex code into the text it encodes.
 *
 * - 5 digits: the first digit selects the encoding ('1' → shift-jis, '2' → big5,
 *   anything else → gbk); the remaining four digits are the two bytes to decode.
 * - 4 digits: the two bytes are decoded as GBK, falling back to BIG5 when GBK
 *   rejects them.
 *
 * Decoding is strict (`fatal: true`). A non-fatal TextDecoder never fails; it
 * substitutes U+FFFD, so comparing its output with '▯' could never detect a
 * bad sequence and the BIG5 fallback was unreachable.
 *
 * @param hex - Hex code string (e.g. "C4E3" or "1A2B3")
 * @returns Decoded text, or '▯' when the code has another length, contains
 *   non-hex characters, or cannot be decoded
 */
private decodeMultiByteChar(hex: string): string {
  const PLACEHOLDER = '▯';

  if (!/^[0-9A-Fa-f]+$/.test(hex)) {
    return PLACEHOLDER;
  }

  // Interpret four hex digits as two bytes.
  const toBytes = (digits: string): Uint8Array =>
    new Uint8Array([parseInt(digits.slice(0, 2), 16), parseInt(digits.slice(2, 4), 16)]);

  // Returns null when the bytes are invalid for the encoding or the encoding
  // is not supported by the runtime.
  const decodeStrict = (bytes: Uint8Array, encoding: string): string | null => {
    try {
      return new TextDecoder(encoding, { fatal: true }).decode(bytes);
    } catch {
      return null;
    }
  };

  if (hex.length === 5) {
    // Per the original author's note: AutoCAD uses prefix 1 for Shift-JIS,
    // 2 for BIG5 and 5 for GBK. Other prefixes are unknown and treated as GBK.
    const prefix = hex[0];
    const encoding = prefix === '1' ? 'shift-jis' : prefix === '2' ? 'big5' : 'gbk';
    return decodeStrict(toBytes(hex.slice(1)), encoding) ?? PLACEHOLDER;
  }

  if (hex.length === 4) {
    const bytes = toBytes(hex);
    return decodeStrict(bytes, 'gbk') ?? decodeStrict(bytes, 'big5') ?? PLACEHOLDER;
  }

  return PLACEHOLDER;
}
543
+
544
/**
 * Look for a MIF hex code at the start of the scanner's remaining input.
 * The scanner position is not changed by this method.
 * @param length - Number of hex digits to match (4 or 5), or 'auto' to try 5 digits and then 4
 * @returns The matched hex digits, or null if the input does not start with a code of that length
 */
private extractMifCode(length: 4 | 5 | 'auto'): string | null {
  const digitCounts: number[] = length === 'auto' ? [5, 4] : [length];
  for (const digits of digitCounts) {
    const found = this.scanner.tail.match(new RegExp(`^[0-9A-Fa-f]{${digits}}`));
    if (found) {
      return found[0];
    }
  }
  return null;
}
566
+
567
+ /**
568
+ * Push the current (top) context onto the context stack again; the same object is pushed, no copy is made here.
569
+ */
570
+ private pushCtx(): void {
571
+ this.ctxStack.push(this.ctxStack.current);
572
+ }
573
+
574
+ /**
574
+ * Pop the top context via ContextStack.pop(), which keeps the root context in place and carries paragraph properties over to the new top context; the popped context is discarded.
575
+ */
576
+ private popCtx(): void {
577
+ this.ctxStack.pop();
578
+ }
580
+
581
/**
 * Parse a stacking expression: a numerator, a divider (`/`, `#` or `^`) and a denominator.
 *
 * The expression text is taken from the main scanner with `extractExpression(true)`
 * and then read character by character. A backslash escapes the next character,
 * so an escaped divider or `;` is treated as ordinary text. Unescaped control
 * characters (code below 32) are read as a space.
 *
 * @returns Tuple of [TokenType.STACK, [numerator, denominator, type]]; `type`
 *   is the divider character, or '' when the expression has none
 */
private parseStacking(): [TokenType, [string, string, string]] {
  const exprScanner = new TextScanner(this.extractExpression(true));

  // Read one logical character and report whether it was backslash-escaped.
  const readChar = (): { ch: string; escaped: boolean } => {
    const first = exprScanner.peek();
    if (first === '\\') {
      exprScanner.consume(1);
      const literal = exprScanner.peek();
      exprScanner.consume(1);
      return { ch: literal, escaped: true };
    }
    exprScanner.consume(1);
    return { ch: first.charCodeAt(0) < 32 ? ' ' : first, escaped: false };
  };

  // Numerator: everything up to the first unescaped divider.
  let numerator = '';
  let divider = '';
  while (exprScanner.hasData) {
    const { ch, escaped } = readChar();
    if (!escaped && (ch === '/' || ch === '#' || ch === '^')) {
      divider = ch;
      break;
    }
    numerator += ch;
  }

  // Denominator: only present when a divider was found; ends at an unescaped ';'.
  let denominator = '';
  if (divider) {
    // Leading spaces are dropped only after a caret divider.
    let dropSpaces = divider === '^';
    while (exprScanner.hasData) {
      const { ch, escaped } = readChar();
      if (dropSpaces && ch === ' ') {
        continue;
      }
      dropSpaces = false;
      if (!escaped && ch === ';') {
        break;
      }
      denominator += ch;
    }
  }

  // Special case for \S^!/^?;
  if (numerator === '' && denominator.includes('I/')) {
    return [TokenType.STACK, [' ', ' ', '/']];
  }

  return [TokenType.STACK, [numerator, denominator, divider]];
}
655
+
656
/**
 * Apply one property command to the current context.
 *
 * The current context is copied, the copy is modified according to `cmd`
 * (reading any command arguments from the scanner) and then installed as the
 * new current context.
 *
 * @param cmd - The property command to parse
 * @returns The change description when `yieldPropertyCommands` is enabled and
 *   at least one property differs; otherwise nothing
 * @throws Error if `cmd` is not a known property command
 */
private parseProperties(cmd: string): TokenData[TokenType.PROPERTIES_CHANGED] | void {
  const before = this.ctxStack.current.copy();
  const after = this.ctxStack.current.copy();

  // Switch one stroke on or off and keep the continue-stroke flag in step.
  const setStroke = (kind: 'underline' | 'overline' | 'strikeThrough', enabled: boolean): void => {
    after[kind] = enabled;
    if (enabled) {
      this.continueStroke = true;
    } else if (!after.hasAnyStroke) {
      this.continueStroke = false;
    }
  };

  switch (cmd) {
    case 'L':
      setStroke('underline', true);
      break;
    case 'l':
      setStroke('underline', false);
      break;
    case 'O':
      setStroke('overline', true);
      break;
    case 'o':
      setStroke('overline', false);
      break;
    case 'K':
      setStroke('strikeThrough', true);
      break;
    case 'k':
      setStroke('strikeThrough', false);
      break;
    case 'A':
      this.parseAlign(after);
      break;
    case 'C':
      this.parseAciColor(after);
      break;
    case 'c':
      this.parseRgbColor(after);
      break;
    case 'H':
      this.parseHeight(after);
      break;
    case 'W':
      this.parseWidth(after);
      break;
    case 'Q':
      this.parseOblique(after);
      break;
    case 'T':
      this.parseCharTracking(after);
      break;
    case 'p':
      this.parseParagraphProperties(after);
      break;
    case 'f':
    case 'F':
      this.parseFontProperties(after);
      break;
    default:
      throw new Error(`Unknown command: ${cmd}`);
  }

  // The flag always ends up mirroring the stroke state of the new context.
  this.continueStroke = after.hasAnyStroke;
  after.continueStroke = this.continueStroke;
  // Replace (rather than mutate) the current context.
  this.ctxStack.setCurrent(after);

  if (!this.yieldPropertyCommands) {
    return;
  }
  const changes = this.getPropertyChanges(before, after);
  if (Object.keys(changes).length === 0) {
    return;
  }
  return {
    command: cmd,
    changes,
    depth: this.ctxStack.depth,
  };
}
744
+
745
/**
 * Compute which properties differ between two contexts.
 *
 * Simple values are compared with `!==`, factor values field by field, and
 * `fontFace`, `paragraph` and `paragraph.tabs` by their JSON representation.
 * For paragraph settings only the individual entries that differ are reported.
 *
 * @param oldCtx - The old context
 * @param newCtx - The new context
 * @returns Object holding the new value of every property that changed; empty when nothing changed
 */
private getPropertyChanges(
  oldCtx: MTextContext,
  newCtx: MTextContext
): TokenData[TokenType.PROPERTIES_CHANGED]['changes'] {
  const diff: TokenData[TokenType.PROPERTIES_CHANGED]['changes'] = {};

  const jsonDiffers = (a: unknown, b: unknown): boolean => JSON.stringify(a) !== JSON.stringify(b);
  const factorDiffers = (a: FactorValue, b: FactorValue): boolean =>
    a.value !== b.value || a.isRelative !== b.isRelative;

  // Strokes
  if (oldCtx.underline !== newCtx.underline) diff.underline = newCtx.underline;
  if (oldCtx.overline !== newCtx.overline) diff.overline = newCtx.overline;
  if (oldCtx.strikeThrough !== newCtx.strikeThrough) diff.strikeThrough = newCtx.strikeThrough;

  // Colors
  if (oldCtx.color.aci !== newCtx.color.aci) diff.aci = newCtx.color.aci;
  if (oldCtx.color.rgbValue !== newCtx.color.rgbValue) diff.rgb = newCtx.color.rgb;

  // Line alignment and font
  if (oldCtx.align !== newCtx.align) diff.align = newCtx.align;
  if (jsonDiffers(oldCtx.fontFace, newCtx.fontFace)) diff.fontFace = newCtx.fontFace;

  // Size and spacing factors
  if (factorDiffers(oldCtx.capHeight, newCtx.capHeight)) diff.capHeight = newCtx.capHeight;
  if (factorDiffers(oldCtx.widthFactor, newCtx.widthFactor)) diff.widthFactor = newCtx.widthFactor;
  if (factorDiffers(oldCtx.charTrackingFactor, newCtx.charTrackingFactor)) {
    diff.charTrackingFactor = newCtx.charTrackingFactor;
  }

  if (oldCtx.oblique !== newCtx.oblique) diff.oblique = newCtx.oblique;

  // Paragraph settings: report only the entries that actually differ.
  const prevParagraph = oldCtx.paragraph;
  const nextParagraph = newCtx.paragraph;
  if (jsonDiffers(prevParagraph, nextParagraph)) {
    const paragraphDiff: Partial<ParagraphProperties> = {};
    if (prevParagraph.indent !== nextParagraph.indent) paragraphDiff.indent = nextParagraph.indent;
    if (prevParagraph.align !== nextParagraph.align) paragraphDiff.align = nextParagraph.align;
    if (prevParagraph.left !== nextParagraph.left) paragraphDiff.left = nextParagraph.left;
    if (prevParagraph.right !== nextParagraph.right) paragraphDiff.right = nextParagraph.right;
    if (jsonDiffers(prevParagraph.tabs, nextParagraph.tabs)) paragraphDiff.tabs = nextParagraph.tabs;
    if (Object.keys(paragraphDiff).length > 0) {
      diff.paragraph = paragraphDiff;
    }
  }

  return diff;
}
824
+
825
/**
 * Parse the argument of the line alignment command and store it in the context.
 *
 * Reads one value from the scanner: '0', '1' or '2' select BOTTOM, MIDDLE or
 * TOP; anything else selects BOTTOM. An optional terminator is consumed afterwards.
 *
 * The value is matched exactly rather than with `'012'.includes(...)`, because
 * `includes` is also true for the empty string and for substrings such as
 * '01', which would store NaN or an invalid alignment.
 * NOTE(review): presumably the scanner yields '' at end of input — confirm against TextScanner.
 *
 * @param ctx - The context to update
 */
private parseAlign(ctx: MTextContext): void {
  const char = this.scanner.get();
  switch (char) {
    case '1':
      ctx.align = MTextLineAlignment.MIDDLE;
      break;
    case '2':
      ctx.align = MTextLineAlignment.TOP;
      break;
    case '0':
    default:
      ctx.align = MTextLineAlignment.BOTTOM;
      break;
  }
  this.consumeOptionalTerminator();
}
838
+
839
/**
 * Parse the argument of the height command into `ctx.capHeight`.
 *
 * A number with an `x` suffix is stored as a relative factor, a plain number
 * as an absolute value. When no number follows, the context is left unchanged.
 * An optional terminator is consumed afterwards.
 * @param ctx - The context to update
 */
private parseHeight(ctx: MTextContext): void {
  const expr = this.extractFloatExpression(true);
  if (expr) {
    try {
      const isRelative = expr.endsWith('x');
      const value = parseFloat(isRelative ? expr.slice(0, -1) : expr);
      ctx.capHeight = { value, isRelative };
    } catch {
      // On failure, give the expression back to the scanner so it is treated as literal text.
      this.scanner.consume(-expr.length);
      return;
    }
  }
  this.consumeOptionalTerminator();
}
867
+
868
/**
 * Parse the argument of the width command into `ctx.widthFactor`.
 *
 * A number with an `x` suffix is stored as a relative factor, a plain number
 * as an absolute value. When no number follows, the context is left unchanged.
 * An optional terminator is consumed afterwards.
 * @param ctx - The context to update
 */
private parseWidth(ctx: MTextContext): void {
  const expr = this.extractFloatExpression(true);
  if (expr) {
    try {
      const isRelative = expr.endsWith('x');
      const value = parseFloat(isRelative ? expr.slice(0, -1) : expr);
      ctx.widthFactor = { value, isRelative };
    } catch {
      // On failure, give the expression back to the scanner so it is treated as literal text.
      this.scanner.consume(-expr.length);
      return;
    }
  }
  this.consumeOptionalTerminator();
}
896
+
897
/**
 * Parse the argument of the character tracking command into `ctx.charTrackingFactor`.
 *
 * A number with an `x` suffix is stored as a relative factor, a plain number
 * as an absolute value; in both cases the sign is dropped (absolute value).
 * When no number follows, the context is left unchanged. An optional
 * terminator is consumed afterwards.
 * @param ctx - The context to update
 */
private parseCharTracking(ctx: MTextContext): void {
  const expr = this.extractFloatExpression(true);
  if (expr) {
    try {
      const isRelative = expr.endsWith('x');
      const value = Math.abs(parseFloat(isRelative ? expr.slice(0, -1) : expr));
      ctx.charTrackingFactor = { value, isRelative };
    } catch {
      // On failure, give the expression back to the scanner so it is treated as literal text.
      this.scanner.consume(-expr.length);
      return;
    }
  }
  this.consumeOptionalTerminator();
}
925
+
926
/**
 * Read a float expression from the scanner and derive a new value from it.
 *
 * - `<number>x`: the current value multiplied by the number
 * - `<number>`: the number itself
 * - no number: the current value unchanged
 *
 * Negative numbers are allowed in both forms.
 * @param value - Current value to apply a factor to
 * @returns New value
 */
private parseFloatValueOrFactor(value: number): number {
  const expr = this.extractFloatExpression(true);
  if (!expr) {
    return value;
  }
  if (expr.endsWith('x')) {
    return value * parseFloat(expr.slice(0, -1));
  }
  return parseFloat(expr);
}
943
+
944
/**
 * Parse the argument of the oblique angle command into `ctx.oblique`.
 * When no number follows, the angle is left unchanged. An optional terminator
 * is consumed afterwards.
 * @param ctx - The context to update
 */
private parseOblique(ctx: MTextContext): void {
  const angleText = this.extractFloatExpression(false);
  if (angleText !== '') {
    ctx.oblique = parseFloat(angleText);
  }
  this.consumeOptionalTerminator();
}
955
+
956
/**
 * Parse the argument of the ACI color command into `ctx.color.aci`.
 * Values above 256 are ignored. When no digits follow, the color is left
 * unchanged. An optional terminator is consumed afterwards.
 * @param ctx - The context to update
 */
private parseAciColor(ctx: MTextContext): void {
  const digits = this.extractIntExpression();
  if (digits !== '') {
    const index = parseInt(digits);
    if (index <= 256) {
      ctx.color.aci = index;
    }
  }
  this.consumeOptionalTerminator();
}
970
+
971
/**
 * Parse the argument of the RGB color command into `ctx.color.rgbValue`.
 * Only the low 24 bits of the number are kept. When no digits follow, the
 * color is left unchanged. An optional terminator is consumed afterwards.
 * @param ctx - The context to update
 */
private parseRgbColor(ctx: MTextContext): void {
  const digits = this.extractIntExpression();
  if (digits !== '') {
    ctx.color.rgbValue = parseInt(digits) & 0xffffff;
  }
  this.consumeOptionalTerminator();
}
983
+
984
/**
 * Consume a float literal from the start of the scanner's remaining input.
 *
 * The literal may have a sign, a fractional part and an exponent. With
 * `relative` set, a trailing `x` is accepted as part of the expression.
 * @param relative - Whether to allow relative values (ending in 'x')
 * @returns The consumed text, or '' (with nothing consumed) when the input does not start with a number
 */
private extractFloatExpression(relative: boolean = false): string {
  const plainNumber = /^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?/;
  const numberWithSuffix = /^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?x?/;

  const found = (relative ? numberWithSuffix : plainNumber).exec(this.scanner.tail);
  if (found === null) {
    return '';
  }
  this.scanner.consume(found[0].length);
  return found[0];
}
1001
+
1002
/**
 * Consume a run of decimal digits located exactly at the scanner position.
 * @returns The consumed digits, or an empty string when the next character is not a digit
 */
private extractIntExpression(): string {
  const found = /^\d+/.exec(this.scanner.tail);
  if (found === null) {
    return '';
  }
  const [digits] = found;
  this.scanner.consume(digits.length);
  return digits;
}
1015
+
1016
/**
 * Consume and return the text up to the next semicolon, or the whole remainder
 * when no semicolon is found.
 * @param escape - Forwarded to the scanner's `find` to control escaped-semicolon handling
 * @returns Extracted expression
 */
private extractExpression(escape: boolean = false): string {
  const stop = this.scanner.find(';', escape);
  if (stop < 0) {
    // No terminator: everything that is left belongs to the expression.
    const rest = this.scanner.tail;
    this.scanner.consume(rest.length);
    return rest;
  }
  const offset = stop - this.scanner.currentIndex;
  // When the character right before the found semicolon is a backslash, the
  // semicolon itself is kept as part of the expression.
  const length = this.scanner.peek(offset - 1) === '\\' ? offset + 1 : offset;
  const expr = this.scanner.tail.slice(0, length);
  // Skip the expression plus one more character.
  this.scanner.consume(expr.length + 1);
  return expr;
}
1035
+
1036
/**
 * Parse a font command expression of the form `family|flag|flag...`.
 * Flags starting with `b1` select weight 700; `i` or flags starting with `i1`
 * select italic; flags starting with `i0` select regular style. The context is
 * left unchanged when the family name is empty.
 * @param ctx - The context to update
 */
private parseFontProperties(ctx: MTextContext): void {
  const [family, ...flags] = this.extractExpression().split('|');
  if (!family) {
    return;
  }

  let style: FontStyle = 'Regular';
  let weight = 400;
  for (const flag of flags) {
    if (flag.startsWith('b1')) {
      weight = 700;
    } else if (flag === 'i' || flag.startsWith('i1')) {
      style = 'Italic';
    } else if (flag.startsWith('i0')) {
      style = 'Regular';
    }
  }

  ctx.fontFace = { family, style, weight };
}
1064
+
1065
/**
 * Parse paragraph properties from the MText content.
 * Handles indentation (`i`), left margin (`l`), right margin (`r`), alignment
 * (`q`) and tab stops (`t`); `x` and unknown letters are skipped.
 * Indent, margins and alignment start from the context's current values; the
 * tab list starts empty. The result replaces `ctx.paragraph` as a whole.
 * @param ctx - The context to update
 */
private parseParagraphProperties(ctx: MTextContext): void {
  const scanner = new TextScanner(this.extractExpression());
  /** Current indentation value */
  let indent = ctx.paragraph.indent;
  /** Left margin value */
  let left = ctx.paragraph.left;
  /** Right margin value */
  let right = ctx.paragraph.right;
  /** Current paragraph alignment */
  let align = ctx.paragraph.align;
  /** Array of tab stop positions and types */
  let tabStops: (number | string)[] = [];

  /** Skip any run of comma separators at the scanner position. */
  const skipCommas = (): void => {
    while (scanner.peek() === ',') {
      scanner.consume(1);
    }
  };

  /**
   * Try to read a float (optional sign, fraction or leading-dot fraction,
   * exponent) at the scanner position, followed by optional commas.
   * @returns The parsed value, or null when no number starts here (nothing is consumed then)
   */
  const readFloat = (): number | null => {
    const match = scanner.tail.match(/^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?/);
    if (!match) {
      return null;
    }
    scanner.consume(match[0].length);
    skipCommas();
    return parseFloat(match[0]);
  };

  /**
   * Read a float at the scanner position.
   * @returns The parsed float value, or 0 if no valid number is found
   */
  const parseFloatValue = (): number => {
    const value = readFloat();
    return value === null ? 0 : value;
  };

  while (scanner.hasData) {
    const cmd = scanner.get();
    switch (cmd) {
      case 'i': // Indentation
        indent = parseFloatValue();
        break;
      case 'l': // Left margin
        left = parseFloatValue();
        break;
      case 'r': // Right margin
        right = parseFloatValue();
        break;
      case 'x': // Skip
        break;
      case 'q': {
        // Alignment
        const adjustment = scanner.get();
        align = CHAR_TO_ALIGN[adjustment] || MTextParagraphAlignment.DEFAULT;
        skipCommas();
        break;
      }
      case 't': // Tab stops: everything up to the end of the expression
        tabStops = [];
        while (scanner.hasData) {
          const type = scanner.peek();
          if (type === 'r' || type === 'c') {
            // Typed tab stop: stored as the type letter followed by the position.
            scanner.consume(1);
            tabStops.push(type + parseFloatValue().toString());
          } else {
            const value = readFloat();
            if (value !== null) {
              tabStops.push(value);
            } else {
              // Not a number: drop the offending character so the loop always
              // makes progress instead of spinning forever on invalid input.
              scanner.consume(1);
            }
          }
        }
        break;
    }
  }

  ctx.paragraph = {
    indent,
    left,
    right,
    align,
    tabs: tabStops,
  };
}
1153
+
1154
/**
 * Skip a single semicolon when it is the next character; otherwise do nothing.
 */
private consumeOptionalTerminator(): void {
  const atTerminator = this.scanner.peek() === ';';
  if (!atTerminator) {
    return;
  }
  this.scanner.consume(1);
}
1162
+
1163
/**
 * Parse MText content into tokens.
 *
 * Each yielded token carries a copy of the context that is current when the
 * token is emitted. After a NEW_PARAGRAPH token, paragraph properties are reset
 * when `resetParagraphParameters` is set, and a PROPERTIES_CHANGED token is
 * emitted for the reset when `yieldPropertyCommands` is set and something
 * actually changed. Parsing stops when the scanner is exhausted.
 * @yields MTextToken objects
 */
*parse(): Generator<MTextToken> {
  const wordToken = TokenType.WORD;
  const spaceToken = TokenType.SPACE;
  // Set by nextToken when a word was ended by a space-like character that has
  // already been consumed; the main loop emits it right after the word.
  let followupToken: TokenType | null = null;

  /**
   * Reset the paragraph properties of a context to their defaults.
   * @returns The subset of properties whose value differed from the default
   */
  function resetParagraph(ctx: MTextContext): Partial<ParagraphProperties> {
    const prev = { ...ctx.paragraph };
    ctx.paragraph = {
      indent: 0,
      left: 0,
      right: 0,
      align: MTextParagraphAlignment.DEFAULT,
      tabs: [],
    };
    const changed: Partial<ParagraphProperties> = {};
    if (prev.indent !== 0) changed.indent = 0;
    if (prev.left !== 0) changed.left = 0;
    if (prev.right !== 0) changed.right = 0;
    if (prev.align !== MTextParagraphAlignment.DEFAULT)
      changed.align = MTextParagraphAlignment.DEFAULT;
    if (JSON.stringify(prev.tabs) !== JSON.stringify([])) changed.tabs = [];
    return changed;
  }

  /**
   * Scan forward and return the next token as a [type, data] pair.
   * Returns [TokenType.NONE, null] once the input is exhausted.
   */
  const nextToken = (): [TokenType, TokenData[TokenType]] => {
    let word = '';
    while (this.scanner.hasData) {
      let escape = false;
      let letter = this.scanner.peek();
      const cmdStartIndex = this.scanner.currentIndex;

      // Handle control characters first
      if (letter.charCodeAt(0) < 32) {
        if (letter === '\t' || letter === '\n') {
          // Flush the pending word first and leave the control character in
          // place; the next call turns it into its own token.
          if (word) {
            return [wordToken, word];
          }
          this.scanner.consume(1);
          return [letter === '\t' ? TokenType.TABULATOR : TokenType.NEW_PARAGRAPH, null];
        }
        // Every other control character acts as exactly one space.
        this.scanner.consume(1);
        if (word) {
          followupToken = spaceToken;
          return [wordToken, word];
        }
        return [spaceToken, null];
      }

      if (letter === '\\') {
        if ('\\{}'.includes(this.scanner.peek(1))) {
          // Escaped backslash or brace: drop the backslash, keep the character as text.
          escape = true;
          this.scanner.consume(1);
          letter = this.scanner.peek();
        } else {
          // A command starts here: emit the pending word before handling it.
          if (word) {
            return [wordToken, word];
          }
          this.scanner.consume(1);
          const cmd = this.scanner.get();
          switch (cmd) {
            case '~':
              return [TokenType.NBSP, null];
            case 'P':
              return [TokenType.NEW_PARAGRAPH, null];
            case 'N':
              return [TokenType.NEW_COLUMN, null];
            case 'X':
              return [TokenType.WRAP_AT_DIMLINE, null];
            case 'S': {
              this.inStackContext = true;
              try {
                return this.parseStacking();
              } finally {
                // Always leave stack context, even when stacking fails.
                this.inStackContext = false;
              }
            }
            case 'm':
            case 'M':
              // Handle multi-byte character encoding (MIF)
              if (this.scanner.peek() === '+') {
                this.scanner.consume(1); // Consume the '+'
                const hexCode = this.extractMifCode(this.mifCodeLength);
                if (hexCode) {
                  this.scanner.consume(hexCode.length);
                  return [wordToken, this.mifDecoder(hexCode)];
                }
                // If no valid hex code found, rewind the '+' character
                this.scanner.consume(-1);
              }
              // Not a valid multi-byte code: keep the command as literal text,
              // preserving the letter case that was actually written.
              word += '\\' + cmd;
              continue;
            case 'U':
              // Handle Unicode escape: \U+XXXX or \U+XXXXXXXX
              if (this.scanner.peek() === '+') {
                this.scanner.consume(1); // Consume the '+'
                const hexMatch = this.scanner.tail.match(/^[0-9A-Fa-f]{4,8}/);
                if (hexMatch) {
                  const hexCode = hexMatch[0];
                  this.scanner.consume(hexCode.length);
                  const codePoint = parseInt(hexCode, 16);
                  let decodedChar = '';
                  try {
                    decodedChar = String.fromCodePoint(codePoint);
                  } catch {
                    // Code point outside the valid Unicode range.
                    decodedChar = '▯';
                  }
                  return [wordToken, decodedChar];
                }
                // If no valid hex code found, rewind the '+' character
                this.scanner.consume(-1);
              }
              // If not a valid Unicode code, treat as literal text
              word += '\\U';
              continue;
            default:
              if (cmd) {
                try {
                  const propertyChanges = this.parseProperties(cmd);
                  if (this.yieldPropertyCommands && propertyChanges) {
                    return [TokenType.PROPERTIES_CHANGED, propertyChanges];
                  }
                  // After processing a property command, continue with normal parsing
                  continue;
                } catch {
                  // The command could not be parsed: keep everything consumed
                  // since the backslash as literal text. The characters lie
                  // behind the cursor, so they are read with negative offsets.
                  let commandText = '';
                  for (
                    let offset = cmdStartIndex - this.scanner.currentIndex;
                    offset < 0;
                    offset++
                  ) {
                    commandText += this.scanner.peek(offset);
                  }
                  word += commandText;
                }
              }
          }
          continue;
        }
      }

      if (letter === '%' && this.scanner.peek(1) === '%') {
        const code = this.scanner.peek(2).toLowerCase();
        const specialChar = SPECIAL_CHAR_ENCODING[code];
        if (specialChar) {
          this.scanner.consume(3);
          word += specialChar;
          continue;
        } else {
          /**
           * Supports Control Codes: `%%ddd`, where ddd is a three-digit decimal number representing the ASCII code value of the character.
           *
           * Reference: https://help.autodesk.com/view/ACD/2026/ENU/?guid=GUID-968CBC1D-BA99-4519-ABDD-88419EB2BF92
           */
          const digits = [code, this.scanner.peek(3), this.scanner.peek(4)];

          if (digits.every(d => d >= '0' && d <= '9')) {
            const charCode = Number.parseInt(digits.join(''), 10);
            this.scanner.consume(5);
            word += String.fromCharCode(charCode);
          } else {
            // Skip invalid special character codes
            this.scanner.consume(3);
          }

          continue;
        }
      }

      if (letter === ' ') {
        if (word) {
          // The space ends the word; it is emitted as a follow-up SPACE token.
          this.scanner.consume(1);
          followupToken = spaceToken;
          return [wordToken, word];
        }
        this.scanner.consume(1);
        return [spaceToken, null];
      }

      if (!escape) {
        if (letter === '{') {
          if (word) {
            return [wordToken, word];
          }
          this.scanner.consume(1);
          this.pushCtx();
          continue;
        } else if (letter === '}') {
          if (word) {
            return [wordToken, word];
          }
          this.scanner.consume(1);
          // Context restoration with yieldPropertyCommands
          if (this.yieldPropertyCommands) {
            const prevCtx = this.ctxStack.current;
            this.popCtx();
            const changes = this.getPropertyChanges(prevCtx, this.ctxStack.current);
            if (Object.keys(changes).length > 0) {
              return [
                TokenType.PROPERTIES_CHANGED,
                { command: undefined, changes, depth: this.ctxStack.depth },
              ];
            }
          } else {
            this.popCtx();
          }
          continue;
        }
      }

      // Handle caret-encoded characters only when not in stack context
      if (!this.inStackContext && letter === '^') {
        const nextChar = this.scanner.peek(1);
        if (nextChar) {
          const code = nextChar.charCodeAt(0);
          if (code === 73 || code === 74) {
            // ^I (tab) / ^J (line feed): flush the pending word first and
            // leave the sequence in place so the next call emits its token.
            if (word) {
              return [wordToken, word];
            }
            this.scanner.consume(2);
            return [code === 73 ? TokenType.TABULATOR : TokenType.NEW_PARAGRAPH, null];
          }
          this.scanner.consume(2); // Consume both ^ and the next character
          if (code === 32) {
            // "^ " stands for a literal caret
            word += '^';
          } else if (code !== 77) {
            // ^M (carriage return) is ignored; anything else is unsupported
            word += '▯';
          }
          continue;
        }
      }

      this.scanner.consume(1);
      if (letter.charCodeAt(0) >= 32) {
        word += letter;
      }
    }

    if (word) {
      return [wordToken, word];
    }
    return [TokenType.NONE, null];
  };

  while (true) {
    const [type, data] = nextToken();
    if (!type) {
      // TokenType.NONE: input exhausted.
      break;
    }
    yield new MTextToken(type, this.ctxStack.current.copy(), data);
    if (type === TokenType.NEW_PARAGRAPH && this.resetParagraphParameters) {
      // Reset paragraph properties and emit PROPERTIES_CHANGED if needed
      const ctx = this.ctxStack.current;
      const changed = resetParagraph(ctx);
      if (this.yieldPropertyCommands && Object.keys(changed).length > 0) {
        yield new MTextToken(TokenType.PROPERTIES_CHANGED, ctx.copy(), {
          command: undefined,
          changes: { paragraph: changed },
          depth: this.ctxStack.depth,
        });
      }
    }
    if (followupToken) {
      yield new MTextToken(followupToken, this.ctxStack.current.copy(), null);
      followupToken = null;
    }
  }
}
1442
+ }
1443
+
1444
/**
 * Cursor-based reader over a string, used for parsing MText content.
 * The cursor always stays within `[0, text.length]`.
 */
export class TextScanner {
  private text: string;
  private textLen: number;
  private _index: number;

  /**
   * Create a scanner positioned at the start of the text.
   * @param text - The text to scan
   */
  constructor(text: string) {
    this.text = text;
    this.textLen = text.length;
    this._index = 0;
  }

  /**
   * Current cursor position within the text.
   */
  get currentIndex(): number {
    return this._index;
  }

  /**
   * True once the cursor has reached the end of the text.
   */
  get isEmpty(): boolean {
    return this._index >= this.textLen;
  }

  /**
   * True while at least one character is left to read.
   */
  get hasData(): boolean {
    return this._index < this.textLen;
  }

  /**
   * Read the character under the cursor and move one position forward.
   * @returns The character read, or an empty string when the text is exhausted
   */
  get(): string {
    return this.isEmpty ? '' : this.text[this._index++];
  }

  /**
   * Move the cursor by `count` positions; negative counts move backwards.
   * The resulting position is clamped to the bounds of the text.
   * @param count - Number of characters to advance
   */
  consume(count: number = 1): void {
    const target = Math.min(this._index + count, this.textLen);
    this._index = Math.max(0, target);
  }

  /**
   * Read a character relative to the cursor without moving it.
   * @param offset - Offset from the current position (may be negative)
   * @returns The character at that position, or an empty string if out of bounds
   */
  peek(offset: number = 0): string {
    const at = this._index + offset;
    if (at < 0 || at >= this.textLen) {
      return '';
    }
    return this.text[at];
  }

  /**
   * Search forward from the cursor for a character.
   * With `escape` enabled, a backslash followed by `char` yields the index of
   * that following character; a backslash followed by anything else skips both.
   * @param char - The character to find
   * @param escape - Whether to give backslashes the special treatment above
   * @returns Absolute index of the character, or -1 if not found
   */
  find(char: string, escape: boolean = false): number {
    for (let pos = this._index; pos < this.textLen; pos++) {
      const current = this.text[pos];
      if (escape && current === '\\') {
        if (pos + 1 < this.textLen) {
          if (this.text[pos + 1] === char) {
            return pos + 1;
          }
          // Step over the escaped character; the loop increment covers the backslash.
          pos++;
        }
        continue;
      }
      if (current === char) {
        return pos;
      }
    }
    return -1;
  }

  /**
   * The text from the cursor to the end.
   */
  get tail(): string {
    return this.text.slice(this._index);
  }

  /**
   * Whether the character under the cursor is a space.
   */
  isNextSpace(): boolean {
    return this.peek() === ' ';
  }

  /**
   * Advance past a run of spaces at the cursor.
   * @returns Number of spaces consumed
   */
  consumeSpaces(): number {
    let skipped = 0;
    for (; this.isNextSpace(); skipped++) {
      this.consume();
    }
    return skipped;
  }
}
1572
+
1573
/**
 * Color state for MText, expressed either as an AutoCAD Color Index (ACI) or
 * as an RGB value.
 *
 * Assigning a valid ACI clears the RGB value, and assigning an RGB value clears
 * the ACI. RGB is stored as one 24-bit integer (0xRRGGBB).
 *
 * Example usage:
 * ```ts
 * const color1 = new MTextColor(1); // ACI color
 * const color2 = new MTextColor([255, 0, 0]); // RGB color
 * const color3 = new MTextColor(); // Default (ACI=256, "by layer")
 * ```
 */
export class MTextColor {
  /**
   * The AutoCAD Color Index (ACI) value, or null while an RGB color is set.
   * @default 256 ("by layer")
   */
  private _aci: number | null = 256;
  /**
   * The RGB color packed as 0xRRGGBB, or null if not set.
   * @default null
   */
  private _rgbValue: number | null = null;

  /**
   * Create a new MTextColor instance.
   * @param color A number selects an ACI color, an [r, g, b] array selects RGB,
   *   anything else selects the default (ACI=256).
   * @throws Error if a numeric color is outside [0, 256].
   */
  constructor(color?: number | RGB | null) {
    if (Array.isArray(color)) {
      this.rgb = color;
      return;
    }
    this.aci = typeof color === 'number' ? color : 256;
  }

  /**
   * The current ACI color value.
   * @returns The ACI color (0-256), or null if using RGB.
   */
  get aci(): number | null {
    return this._aci;
  }

  /**
   * Set the ACI color value. A non-null value also clears any RGB color.
   * @param value The ACI color (0-256), or null to unset.
   * @throws Error if value is out of range.
   */
  set aci(value: number | null) {
    if (value === null) {
      this._aci = null;
      return;
    }
    if (!(value >= 0 && value <= 256)) {
      throw new Error('ACI not in range [0, 256]');
    }
    this._aci = value;
    this._rgbValue = null;
  }

  /**
   * The current RGB color unpacked into a tuple.
   * @returns [r, g, b], or null if no RGB value is set.
   */
  get rgb(): RGB | null {
    const packed = this._rgbValue;
    if (packed === null) {
      return null;
    }
    // Split 0xRRGGBB into its three 8-bit channels.
    return [(packed >> 16) & 0xff, (packed >> 8) & 0xff, packed & 0xff];
  }

  /**
   * Set the RGB color. A tuple also clears the ACI color.
   * @param value The RGB color tuple [r, g, b], or null to clear the RGB value.
   */
  set rgb(value: RGB | null) {
    if (!value) {
      this._rgbValue = null;
      return;
    }
    const [r, g, b] = value;
    // Each channel is truncated to 8 bits before packing.
    this._rgbValue = ((r & 0xff) << 16) | ((g & 0xff) << 8) | (b & 0xff);
    this._aci = null;
  }

  /**
   * True when an RGB value is set.
   */
  get isRgb(): boolean {
    return this._rgbValue !== null;
  }

  /**
   * True when no RGB value is set and an ACI value is present.
   */
  get isAci(): boolean {
    return this._rgbValue === null && this._aci !== null;
  }

  /**
   * The packed RGB value (0xRRGGBB), or null if not set.
   * Assigning a number masks it to 24 bits and clears the ACI value.
   */
  get rgbValue(): number | null {
    return this._rgbValue;
  }

  set rgbValue(val: number | null) {
    if (val === null) {
      this._rgbValue = null;
      return;
    }
    this._rgbValue = val & 0xffffff;
    this._aci = null;
  }

  /**
   * Create an independent copy of this color.
   * @returns A new MTextColor instance with the same color state.
   */
  copy(): MTextColor {
    const clone = new MTextColor();
    clone._aci = this._aci;
    clone._rgbValue = this._rgbValue;
    return clone;
  }

  /**
   * Build a plain object for serialization.
   * @returns An object with aci, rgb (tuple), and rgbValue (number or null).
   */
  toObject(): { aci: number | null; rgb: RGB | null; rgbValue: number | null } {
    const { _aci: aci, rgb, _rgbValue: rgbValue } = this;
    return { aci, rgb, rgbValue };
  }

  /**
   * Convert the current color to a CSS color string via `colorNumberToHex`.
   * Returns null when no RGB value is set.
   */
  toCssColor(): string | null {
    return this._rgbValue === null ? null : colorNumberToHex(this._rgbValue);
  }

  /**
   * Create an MTextColor from a CSS color string.
   * Returns null when `cssColorToRgbValue` cannot produce an RGB value for it.
   */
  static fromCssColor(value: string | null | undefined): MTextColor | null {
    const packed = cssColorToRgbValue(value);
    if (packed === null) {
      return null;
    }
    const result = new MTextColor();
    result.rgbValue = packed;
    return result;
  }

  /**
   * Equality check for color.
   * @param other The other MTextColor to compare.
   * @returns True if both ACI and RGB values are equal.
   */
  equals(other: MTextColor): boolean {
    if (this._aci !== other._aci) {
      return false;
    }
    return this._rgbValue === other._rgbValue;
  }
}
1746
+
1747
/**
 * Formatting state that applies to MText content at one point of the parse:
 * stroke flags, color, alignment, font face, size factors, oblique angle and
 * paragraph properties.
 */
export class MTextContext {
  /** Bit set of active MTextStroke flags */
  private _stroke: number = 0;
  /** Whether to continue stroke formatting */
  continueStroke: boolean = false;
  /** Color (ACI or RGB) */
  color: MTextColor = new MTextColor();
  /** Line alignment */
  align: MTextLineAlignment = MTextLineAlignment.BOTTOM;
  /** Font face properties */
  fontFace: FontFace = { family: '', style: 'Regular', weight: 400 };
  /** Capital letter height */
  private _capHeight: FactorValue = { value: 1.0, isRelative: false };
  /** Character width factor */
  private _widthFactor: FactorValue = { value: 1.0, isRelative: false };
  /**
   * Character tracking factor: a multiplier applied to the default spacing between characters in the MText object.
   * - Value = 1.0 → Normal spacing.
   * - Value < 1.0 → Characters are closer together.
   * - Value > 1.0 → Characters are spaced farther apart.
   */
  private _charTrackingFactor: FactorValue = { value: 1.0, isRelative: false };
  /** Oblique angle */
  oblique: number = 0.0;
  /** Paragraph properties */
  paragraph: ParagraphProperties = {
    indent: 0,
    left: 0,
    right: 0,
    align: MTextParagraphAlignment.DEFAULT,
    tabs: [],
  };

  /**
   * The capital letter height.
   */
  get capHeight(): FactorValue {
    return this._capHeight;
  }

  /**
   * Store a new capital letter height; the magnitude is kept, the sign dropped.
   * @param value - Height value
   */
  set capHeight(value: FactorValue) {
    const { value: raw, isRelative } = value;
    this._capHeight = { value: Math.abs(raw), isRelative };
  }

  /**
   * The character width factor.
   */
  get widthFactor(): FactorValue {
    return this._widthFactor;
  }

  /**
   * Store a new character width factor; the magnitude is kept, the sign dropped.
   * @param value - Width factor value
   */
  set widthFactor(value: FactorValue) {
    const { value: raw, isRelative } = value;
    this._widthFactor = { value: Math.abs(raw), isRelative };
  }

  /**
   * The character tracking factor.
   */
  get charTrackingFactor(): FactorValue {
    return this._charTrackingFactor;
  }

  /**
   * Store a new character tracking factor; the magnitude is kept, the sign dropped.
   * @param value - Tracking factor value
   */
  set charTrackingFactor(value: FactorValue) {
    const { value: raw, isRelative } = value;
    this._charTrackingFactor = { value: Math.abs(raw), isRelative };
  }

  /**
   * The ACI color value of the context color.
   */
  get aci(): number | null {
    return this.color.aci;
  }

  /**
   * Set the ACI color value on the context color.
   * @param value - ACI color value (0-256)
   * @throws Error if value is out of range
   */
  set aci(value: number) {
    this.color.aci = value;
  }

  /**
   * The RGB color value of the context color.
   */
  get rgb(): RGB | null {
    return this.color.rgb;
  }

  /**
   * Set the RGB color value on the context color.
   */
  set rgb(value: RGB | null) {
    this.color.rgb = value;
  }

  /**
   * Whether the current text should be rendered in italic style.
   * @returns True if the font style is 'Italic', otherwise false.
   */
  get italic(): boolean {
    return this.fontFace.style === 'Italic';
  }
  /**
   * Switch the font style between italic and regular.
   * @param value - If true, sets the font style to 'Italic'; if false, sets it to 'Regular'.
   */
  set italic(value: boolean) {
    if (value) {
      this.fontFace.style = 'Italic';
    } else {
      this.fontFace.style = 'Regular';
    }
  }

  /**
   * Whether the current text should be rendered in bold style.
   * A missing weight counts as 400.
   * @returns True if the font weight is 700 or higher, otherwise false.
   */
  get bold(): boolean {
    const weight = this.fontFace.weight || 400;
    return weight >= 700;
  }
  /**
   * Switch the font weight between bold and normal.
   * @param value - If true, sets the font weight to 700; if false, sets it to 400.
   */
  set bold(value: boolean) {
    if (value) {
      this.fontFace.weight = 700;
    } else {
      this.fontFace.weight = 400;
    }
  }

  /**
   * Whether text is underlined.
   */
  get underline(): boolean {
    return (this._stroke & MTextStroke.UNDERLINE) !== 0;
  }

  /**
   * Turn underlining on or off.
   * @param value - Whether to underline
   */
  set underline(value: boolean) {
    this._setStrokeState(MTextStroke.UNDERLINE, value);
  }

  /**
   * Whether text has strike-through.
   */
  get strikeThrough(): boolean {
    return (this._stroke & MTextStroke.STRIKE_THROUGH) !== 0;
  }

  /**
   * Turn strike-through on or off.
   * @param value - Whether to strike through
   */
  set strikeThrough(value: boolean) {
    this._setStrokeState(MTextStroke.STRIKE_THROUGH, value);
  }

  /**
   * Whether text has an overline.
   */
  get overline(): boolean {
    return (this._stroke & MTextStroke.OVERLINE) !== 0;
  }

  /**
   * Turn the overline on or off.
   * @param value - Whether to overline
   */
  set overline(value: boolean) {
    this._setStrokeState(MTextStroke.OVERLINE, value);
  }

  /**
   * Whether any stroke flag is currently set.
   */
  get hasAnyStroke(): boolean {
    return !!this._stroke;
  }

  /**
   * Set or clear one stroke flag in the stroke bit set.
   * @param stroke - The stroke type to set
   * @param state - Whether to enable or disable the stroke
   */
  private _setStrokeState(stroke: MTextStroke, state: boolean = true): void {
    this._stroke = state ? this._stroke | stroke : this._stroke & ~stroke;
  }

  /**
   * Create a copy of this context.
   * The color is copied via its own `copy()`; font face, factor values and
   * paragraph properties are copied one level deep.
   * @returns A new context with the same properties
   */
  copy(): MTextContext {
    const clone = new MTextContext();
    clone._stroke = this._stroke;
    clone.continueStroke = this.continueStroke;
    clone.color = this.color.copy();
    clone.align = this.align;
    clone.fontFace = { ...this.fontFace };
    clone._capHeight = { ...this._capHeight };
    clone._widthFactor = { ...this._widthFactor };
    clone._charTrackingFactor = { ...this._charTrackingFactor };
    clone.oblique = this.oblique;
    clone.paragraph = { ...this.paragraph };
    return clone;
  }
}
1982
+
1983
/**
 * A single token produced while parsing MText: its type, the formatting
 * context attached to it, and its type-specific payload.
 */
export class MTextToken {
  /** The token type */
  public type: TokenType;
  /** The text context at this token */
  public ctx: MTextContext;
  /** Token data; its shape depends on the token type */
  public data: TokenData[TokenType];

  /**
   * Create a new MText token
   * @param type - The token type
   * @param ctx - The text context at this token
   * @param data - Token data for this token type
   */
  constructor(type: TokenType, ctx: MTextContext, data: TokenData[TokenType]) {
    this.type = type;
    this.ctx = ctx;
    this.data = data;
  }
}