pdf-lite 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/EXAMPLES.md CHANGED
@@ -1109,7 +1109,7 @@ console.log('Created form-empty.pdf with empty form fields')
1109
1109
  const emptyFormBytes = await fs.readFile(`${tmpFolder}/form-empty.pdf`)
1110
1110
  const filledDocument = await PdfDocument.fromBytes([emptyFormBytes])
1111
1111
 
1112
- const acroform = await filledDocument.acroForm.getAcroForm()
1112
+ const acroform = await filledDocument.acroForm.read()
1113
1113
  if (!acroform) {
1114
1114
  throw new Error('No AcroForm found in the document')
1115
1115
  }
@@ -37,8 +37,10 @@ export declare class PdfAcroFormField extends PdfDictionary<{
37
37
  }> {
38
38
  parent?: PdfAcroFormField;
39
39
  readonly container?: PdfIndirectObject;
40
+ form?: PdfAcroForm;
40
41
  constructor(options?: {
41
42
  container?: PdfIndirectObject;
43
+ form?: PdfAcroForm;
42
44
  });
43
45
  /**
44
46
  * Gets the field type
@@ -68,6 +70,11 @@ export declare class PdfAcroFormField extends PdfDictionary<{
68
70
  */
69
71
  set defaultValue(val: string);
70
72
  get value(): string;
73
+ /**
74
+ * Gets the cached encoding map for this field's font, if available.
75
+ * Returns undefined if no encoding has been cached yet.
76
+ */
77
+ private getCachedEncodingMap;
71
78
  set value(val: string);
72
79
  get checked(): boolean;
73
80
  set checked(isChecked: boolean);
@@ -132,10 +139,13 @@ export declare class PdfAcroForm<T extends Record<string, string> = Record<strin
132
139
  }> {
133
140
  fields: PdfAcroFormField[];
134
141
  readonly container?: PdfIndirectObject;
142
+ readonly fontEncodingMaps: Map<string, Map<number, string> | null>;
143
+ private document?;
135
144
  constructor(options: {
136
145
  dict: PdfDictionary;
137
146
  fields?: PdfAcroFormField[];
138
147
  container?: PdfIndirectObject;
148
+ document?: PdfDocument;
139
149
  });
140
150
  /**
141
151
  * Gets the NeedAppearances flag
@@ -177,7 +187,18 @@ export declare class PdfAcroForm<T extends Record<string, string> = Record<strin
177
187
  setValues(values: Partial<T>): void;
178
188
  importData(fields: T): void;
179
189
  exportData(): Partial<T>;
190
+ /**
191
+ * Gets the encoding map for a specific font in the form's resources.
192
+ * Returns null if no custom encoding is found.
193
+ * Results are cached for performance.
194
+ */
195
+ getFontEncodingMap(fontName: string): Promise<Map<number, string> | null>;
180
196
  static fromDocument(document: PdfDocument): Promise<PdfAcroForm | null>;
197
+ /**
198
+ * Pre-caches encoding maps for all fonts used in the form fields.
199
+ * This makes subsequent field value access faster and synchronous.
200
+ */
201
+ private cacheAllFontEncodings;
181
202
  /**
182
203
  * Gets or creates the Annots array for a page.
183
204
  * Returns the array and metadata about whether it's an indirect object.
@@ -6,6 +6,7 @@ import { PdfIndirectObject } from '../core/objects/pdf-indirect-object.js';
6
6
  import { PdfName } from '../core/objects/pdf-name.js';
7
7
  import { PdfBoolean } from '../core/objects/pdf-boolean.js';
8
8
  import { PdfNumber } from '../core/objects/pdf-number.js';
9
+ import { buildEncodingMap, decodeWithFontEncoding, } from '../utils/decodeWithFontEncoding.js';
9
10
  /**
10
11
  * Field types for AcroForm fields
11
12
  */
@@ -18,9 +19,11 @@ export const PdfFieldType = {
18
19
  export class PdfAcroFormField extends PdfDictionary {
19
20
  parent;
20
21
  container;
22
+ form;
21
23
  constructor(options) {
22
24
  super();
23
25
  this.container = options?.container;
26
+ this.form = options?.form;
24
27
  }
25
28
  /**
26
29
  * Gets the field type
@@ -81,7 +84,12 @@ export class PdfAcroFormField extends PdfDictionary {
81
84
  * Gets the field name
82
85
  */
83
86
  get name() {
84
- return this.get('T')?.as(PdfString)?.value ?? '';
87
+ const parentName = this.parent?.name ?? '';
88
+ const ownName = this.get('T')?.as(PdfString)?.value ?? '';
89
+ if (parentName && ownName) {
90
+ return `${parentName}.${ownName}`;
91
+ }
92
+ return parentName || ownName;
85
93
  }
86
94
  /**
87
95
  * Sets the field name
@@ -117,6 +125,15 @@ export class PdfAcroFormField extends PdfDictionary {
117
125
  get value() {
118
126
  const v = this.get('V');
119
127
  if (v instanceof PdfString) {
128
+ // UTF-16BE strings should always use UTF-16BE decoding regardless of font encoding
129
+ if (v.isUTF16BE) {
130
+ return v.value; // Use PdfString's built-in UTF-16BE decoder
131
+ }
132
+ // Try to use custom font encoding if available
133
+ const encodingMap = this.getCachedEncodingMap();
134
+ if (encodingMap !== undefined) {
135
+ return decodeWithFontEncoding(v.raw, encodingMap);
136
+ }
120
137
  return v.value;
121
138
  }
122
139
  else if (v instanceof PdfName) {
@@ -124,6 +141,24 @@ export class PdfAcroFormField extends PdfDictionary {
124
141
  }
125
142
  return '';
126
143
  }
144
+ /**
145
+ * Gets the cached encoding map for this field's font, if available.
146
+ * Returns undefined if no encoding has been cached yet.
147
+ */
148
+ getCachedEncodingMap() {
149
+ if (!this.form)
150
+ return undefined;
151
+ // Parse font name from DA (default appearance) string
152
+ const da = this.get('DA')?.as(PdfString)?.value;
153
+ if (!da)
154
+ return undefined;
155
+ // Extract font name from DA string (format: /FontName size Tf ...)
156
+ const fontMatch = da.match(/\/(\w+)\s+[\d.]+\s+Tf/);
157
+ if (!fontMatch)
158
+ return undefined;
159
+ const fontName = fontMatch[1];
160
+ return this.form.fontEncodingMaps.get(fontName);
161
+ }
127
162
  set value(val) {
128
163
  const fieldType = this.get('FT')?.as(PdfName)?.value;
129
164
  if (fieldType === PdfFieldType.Button) {
@@ -298,11 +333,14 @@ export class PdfAcroFormField extends PdfDictionary {
298
333
  export class PdfAcroForm extends PdfDictionary {
299
334
  fields;
300
335
  container;
336
+ fontEncodingMaps = new Map();
337
+ document;
301
338
  constructor(options) {
302
339
  super();
303
340
  this.copyFrom(options.dict);
304
341
  this.fields = options.fields ?? [];
305
342
  this.container = options.container;
343
+ this.document = options.document;
306
344
  }
307
345
  /**
308
346
  * Gets the NeedAppearances flag
@@ -383,6 +421,69 @@ export class PdfAcroForm extends PdfDictionary {
383
421
  }
384
422
  return result;
385
423
  }
424
+ /**
425
+ * Gets the encoding map for a specific font in the form's resources.
426
+ * Returns null if no custom encoding is found.
427
+ * Results are cached for performance.
428
+ */
429
+ async getFontEncodingMap(fontName) {
430
+ // Check cache first
431
+ if (this.fontEncodingMaps.has(fontName)) {
432
+ return this.fontEncodingMaps.get(fontName);
433
+ }
434
+ // Get the font from DR (default resources)
435
+ const dr = this.get('DR')?.as(PdfDictionary);
436
+ if (!dr) {
437
+ this.fontEncodingMaps.set(fontName, null);
438
+ return null;
439
+ }
440
+ const fonts = dr.get('Font')?.as(PdfDictionary);
441
+ if (!fonts) {
442
+ this.fontEncodingMaps.set(fontName, null);
443
+ return null;
444
+ }
445
+ const fontRef = fonts.get(fontName)?.as(PdfObjectReference);
446
+ if (!fontRef || !this.document) {
447
+ this.fontEncodingMaps.set(fontName, null);
448
+ return null;
449
+ }
450
+ // Read the font object
451
+ const fontObj = await this.document.readObject({
452
+ objectNumber: fontRef.objectNumber,
453
+ generationNumber: fontRef.generationNumber,
454
+ });
455
+ if (!fontObj) {
456
+ this.fontEncodingMaps.set(fontName, null);
457
+ return null;
458
+ }
459
+ const fontDict = fontObj.content.as(PdfDictionary);
460
+ const encoding = fontDict.get('Encoding');
461
+ // Handle encoding reference
462
+ let encodingDict = null;
463
+ if (encoding instanceof PdfObjectReference) {
464
+ const encodingObj = await this.document.readObject({
465
+ objectNumber: encoding.objectNumber,
466
+ generationNumber: encoding.generationNumber,
467
+ });
468
+ encodingDict = encodingObj?.content.as(PdfDictionary) ?? null;
469
+ }
470
+ else if (encoding instanceof PdfDictionary) {
471
+ encodingDict = encoding;
472
+ }
473
+ if (!encodingDict) {
474
+ this.fontEncodingMaps.set(fontName, null);
475
+ return null;
476
+ }
477
+ // Parse the Differences array
478
+ const differences = encodingDict.get('Differences')?.as(PdfArray);
479
+ if (!differences) {
480
+ this.fontEncodingMaps.set(fontName, null);
481
+ return null;
482
+ }
483
+ const encodingMap = buildEncodingMap(differences);
484
+ this.fontEncodingMaps.set(fontName, encodingMap);
485
+ return encodingMap;
486
+ }
386
487
  static async fromDocument(document) {
387
488
  const catalog = document.rootDictionary;
388
489
  if (!catalog)
@@ -413,14 +514,16 @@ export class PdfAcroForm extends PdfDictionary {
413
514
  const acroForm = new PdfAcroForm({
414
515
  dict: acroFormDict,
415
516
  container: acroFormContainer,
517
+ document,
416
518
  });
417
- const getFields = async (fields, seen = new Set(), parent) => {
418
- for (const fieldRef of fields.items) {
419
- const refKey = fieldRef.toString();
420
- if (seen.has(refKey)) {
519
+ const fields = new Map();
520
+ const getFields = async (fieldRefs, parent) => {
521
+ for (const fieldRef of fieldRefs.items) {
522
+ const refKey = fieldRef.toString().trim();
523
+ if (fields.has(refKey)) {
524
+ fields.get(refKey).parent = parent;
421
525
  continue;
422
526
  }
423
- seen.add(refKey);
424
527
  const fieldObject = await document.readObject({
425
528
  objectNumber: fieldRef.objectNumber,
426
529
  generationNumber: fieldRef.generationNumber,
@@ -431,15 +534,17 @@ export class PdfAcroForm extends PdfDictionary {
431
534
  continue;
432
535
  const field = new PdfAcroFormField({
433
536
  container: fieldObject,
537
+ form: acroForm,
434
538
  });
435
539
  field.parent = parent;
436
540
  field.copyFrom(fieldObject.content);
437
541
  // Process child fields (Kids) before adding the parent
438
542
  const kids = field.get('Kids')?.as((PdfArray));
439
543
  if (kids) {
440
- await getFields(kids, seen, field);
544
+ await getFields(kids, field);
441
545
  }
442
546
  acroForm.fields.push(field);
547
+ fields.set(refKey, field);
443
548
  }
444
549
  };
445
550
  const fieldsArray = new PdfArray();
@@ -459,8 +564,31 @@ export class PdfAcroForm extends PdfDictionary {
459
564
  }
460
565
  }
461
566
  await getFields(fieldsArray);
567
+ // Pre-cache font encoding maps for all fonts used in fields
568
+ await acroForm.cacheAllFontEncodings();
462
569
  return acroForm;
463
570
  }
571
+ /**
572
+ * Pre-caches encoding maps for all fonts used in the form fields.
573
+ * This makes subsequent field value access faster and synchronous.
574
+ */
575
+ async cacheAllFontEncodings() {
576
+ const fontNames = new Set();
577
+ // Collect all font names from field DA strings
578
+ for (const field of this.fields) {
579
+ const da = field.get('DA')?.as(PdfString)?.value;
580
+ if (da) {
581
+ const fontMatch = da.match(/\/(\w+)\s+[\d.]+\s+Tf/);
582
+ if (fontMatch) {
583
+ fontNames.add(fontMatch[1]);
584
+ }
585
+ }
586
+ }
587
+ // Pre-cache encoding for each font
588
+ for (const fontName of fontNames) {
589
+ await this.getFontEncodingMap(fontName);
590
+ }
591
+ }
464
592
  /**
465
593
  * Gets or creates the Annots array for a page.
466
594
  * Returns the array and metadata about whether it's an indirect object.
@@ -11,12 +11,12 @@ export declare class PdfAcroFormManager {
11
11
  * Checks if the document contains AcroForm fields.
12
12
  * @returns True if the document has AcroForm fields, false otherwise
13
13
  */
14
- hasAcroForm(): Promise<boolean>;
14
+ exists(): Promise<boolean>;
15
15
  /**
16
16
  * Gets the AcroForm object from the document catalog.
17
17
  * @returns The AcroForm object or null if not found
18
18
  */
19
- getAcroForm(): Promise<PdfAcroForm | null>;
19
+ read(): Promise<PdfAcroForm | null>;
20
20
  /**
21
21
  * Writes the provided AcroForm to the associated PDF document.
22
22
  * @param acroForm The AcroForm instance to serialize into the document.
@@ -12,9 +12,9 @@ export class PdfAcroFormManager {
12
12
  * Checks if the document contains AcroForm fields.
13
13
  * @returns True if the document has AcroForm fields, false otherwise
14
14
  */
15
- async hasAcroForm() {
15
+ async exists() {
16
16
  try {
17
- const acroForm = await this.getAcroForm();
17
+ const acroForm = await this.read();
18
18
  return acroForm !== null;
19
19
  }
20
20
  catch {
@@ -25,7 +25,7 @@ export class PdfAcroFormManager {
25
25
  * Gets the AcroForm object from the document catalog.
26
26
  * @returns The AcroForm object or null if not found
27
27
  */
28
- async getAcroForm() {
28
+ async read() {
29
29
  return await PdfAcroForm.fromDocument(this.document);
30
30
  }
31
31
  /**
@@ -179,7 +179,7 @@ export class PdfDecoder extends IncrementalParser {
179
179
  out = new PdfBoolean(token.value);
180
180
  }
181
181
  else if (token instanceof PdfHexadecimalToken) {
182
- out = new PdfHexadecimal(token.raw, 'hex');
182
+ out = new PdfHexadecimal(token.raw, 'hex', token.originalBytes);
183
183
  }
184
184
  else if (token instanceof PdfNullToken) {
185
185
  out = new PdfNull();
@@ -188,7 +188,7 @@ export class PdfDecoder extends IncrementalParser {
188
188
  out = new PdfObjectReference(token.objectNumber, token.generationNumber);
189
189
  }
190
190
  else if (token instanceof PdfStringToken) {
191
- out = new PdfString(token.value);
191
+ out = new PdfString(token.value, token.originalBytes);
192
192
  }
193
193
  else {
194
194
  throw new Error(`Unknown primitive token type: ${token.type}`);
@@ -7,7 +7,12 @@ export declare class PdfHexadecimal extends PdfObject {
7
7
  * NB: This is the hexadecimal representation, not the actual byte values.
8
8
  */
9
9
  raw: ByteArray;
10
- constructor(value: string | ByteArray, format?: 'hex' | 'bytes');
10
+ /**
11
+ * Original bytes from the PDF file, including angle brackets.
12
+ * Used to preserve exact formatting for incremental updates.
13
+ */
14
+ private _originalBytes?;
15
+ constructor(value: string | ByteArray, format?: 'hex' | 'bytes', originalBytes?: ByteArray);
11
16
  static toHexadecimal(data: string | ByteArray): PdfHexadecimal;
12
17
  get bytes(): ByteArray;
13
18
  toHexBytes(): ByteArray;
@@ -10,7 +10,12 @@ export class PdfHexadecimal extends PdfObject {
10
10
  * NB: This is the hexadecimal representation, not the actual byte values.
11
11
  */
12
12
  raw;
13
- constructor(value, format = 'hex') {
13
+ /**
14
+ * Original bytes from the PDF file, including angle brackets.
15
+ * Used to preserve exact formatting for incremental updates.
16
+ */
17
+ _originalBytes;
18
+ constructor(value, format = 'hex', originalBytes) {
14
19
  super();
15
20
  let bytes;
16
21
  if (format === 'bytes') {
@@ -20,6 +25,7 @@ export class PdfHexadecimal extends PdfObject {
20
25
  bytes = value instanceof Uint8Array ? value : stringToBytes(value);
21
26
  }
22
27
  this.raw = bytes;
28
+ this._originalBytes = originalBytes;
23
29
  }
24
30
  static toHexadecimal(data) {
25
31
  return new PdfHexadecimal(data, 'bytes');
@@ -34,9 +40,11 @@ export class PdfHexadecimal extends PdfObject {
34
40
  return bytesToString(this.toHexBytes());
35
41
  }
36
42
  tokenize() {
37
- return [new PdfHexadecimalToken(this.raw)];
43
+ return [new PdfHexadecimalToken(this.raw, this._originalBytes)];
38
44
  }
39
45
  clone() {
40
- return new PdfHexadecimal(new Uint8Array(this.raw));
46
+ return new PdfHexadecimal(new Uint8Array(this.raw), 'hex', this._originalBytes
47
+ ? new Uint8Array(this._originalBytes)
48
+ : undefined);
41
49
  }
42
50
  }
@@ -6,9 +6,19 @@ export declare class PdfString extends PdfObject {
6
6
  * The raw bytes of the PDF string.
7
7
  */
8
8
  private _raw;
9
- constructor(raw: ByteArray | string);
9
+ /**
10
+ * Original bytes from the PDF file, including parentheses and escape sequences.
11
+ * Used to preserve exact formatting for incremental updates.
12
+ */
13
+ private _originalBytes?;
14
+ constructor(raw: ByteArray | string, originalBytes?: ByteArray);
10
15
  get raw(): ByteArray;
11
16
  set raw(raw: ByteArray);
17
+ /**
18
+ * Checks if this string is UTF-16BE encoded (has UTF-16BE BOM).
19
+ * UTF-16BE strings start with the byte order mark 0xFE 0xFF.
20
+ */
21
+ get isUTF16BE(): boolean;
12
22
  get value(): string;
13
23
  protected tokenize(): PdfStringToken[];
14
24
  clone(): this;
@@ -10,7 +10,12 @@ export class PdfString extends PdfObject {
10
10
  * The raw bytes of the PDF string.
11
11
  */
12
12
  _raw;
13
- constructor(raw) {
13
+ /**
14
+ * Original bytes from the PDF file, including parentheses and escape sequences.
15
+ * Used to preserve exact formatting for incremental updates.
16
+ */
17
+ _originalBytes;
18
+ constructor(raw, originalBytes) {
14
19
  super();
15
20
  if (typeof raw === 'string') {
16
21
  // Check if the string contains non-ASCII characters
@@ -26,6 +31,7 @@ export class PdfString extends PdfObject {
26
31
  else {
27
32
  this._raw = raw;
28
33
  }
34
+ this._originalBytes = originalBytes;
29
35
  }
30
36
  get raw() {
31
37
  return this._raw;
@@ -33,21 +39,30 @@ export class PdfString extends PdfObject {
33
39
  set raw(raw) {
34
40
  this.setModified();
35
41
  this._raw = raw;
42
+ // Clear original bytes when modified
43
+ this._originalBytes = undefined;
44
+ }
45
+ /**
46
+ * Checks if this string is UTF-16BE encoded (has UTF-16BE BOM).
47
+ * UTF-16BE strings start with the byte order mark 0xFE 0xFF.
48
+ */
49
+ get isUTF16BE() {
50
+ return (this.raw.length >= 2 && this.raw[0] === 0xfe && this.raw[1] === 0xff);
36
51
  }
37
52
  get value() {
38
53
  // Check for UTF-16BE BOM (0xFE 0xFF)
39
- if (this.raw.length >= 2 &&
40
- this.raw[0] === 0xfe &&
41
- this.raw[1] === 0xff) {
54
+ if (this.isUTF16BE) {
42
55
  return decodeFromUTF16BE(this.raw);
43
56
  }
44
57
  // Default: use PDFDocEncoding
45
58
  return decodeFromPDFDocEncoding(this.raw);
46
59
  }
47
60
  tokenize() {
48
- return [new PdfStringToken(this.raw)];
61
+ return [new PdfStringToken(this.raw, this._originalBytes)];
49
62
  }
50
63
  clone() {
51
- return new PdfString(new Uint8Array(this.raw));
64
+ return new PdfString(new Uint8Array(this.raw), this._originalBytes
65
+ ? new Uint8Array(this._originalBytes)
66
+ : undefined);
52
67
  }
53
68
  }
@@ -1,5 +1,6 @@
1
1
  import { assert } from '../utils/assert.js';
2
2
  import { bytesToString } from '../utils/bytesToString.js';
3
+ import { unescapeString } from '../utils/unescapeString.js';
3
4
  import { IncrementalParser } from './incremental-parser.js';
4
5
  import { PdfBooleanToken } from './tokens/boolean-token.js';
5
6
  import { PdfCommentToken } from './tokens/comment-token.js';
@@ -128,7 +129,8 @@ export class PdfByteStreamTokeniser extends IncrementalParser {
128
129
  nameBytes.push(this.next());
129
130
  byte = this.peek();
130
131
  }
131
- return new PdfNameToken(bytesToString(new Uint8Array(nameBytes)));
132
+ const name = bytesToString(new Uint8Array(nameBytes));
133
+ return new PdfNameToken(name);
132
134
  }
133
135
  nextDictionaryEndToken() {
134
136
  this.expect(ByteMap.RIGHT_ANGLE_BRACKET);
@@ -136,6 +138,8 @@ export class PdfByteStreamTokeniser extends IncrementalParser {
136
138
  return new PdfEndDictionaryToken();
137
139
  }
138
140
  nextHexadecimalToken() {
141
+ // Capture starting position (before the opening angle bracket)
142
+ const startIndex = this.bufferIndex;
139
143
  this.expect(ByteMap.LEFT_ANGLE_BRACKET);
140
144
  const hexBytes = [];
141
145
  let byte = this.peek();
@@ -146,7 +150,10 @@ export class PdfByteStreamTokeniser extends IncrementalParser {
146
150
  byte = this.peek();
147
151
  }
148
152
  this.expect(ByteMap.RIGHT_ANGLE_BRACKET);
149
- return new PdfHexadecimalToken(new Uint8Array(hexBytes));
153
+ // Capture original bytes including angle brackets for incremental updates
154
+ const endIndex = this.bufferIndex; // After the closing angle bracket
155
+ const originalBytes = new Uint8Array(this.buffer.slice(startIndex, endIndex));
156
+ return new PdfHexadecimalToken(new Uint8Array(hexBytes), originalBytes);
150
157
  }
151
158
  nextNumberToken() {
152
159
  const numberBytes = [];
@@ -187,8 +194,11 @@ export class PdfByteStreamTokeniser extends IncrementalParser {
187
194
  return new PdfEndArrayToken();
188
195
  }
189
196
  nextStringToken() {
197
+ // Capture starting position (before the opening parenthesis)
198
+ const startIndex = this.bufferIndex;
190
199
  this.expect(ByteMap.LEFT_PARENTHESIS);
191
- const stringBytes = [];
200
+ // Collect raw bytes until we find the matching closing parenthesis
201
+ const rawBytes = [];
192
202
  let nesting = 1;
193
203
  let inEscape = false;
194
204
  while (inEscape || nesting > 0) {
@@ -196,87 +206,34 @@ export class PdfByteStreamTokeniser extends IncrementalParser {
196
206
  if (byte === null) {
197
207
  throw new Error('Unexpected end of input in string token');
198
208
  }
199
- if (byte === ByteMap.LEFT_PARENTHESIS) {
200
- nesting++;
201
- }
202
- else if (byte === ByteMap.RIGHT_PARENTHESIS) {
203
- nesting--;
204
- if (nesting === 0) {
205
- break;
206
- }
207
- }
208
- else if (byte === ByteMap.BACKSLASH || inEscape) {
209
- inEscape = true;
210
- const next = this.next();
211
- let found = false;
212
- if (this.inputOffset >= 829528) {
213
- console.log('here', this.inputOffset, next);
214
- found = true;
215
- }
216
- if (next === null) {
217
- throw new Error('Unexpected end of input in string token');
209
+ // Add byte to rawBytes first (including the closing parenthesis)
210
+ rawBytes.push(byte);
211
+ // Track nesting level for proper parenthesis matching
212
+ if (!inEscape) {
213
+ if (byte === ByteMap.LEFT_PARENTHESIS) {
214
+ nesting++;
218
215
  }
219
- switch (next) {
220
- case ByteMap.n:
221
- stringBytes.push(0x0a);
222
- break; // \n
223
- case ByteMap.r:
224
- stringBytes.push(0x0d);
225
- break; // \r
226
- case ByteMap.t:
227
- stringBytes.push(0x09);
228
- break; // \t
229
- case ByteMap.b:
230
- stringBytes.push(0x08);
231
- break; // \b
232
- case ByteMap.f:
233
- stringBytes.push(0x0c);
234
- break; // \f
235
- case ByteMap.LEFT_PARENTHESIS:
236
- stringBytes.push(ByteMap.LEFT_PARENTHESIS);
237
- break; // \(
238
- case ByteMap.RIGHT_PARENTHESIS:
239
- stringBytes.push(ByteMap.RIGHT_PARENTHESIS);
240
- break; // \)
241
- case ByteMap.BACKSLASH:
242
- stringBytes.push(ByteMap.BACKSLASH);
243
- break; // \\
244
- case ByteMap.LINE_FEED: // Line feed
245
- case ByteMap.CARRIAGE_RETURN: // Carriage return
246
- stringBytes.push(next);
247
- break;
248
- default:
249
- if (PdfByteStreamTokeniser.isOctet(next)) {
250
- let octal = String.fromCharCode(next);
251
- // Octal: up to 3 digits
252
- const next2 = this.peek();
253
- if (next2 === null) {
254
- throw new Error('Unexpected end of input in string token');
255
- }
256
- if (PdfByteStreamTokeniser.isOctet(next2)) {
257
- octal += String.fromCharCode(this.next());
258
- }
259
- const next3 = this.peek();
260
- if (next3 === null) {
261
- throw new Error('Unexpected end of input in string token');
262
- }
263
- if (PdfByteStreamTokeniser.isOctet(next3)) {
264
- octal += String.fromCharCode(this.next());
265
- }
266
- stringBytes.push(parseInt(octal, 8));
267
- }
268
- else {
269
- // If it's not a valid escape sequence, just add the next byte
270
- stringBytes.push(next);
271
- }
216
+ else if (byte === ByteMap.RIGHT_PARENTHESIS) {
217
+ nesting--;
218
+ if (nesting === 0) {
272
219
  break;
220
+ }
221
+ }
222
+ else if (byte === ByteMap.BACKSLASH) {
223
+ inEscape = true;
273
224
  }
225
+ }
226
+ else {
274
227
  inEscape = false;
275
- continue;
276
228
  }
277
- stringBytes.push(byte);
278
229
  }
279
- return new PdfStringToken(new Uint8Array(stringBytes));
230
+ // Capture original bytes including parentheses for incremental updates
231
+ const endIndex = this.bufferIndex; // After the closing parenthesis
232
+ const originalBytes = new Uint8Array(this.buffer.slice(startIndex, endIndex));
233
+ // Use unescapeString utility to process escape sequences
234
+ // unescapeString expects bytes including the closing parenthesis
235
+ const unescapedBytes = unescapeString(new Uint8Array(rawBytes));
236
+ return new PdfStringToken(unescapedBytes, originalBytes);
280
237
  }
281
238
  nextEndObjectToken() {
282
239
  this.expect(ByteMap.e);
@@ -2,6 +2,13 @@ import { ByteArray } from '../../types.js';
2
2
  import { PdfToken } from './token.js';
3
3
  export declare class PdfHexadecimalToken extends PdfToken {
4
4
  raw: ByteArray;
5
- constructor(hexadecimal: string | ByteArray);
5
+ /**
6
+ * Original bytes from the PDF file, including angle brackets.
7
+ * Used to preserve exact formatting for incremental updates.
8
+ * @internal - Non-enumerable to avoid affecting test comparisons
9
+ */
10
+ private _originalBytes?;
11
+ constructor(hexadecimal: string | ByteArray, originalBytes?: ByteArray);
12
+ get originalBytes(): ByteArray | undefined;
6
13
  private static toBytes;
7
14
  }