npm - pdf-lite - Versions diffs - 1.2.0 → 1.3.0 - Mend

pdf-lite 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/EXAMPLES.md +1 -1
package/dist/acroform/acroform.d.ts +21 -0
package/dist/acroform/acroform.js +135 -7
package/dist/acroform/manager.d.ts +2 -2
package/dist/acroform/manager.js +3 -3
package/dist/core/decoder.js +2 -2
package/dist/core/objects/pdf-hexadecimal.d.ts +6 -1
package/dist/core/objects/pdf-hexadecimal.js +11 -3
package/dist/core/objects/pdf-string.d.ts +11 -1
package/dist/core/objects/pdf-string.js +21 -6
package/dist/core/tokeniser.js +35 -78
package/dist/core/tokens/hexadecimal-token.d.ts +8 -1
package/dist/core/tokens/hexadecimal-token.js +20 -2
package/dist/core/tokens/name-token.js +0 -3
package/dist/core/tokens/string-token.d.ts +8 -1
package/dist/core/tokens/string-token.js +20 -2
package/dist/pdf/pdf-document.d.ts +6 -6
package/dist/pdf/pdf-document.js +21 -21
package/dist/utils/decodeWithFontEncoding.d.ts +20 -0
package/dist/utils/decodeWithFontEncoding.js +67 -0
package/dist/utils/escapeString.d.ts +1 -1
package/dist/utils/escapeString.js +12 -3
package/dist/utils/glyphNameToUnicode.d.ts +10 -0
package/dist/utils/glyphNameToUnicode.js +4292 -0
package/package.json +1 -1

package/EXAMPLES.md CHANGED Viewed

@@ -1109,7 +1109,7 @@ console.log('Created form-empty.pdf with empty form fields')
 const emptyFormBytes = await fs.readFile(`${tmpFolder}/form-empty.pdf`)
 const filledDocument = await PdfDocument.fromBytes([emptyFormBytes])
-const acroform = await filledDocument.acroForm.getAcroForm()
+const acroform = await filledDocument.acroForm.read()
 if (!acroform) {
     throw new Error('No AcroForm found in the document')
 }

package/dist/acroform/acroform.d.ts CHANGED Viewed

@@ -37,8 +37,10 @@ export declare class PdfAcroFormField extends PdfDictionary<{
 }> {
     parent?: PdfAcroFormField;
     readonly container?: PdfIndirectObject;
+    form?: PdfAcroForm;
     constructor(options?: {
         container?: PdfIndirectObject;
+        form?: PdfAcroForm;
     });
     /**
      * Gets the field type
@@ -68,6 +70,11 @@ export declare class PdfAcroFormField extends PdfDictionary<{
      */
     set defaultValue(val: string);
     get value(): string;
+    /**
+     * Gets the cached encoding map for this field's font, if available.
+     * Returns undefined if no encoding has been cached yet.
+     */
+    private getCachedEncodingMap;
     set value(val: string);
     get checked(): boolean;
     set checked(isChecked: boolean);
@@ -132,10 +139,13 @@ export declare class PdfAcroForm<T extends Record<string, string> = Record<strin
 }> {
     fields: PdfAcroFormField[];
     readonly container?: PdfIndirectObject;
+    readonly fontEncodingMaps: Map<string, Map<number, string> | null>;
+    private document?;
     constructor(options: {
         dict: PdfDictionary;
         fields?: PdfAcroFormField[];
         container?: PdfIndirectObject;
+        document?: PdfDocument;
     });
     /**
      * Gets the NeedAppearances flag
@@ -177,7 +187,18 @@ export declare class PdfAcroForm<T extends Record<string, string> = Record<strin
     setValues(values: Partial<T>): void;
     importData(fields: T): void;
     exportData(): Partial<T>;
+    /**
+     * Gets the encoding map for a specific font in the form's resources.
+     * Returns null if no custom encoding is found.
+     * Results are cached for performance.
+     */
+    getFontEncodingMap(fontName: string): Promise<Map<number, string> | null>;
     static fromDocument(document: PdfDocument): Promise<PdfAcroForm | null>;
+    /**
+     * Pre-caches encoding maps for all fonts used in the form fields.
+     * This makes subsequent field value access faster and synchronous.
+     */
+    private cacheAllFontEncodings;
     /**
      * Gets or creates the Annots array for a page.
      * Returns the array and metadata about whether it's an indirect object.

package/dist/acroform/acroform.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { PdfIndirectObject } from '../core/objects/pdf-indirect-object.js';
 import { PdfName } from '../core/objects/pdf-name.js';
 import { PdfBoolean } from '../core/objects/pdf-boolean.js';
 import { PdfNumber } from '../core/objects/pdf-number.js';
+import { buildEncodingMap, decodeWithFontEncoding, } from '../utils/decodeWithFontEncoding.js';
 /**
  * Field types for AcroForm fields
  */
@@ -18,9 +19,11 @@ export const PdfFieldType = {
 export class PdfAcroFormField extends PdfDictionary {
     parent;
     container;
+    form;
     constructor(options) {
         super();
         this.container = options?.container;
+        this.form = options?.form;
     }
     /**
      * Gets the field type
@@ -81,7 +84,12 @@ export class PdfAcroFormField extends PdfDictionary {
      * Gets the field name
      */
     get name() {
-        return this.get('T')?.as(PdfString)?.value ?? '';
+        const parentName = this.parent?.name ?? '';
+        const ownName = this.get('T')?.as(PdfString)?.value ?? '';
+        if (parentName && ownName) {
+            return `${parentName}.${ownName}`;
+        }
+        return parentName || ownName;
     }
     /**
      * Sets the field name
@@ -117,6 +125,15 @@ export class PdfAcroFormField extends PdfDictionary {
     get value() {
         const v = this.get('V');
         if (v instanceof PdfString) {
+            // UTF-16BE strings should always use UTF-16BE decoding regardless of font encoding
+            if (v.isUTF16BE) {
+                return v.value; // Use PdfString's built-in UTF-16BE decoder
+            }
+            // Try to use custom font encoding if available
+            const encodingMap = this.getCachedEncodingMap();
+            if (encodingMap !== undefined) {
+                return decodeWithFontEncoding(v.raw, encodingMap);
+            }
             return v.value;
         }
         else if (v instanceof PdfName) {
@@ -124,6 +141,24 @@ export class PdfAcroFormField extends PdfDictionary {
         }
         return '';
     }
+    /**
+     * Gets the cached encoding map for this field's font, if available.
+     * Returns undefined if no encoding has been cached yet.
+     */
+    getCachedEncodingMap() {
+        if (!this.form)
+            return undefined;
+        // Parse font name from DA (default appearance) string
+        const da = this.get('DA')?.as(PdfString)?.value;
+        if (!da)
+            return undefined;
+        // Extract font name from DA string (format: /FontName size Tf ...)
+        const fontMatch = da.match(/\/(\w+)\s+[\d.]+\s+Tf/);
+        if (!fontMatch)
+            return undefined;
+        const fontName = fontMatch[1];
+        return this.form.fontEncodingMaps.get(fontName);
+    }
     set value(val) {
         const fieldType = this.get('FT')?.as(PdfName)?.value;
         if (fieldType === PdfFieldType.Button) {
@@ -298,11 +333,14 @@ export class PdfAcroFormField extends PdfDictionary {
 export class PdfAcroForm extends PdfDictionary {
     fields;
     container;
+    fontEncodingMaps = new Map();
+    document;
     constructor(options) {
         super();
         this.copyFrom(options.dict);
         this.fields = options.fields ?? [];
         this.container = options.container;
+        this.document = options.document;
     }
     /**
      * Gets the NeedAppearances flag
@@ -383,6 +421,69 @@ export class PdfAcroForm extends PdfDictionary {
         }
         return result;
     }
+    /**
+     * Gets the encoding map for a specific font in the form's resources.
+     * Returns null if no custom encoding is found.
+     * Results are cached for performance.
+     */
+    async getFontEncodingMap(fontName) {
+        // Check cache first
+        if (this.fontEncodingMaps.has(fontName)) {
+            return this.fontEncodingMaps.get(fontName);
+        }
+        // Get the font from DR (default resources)
+        const dr = this.get('DR')?.as(PdfDictionary);
+        if (!dr) {
+            this.fontEncodingMaps.set(fontName, null);
+            return null;
+        }
+        const fonts = dr.get('Font')?.as(PdfDictionary);
+        if (!fonts) {
+            this.fontEncodingMaps.set(fontName, null);
+            return null;
+        }
+        const fontRef = fonts.get(fontName)?.as(PdfObjectReference);
+        if (!fontRef || !this.document) {
+            this.fontEncodingMaps.set(fontName, null);
+            return null;
+        }
+        // Read the font object
+        const fontObj = await this.document.readObject({
+            objectNumber: fontRef.objectNumber,
+            generationNumber: fontRef.generationNumber,
+        });
+        if (!fontObj) {
+            this.fontEncodingMaps.set(fontName, null);
+            return null;
+        }
+        const fontDict = fontObj.content.as(PdfDictionary);
+        const encoding = fontDict.get('Encoding');
+        // Handle encoding reference
+        let encodingDict = null;
+        if (encoding instanceof PdfObjectReference) {
+            const encodingObj = await this.document.readObject({
+                objectNumber: encoding.objectNumber,
+                generationNumber: encoding.generationNumber,
+            });
+            encodingDict = encodingObj?.content.as(PdfDictionary) ?? null;
+        }
+        else if (encoding instanceof PdfDictionary) {
+            encodingDict = encoding;
+        }
+        if (!encodingDict) {
+            this.fontEncodingMaps.set(fontName, null);
+            return null;
+        }
+        // Parse the Differences array
+        const differences = encodingDict.get('Differences')?.as(PdfArray);
+        if (!differences) {
+            this.fontEncodingMaps.set(fontName, null);
+            return null;
+        }
+        const encodingMap = buildEncodingMap(differences);
+        this.fontEncodingMaps.set(fontName, encodingMap);
+        return encodingMap;
+    }
     static async fromDocument(document) {
         const catalog = document.rootDictionary;
         if (!catalog)
@@ -413,14 +514,16 @@ export class PdfAcroForm extends PdfDictionary {
         const acroForm = new PdfAcroForm({
             dict: acroFormDict,
             container: acroFormContainer,
+            document,
         });
-        const getFields = async (fields, seen = new Set(), parent) => {
-            for (const fieldRef of fields.items) {
-                const refKey = fieldRef.toString();
-                if (seen.has(refKey)) {
+        const fields = new Map();
+        const getFields = async (fieldRefs, parent) => {
+            for (const fieldRef of fieldRefs.items) {
+                const refKey = fieldRef.toString().trim();
+                if (fields.has(refKey)) {
+                    fields.get(refKey).parent = parent;
                     continue;
                 }
-                seen.add(refKey);
                 const fieldObject = await document.readObject({
                     objectNumber: fieldRef.objectNumber,
                     generationNumber: fieldRef.generationNumber,
@@ -431,15 +534,17 @@ export class PdfAcroForm extends PdfDictionary {
                     continue;
                 const field = new PdfAcroFormField({
                     container: fieldObject,
+                    form: acroForm,
                 });
                 field.parent = parent;
                 field.copyFrom(fieldObject.content);
                 // Process child fields (Kids) before adding the parent
                 const kids = field.get('Kids')?.as((PdfArray));
                 if (kids) {
-                    await getFields(kids, seen, field);
+                    await getFields(kids, field);
                 }
                 acroForm.fields.push(field);
+                fields.set(refKey, field);
             }
         };
         const fieldsArray = new PdfArray();
@@ -459,8 +564,31 @@ export class PdfAcroForm extends PdfDictionary {
             }
         }
         await getFields(fieldsArray);
+        // Pre-cache font encoding maps for all fonts used in fields
+        await acroForm.cacheAllFontEncodings();
         return acroForm;
     }
+    /**
+     * Pre-caches encoding maps for all fonts used in the form fields.
+     * This makes subsequent field value access faster and synchronous.
+     */
+    async cacheAllFontEncodings() {
+        const fontNames = new Set();
+        // Collect all font names from field DA strings
+        for (const field of this.fields) {
+            const da = field.get('DA')?.as(PdfString)?.value;
+            if (da) {
+                const fontMatch = da.match(/\/(\w+)\s+[\d.]+\s+Tf/);
+                if (fontMatch) {
+                    fontNames.add(fontMatch[1]);
+                }
+            }
+        }
+        // Pre-cache encoding for each font
+        for (const fontName of fontNames) {
+            await this.getFontEncodingMap(fontName);
+        }
+    }
     /**
      * Gets or creates the Annots array for a page.
      * Returns the array and metadata about whether it's an indirect object.

package/dist/acroform/manager.d.ts CHANGED Viewed

@@ -11,12 +11,12 @@ export declare class PdfAcroFormManager {
      * Checks if the document contains AcroForm fields.
      * @returns True if the document has AcroForm fields, false otherwise
      */
-    hasAcroForm(): Promise<boolean>;
+    exists(): Promise<boolean>;
     /**
      * Gets the AcroForm object from the document catalog.
      * @returns The AcroForm object or null if not found
      */
-    getAcroForm(): Promise<PdfAcroForm | null>;
+    read(): Promise<PdfAcroForm | null>;
     /**
      * Writes the provided AcroForm to the associated PDF document.
      * @param acroForm The AcroForm instance to serialize into the document.

package/dist/acroform/manager.js CHANGED Viewed

@@ -12,9 +12,9 @@ export class PdfAcroFormManager {
      * Checks if the document contains AcroForm fields.
      * @returns True if the document has AcroForm fields, false otherwise
      */
-    async hasAcroForm() {
+    async exists() {
         try {
-            const acroForm = await this.getAcroForm();
+            const acroForm = await this.read();
             return acroForm !== null;
         }
         catch {
@@ -25,7 +25,7 @@ export class PdfAcroFormManager {
      * Gets the AcroForm object from the document catalog.
      * @returns The AcroForm object or null if not found
      */
-    async getAcroForm() {
+    async read() {
         return await PdfAcroForm.fromDocument(this.document);
     }
     /**

package/dist/core/decoder.js CHANGED Viewed

@@ -179,7 +179,7 @@ export class PdfDecoder extends IncrementalParser {
             out = new PdfBoolean(token.value);
         }
         else if (token instanceof PdfHexadecimalToken) {
-            out = new PdfHexadecimal(token.raw, 'hex');
+            out = new PdfHexadecimal(token.raw, 'hex', token.originalBytes);
         }
         else if (token instanceof PdfNullToken) {
             out = new PdfNull();
@@ -188,7 +188,7 @@ export class PdfDecoder extends IncrementalParser {
             out = new PdfObjectReference(token.objectNumber, token.generationNumber);
         }
         else if (token instanceof PdfStringToken) {
-            out = new PdfString(token.value);
+            out = new PdfString(token.value, token.originalBytes);
         }
         else {
             throw new Error(`Unknown primitive token type: ${token.type}`);

package/dist/core/objects/pdf-hexadecimal.d.ts CHANGED Viewed

@@ -7,7 +7,12 @@ export declare class PdfHexadecimal extends PdfObject {
      * NB: This is  the hexadecimal representation, not the actual byte values.
      */
     raw: ByteArray;
-    constructor(value: string | ByteArray, format?: 'hex' | 'bytes');
+    /**
+     * Original bytes from the PDF file, including angle brackets.
+     * Used to preserve exact formatting for incremental updates.
+     */
+    private _originalBytes?;
+    constructor(value: string | ByteArray, format?: 'hex' | 'bytes', originalBytes?: ByteArray);
     static toHexadecimal(data: string | ByteArray): PdfHexadecimal;
     get bytes(): ByteArray;
     toHexBytes(): ByteArray;

package/dist/core/objects/pdf-hexadecimal.js CHANGED Viewed

@@ -10,7 +10,12 @@ export class PdfHexadecimal extends PdfObject {
      * NB: This is  the hexadecimal representation, not the actual byte values.
      */
     raw;
-    constructor(value, format = 'hex') {
+    /**
+     * Original bytes from the PDF file, including angle brackets.
+     * Used to preserve exact formatting for incremental updates.
+     */
+    _originalBytes;
+    constructor(value, format = 'hex', originalBytes) {
         super();
         let bytes;
         if (format === 'bytes') {
@@ -20,6 +25,7 @@ export class PdfHexadecimal extends PdfObject {
             bytes = value instanceof Uint8Array ? value : stringToBytes(value);
         }
         this.raw = bytes;
+        this._originalBytes = originalBytes;
     }
     static toHexadecimal(data) {
         return new PdfHexadecimal(data, 'bytes');
@@ -34,9 +40,11 @@ export class PdfHexadecimal extends PdfObject {
         return bytesToString(this.toHexBytes());
     }
     tokenize() {
-        return [new PdfHexadecimalToken(this.raw)];
+        return [new PdfHexadecimalToken(this.raw, this._originalBytes)];
     }
     clone() {
-        return new PdfHexadecimal(new Uint8Array(this.raw));
+        return new PdfHexadecimal(new Uint8Array(this.raw), 'hex', this._originalBytes
+            ? new Uint8Array(this._originalBytes)
+            : undefined);
     }
 }

package/dist/core/objects/pdf-string.d.ts CHANGED Viewed

@@ -6,9 +6,19 @@ export declare class PdfString extends PdfObject {
      * The raw bytes of the PDF string.
      */
     private _raw;
-    constructor(raw: ByteArray | string);
+    /**
+     * Original bytes from the PDF file, including parentheses and escape sequences.
+     * Used to preserve exact formatting for incremental updates.
+     */
+    private _originalBytes?;
+    constructor(raw: ByteArray | string, originalBytes?: ByteArray);
     get raw(): ByteArray;
     set raw(raw: ByteArray);
+    /**
+     * Checks if this string is UTF-16BE encoded (has UTF-16BE BOM).
+     * UTF-16BE strings start with the byte order mark 0xFE 0xFF.
+     */
+    get isUTF16BE(): boolean;
     get value(): string;
     protected tokenize(): PdfStringToken[];
     clone(): this;

package/dist/core/objects/pdf-string.js CHANGED Viewed

@@ -10,7 +10,12 @@ export class PdfString extends PdfObject {
      * The raw bytes of the PDF string.
      */
     _raw;
-    constructor(raw) {
+    /**
+     * Original bytes from the PDF file, including parentheses and escape sequences.
+     * Used to preserve exact formatting for incremental updates.
+     */
+    _originalBytes;
+    constructor(raw, originalBytes) {
         super();
         if (typeof raw === 'string') {
             // Check if the string contains non-ASCII characters
@@ -26,6 +31,7 @@ export class PdfString extends PdfObject {
         else {
             this._raw = raw;
         }
+        this._originalBytes = originalBytes;
     }
     get raw() {
         return this._raw;
@@ -33,21 +39,30 @@ export class PdfString extends PdfObject {
     set raw(raw) {
         this.setModified();
         this._raw = raw;
+        // Clear original bytes when modified
+        this._originalBytes = undefined;
+    }
+    /**
+     * Checks if this string is UTF-16BE encoded (has UTF-16BE BOM).
+     * UTF-16BE strings start with the byte order mark 0xFE 0xFF.
+     */
+    get isUTF16BE() {
+        return (this.raw.length >= 2 && this.raw[0] === 0xfe && this.raw[1] === 0xff);
     }
     get value() {
         // Check for UTF-16BE BOM (0xFE 0xFF)
-        if (this.raw.length >= 2 &&
-            this.raw[0] === 0xfe &&
-            this.raw[1] === 0xff) {
+        if (this.isUTF16BE) {
             return decodeFromUTF16BE(this.raw);
         }
         // Default: use PDFDocEncoding
         return decodeFromPDFDocEncoding(this.raw);
     }
     tokenize() {
-        return [new PdfStringToken(this.raw)];
+        return [new PdfStringToken(this.raw, this._originalBytes)];
     }
     clone() {
-        return new PdfString(new Uint8Array(this.raw));
+        return new PdfString(new Uint8Array(this.raw), this._originalBytes
+            ? new Uint8Array(this._originalBytes)
+            : undefined);
     }
 }

package/dist/core/tokeniser.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { assert } from '../utils/assert.js';
 import { bytesToString } from '../utils/bytesToString.js';
+import { unescapeString } from '../utils/unescapeString.js';
 import { IncrementalParser } from './incremental-parser.js';
 import { PdfBooleanToken } from './tokens/boolean-token.js';
 import { PdfCommentToken } from './tokens/comment-token.js';
@@ -128,7 +129,8 @@ export class PdfByteStreamTokeniser extends IncrementalParser {
             nameBytes.push(this.next());
             byte = this.peek();
         }
-        return new PdfNameToken(bytesToString(new Uint8Array(nameBytes)));
+        const name = bytesToString(new Uint8Array(nameBytes));
+        return new PdfNameToken(name);
     }
     nextDictionaryEndToken() {
         this.expect(ByteMap.RIGHT_ANGLE_BRACKET);
@@ -136,6 +138,8 @@ export class PdfByteStreamTokeniser extends IncrementalParser {
         return new PdfEndDictionaryToken();
     }
     nextHexadecimalToken() {
+        // Capture starting position (before the opening angle bracket)
+        const startIndex = this.bufferIndex;
         this.expect(ByteMap.LEFT_ANGLE_BRACKET);
         const hexBytes = [];
         let byte = this.peek();
@@ -146,7 +150,10 @@ export class PdfByteStreamTokeniser extends IncrementalParser {
             byte = this.peek();
         }
         this.expect(ByteMap.RIGHT_ANGLE_BRACKET);
-        return new PdfHexadecimalToken(new Uint8Array(hexBytes));
+        // Capture original bytes including angle brackets for incremental updates
+        const endIndex = this.bufferIndex; // After the closing angle bracket
+        const originalBytes = new Uint8Array(this.buffer.slice(startIndex, endIndex));
+        return new PdfHexadecimalToken(new Uint8Array(hexBytes), originalBytes);
     }
     nextNumberToken() {
         const numberBytes = [];
@@ -187,8 +194,11 @@ export class PdfByteStreamTokeniser extends IncrementalParser {
         return new PdfEndArrayToken();
     }
     nextStringToken() {
+        // Capture starting position (before the opening parenthesis)
+        const startIndex = this.bufferIndex;
         this.expect(ByteMap.LEFT_PARENTHESIS);
-        const stringBytes = [];
+        // Collect raw bytes until we find the matching closing parenthesis
+        const rawBytes = [];
         let nesting = 1;
         let inEscape = false;
         while (inEscape || nesting > 0) {
@@ -196,87 +206,34 @@ export class PdfByteStreamTokeniser extends IncrementalParser {
             if (byte === null) {
                 throw new Error('Unexpected end of input in string token');
             }
-            if (byte === ByteMap.LEFT_PARENTHESIS) {
-                nesting++;
-            }
-            else if (byte === ByteMap.RIGHT_PARENTHESIS) {
-                nesting--;
-                if (nesting === 0) {
-                    break;
-                }
-            }
-            else if (byte === ByteMap.BACKSLASH || inEscape) {
-                inEscape = true;
-                const next = this.next();
-                let found = false;
-                if (this.inputOffset >= 829528) {
-                    console.log('here', this.inputOffset, next);
-                    found = true;
-                }
-                if (next === null) {
-                    throw new Error('Unexpected end of input in string token');
+            // Add byte to rawBytes first (including the closing parenthesis)
+            rawBytes.push(byte);
+            // Track nesting level for proper parenthesis matching
+            if (!inEscape) {
+                if (byte === ByteMap.LEFT_PARENTHESIS) {
+                    nesting++;
                 }
-                switch (next) {
-                    case ByteMap.n:
-                        stringBytes.push(0x0a);
-                        break; // \n
-                    case ByteMap.r:
-                        stringBytes.push(0x0d);
-                        break; // \r
-                    case ByteMap.t:
-                        stringBytes.push(0x09);
-                        break; // \t
-                    case ByteMap.b:
-                        stringBytes.push(0x08);
-                        break; // \b
-                    case ByteMap.f:
-                        stringBytes.push(0x0c);
-                        break; // \f
-                    case ByteMap.LEFT_PARENTHESIS:
-                        stringBytes.push(ByteMap.LEFT_PARENTHESIS);
-                        break; // \(
-                    case ByteMap.RIGHT_PARENTHESIS:
-                        stringBytes.push(ByteMap.RIGHT_PARENTHESIS);
-                        break; // \)
-                    case ByteMap.BACKSLASH:
-                        stringBytes.push(ByteMap.BACKSLASH);
-                        break; // \\
-                    case ByteMap.LINE_FEED: // Line feed
-                    case ByteMap.CARRIAGE_RETURN: // Carriage return
-                        stringBytes.push(next);
-                        break;
-                    default:
-                        if (PdfByteStreamTokeniser.isOctet(next)) {
-                            let octal = String.fromCharCode(next);
-                            // Octal: up to 3 digits
-                            const next2 = this.peek();
-                            if (next2 === null) {
-                                throw new Error('Unexpected end of input in string token');
-                            }
-                            if (PdfByteStreamTokeniser.isOctet(next2)) {
-                                octal += String.fromCharCode(this.next());
-                            }
-                            const next3 = this.peek();
-                            if (next3 === null) {
-                                throw new Error('Unexpected end of input in string token');
-                            }
-                            if (PdfByteStreamTokeniser.isOctet(next3)) {
-                                octal += String.fromCharCode(this.next());
-                            }
-                            stringBytes.push(parseInt(octal, 8));
-                        }
-                        else {
-                            // If it's not a valid escape sequence, just add the next byte
-                            stringBytes.push(next);
-                        }
+                else if (byte === ByteMap.RIGHT_PARENTHESIS) {
+                    nesting--;
+                    if (nesting === 0) {
                         break;
+                    }
+                }
+                else if (byte === ByteMap.BACKSLASH) {
+                    inEscape = true;
                 }
+            }
+            else {
                 inEscape = false;
-                continue;
             }
-            stringBytes.push(byte);
         }
-        return new PdfStringToken(new Uint8Array(stringBytes));
+        // Capture original bytes including parentheses for incremental updates
+        const endIndex = this.bufferIndex; // After the closing parenthesis
+        const originalBytes = new Uint8Array(this.buffer.slice(startIndex, endIndex));
+        // Use unescapeString utility to process escape sequences
+        // unescapeString expects bytes including the closing parenthesis
+        const unescapedBytes = unescapeString(new Uint8Array(rawBytes));
+        return new PdfStringToken(unescapedBytes, originalBytes);
     }
     nextEndObjectToken() {
         this.expect(ByteMap.e);

package/dist/core/tokens/hexadecimal-token.d.ts CHANGED Viewed

@@ -2,6 +2,13 @@ import { ByteArray } from '../../types.js';
 import { PdfToken } from './token.js';
 export declare class PdfHexadecimalToken extends PdfToken {
     raw: ByteArray;
-    constructor(hexadecimal: string | ByteArray);
+    /**
+     * Original bytes from the PDF file, including angle brackets.
+     * Used to preserve exact formatting for incremental updates.
+     * @internal - Non-enumerable to avoid affecting test comparisons
+     */
+    private _originalBytes?;
+    constructor(hexadecimal: string | ByteArray, originalBytes?: ByteArray);
+    get originalBytes(): ByteArray | undefined;
     private static toBytes;
 }