pdf-lite 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -191,17 +191,17 @@ Long-Term Validation (LTV) support ensures that digital signatures remain valid
191
191
 
192
192
  Supports filling out AcroForm forms within PDF documents, allowing for dynamic content generation and user interaction.
193
193
 
194
- [x] Text fields
195
- [x] Checkboxes
196
- [x] Radio buttons
197
- [x] Dropdowns
194
+ - [x] Text fields
195
+ - [x] Checkboxes
196
+ - [x] Radio buttons
197
+ - [x] Dropdowns
198
198
 
199
199
  ### XFA Forms
200
200
 
201
201
  You can read/write XFA XML data from PDFs, but rendering and filling XFA forms is not supported.
202
202
 
203
- [x] Read XFA XML
204
- [x] Write XFA XML
203
+ - [x] Read XFA XML
204
+ - [x] Write XFA XML
205
205
 
206
206
  ## Future Plans
207
207
 
@@ -1,5 +1,8 @@
1
1
  import { bytesToString } from '../../utils/bytesToString.js';
2
2
  import { stringToBytes } from '../../utils/stringToBytes.js';
3
+ import { needsUnicodeEncoding } from '../../utils/needsUnicodeEncoding.js';
4
+ import { encodeAsUTF16BE } from '../../utils/encodeAsUTF16BE.js';
5
+ import { decodeFromUTF16BE } from '../../utils/decodeFromUTF16BE.js';
3
6
  import { PdfStringToken } from '../tokens/string-token.js';
4
7
  import { PdfObject } from './pdf-object.js';
5
8
  export class PdfString extends PdfObject {
@@ -9,7 +12,20 @@ export class PdfString extends PdfObject {
9
12
  _raw;
10
13
  constructor(raw) {
11
14
  super();
12
- this._raw = typeof raw === 'string' ? stringToBytes(raw) : raw;
15
+ if (typeof raw === 'string') {
16
+ // Check if the string contains non-ASCII characters
17
+ if (needsUnicodeEncoding(raw)) {
18
+ // Use UTF-16BE encoding with BOM for Unicode strings
19
+ this._raw = encodeAsUTF16BE(raw);
20
+ }
21
+ else {
22
+ // Use PDFDocEncoding (ASCII-compatible) for simple strings
23
+ this._raw = stringToBytes(raw);
24
+ }
25
+ }
26
+ else {
27
+ this._raw = raw;
28
+ }
13
29
  }
14
30
  get raw() {
15
31
  return this._raw;
@@ -19,6 +35,13 @@ export class PdfString extends PdfObject {
19
35
  this._raw = raw;
20
36
  }
21
37
  get value() {
38
+ // Check for UTF-16BE BOM (0xFE 0xFF)
39
+ if (this.raw.length >= 2 &&
40
+ this.raw[0] === 0xfe &&
41
+ this.raw[1] === 0xff) {
42
+ return decodeFromUTF16BE(this.raw);
43
+ }
44
+ // Default: use UTF-8 decoding
22
45
  return bytesToString(this.raw);
23
46
  }
24
47
  tokenize() {
@@ -83,8 +83,6 @@ export class PdfDocument extends PdfObject {
83
83
  else {
84
84
  this.setVersion(options?.version ?? '2.0');
85
85
  }
86
- this.securityHandler =
87
- options?.securityHandler ?? this.getSecurityHandler();
88
86
  if (options?.password) {
89
87
  this.setPassword(options.password);
90
88
  }
@@ -94,6 +92,8 @@ export class PdfDocument extends PdfObject {
94
92
  this.signer = options?.signer ?? new PdfSigner();
95
93
  this.linkRevisions();
96
94
  this.calculateOffsets();
95
+ this.securityHandler =
96
+ options?.securityHandler ?? this.getSecurityHandler();
97
97
  }
98
98
  /**
99
99
  * Creates a PdfDocument from an array of PDF objects.
@@ -262,6 +262,9 @@ export class PdfDocument extends PdfObject {
262
262
  return undefined;
263
263
  }
264
264
  const encryptionDictObject = this.findUncompressedObject(encryptionDictionaryRef);
265
+ if (!encryptionDictObject) {
266
+ throw new Error('Encryption dictionary object not found');
267
+ }
265
268
  if (!(encryptionDictObject?.content instanceof PdfDictionary)) {
266
269
  throw new Error(`Encryption dictionary object ${encryptionDictionaryRef.objectNumber} ${encryptionDictionaryRef.generationNumber} is not a dictionary, it is a ${encryptionDictObject?.content.objectType}`);
267
270
  }
@@ -446,11 +449,12 @@ export class PdfDocument extends PdfObject {
446
449
  xrefEntry.generationNumber.value !== options.generationNumber)) {
447
450
  return undefined;
448
451
  }
449
- return this.objects.find((obj) => obj instanceof PdfIndirectObject &&
452
+ const found = this.objects.find((obj) => obj instanceof PdfIndirectObject &&
450
453
  obj.objectNumber === options.objectNumber &&
451
454
  (options.generationNumber === undefined ||
452
455
  obj.generationNumber === options.generationNumber) &&
453
456
  obj.offset.equals(xrefEntry.byteOffset.ref));
457
+ return found;
454
458
  }
455
459
  /**
456
460
  * Reads and optionally decrypts an object by its object number.
@@ -0,0 +1,18 @@
1
+ import { ByteArray } from '../types.js';
2
+ /**
3
+ * Decodes a UTF-16BE byte array to a string
4
+ *
5
+ * Assumes the byte array starts with UTF-16BE BOM (0xFE 0xFF) which is skipped.
6
+ * Each UTF-16 code unit is represented by 2 bytes (high byte, low byte); characters outside the Basic Multilingual Plane use two code units (a surrogate pair).
7
+ *
8
+ * @param bytes - The byte array to decode (should start with BOM)
9
+ * @returns The decoded string
10
+ *
11
+ * @example
12
+ * ```typescript
13
+ * // Byte array with BOM: 0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52 -> "PR"
14
+ * decodeFromUTF16BE(new Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52]))
15
+ * // Returns "PR"
16
+ * ```
17
+ */
18
+ export declare function decodeFromUTF16BE(bytes: ByteArray): string;
@@ -0,0 +1,27 @@
/**
 * Decodes a UTF-16BE byte array to a string.
 *
 * A leading UTF-16BE byte order mark (0xFE 0xFF) is skipped when present.
 * Input that does not start with the BOM is decoded from its first byte, so
 * its first code unit is no longer silently discarded.
 *
 * Each UTF-16 code unit is represented by 2 bytes (high byte, low byte).
 * Characters outside the Basic Multilingual Plane are stored as a surrogate
 * pair (two code units); the units are emitted in order, so they recombine
 * into the original character in the returned string.
 *
 * A dangling final byte (odd payload length) is treated as the high byte of
 * a code unit whose low byte is 0.
 *
 * @param bytes - The byte array to decode (normally starting with the BOM)
 * @returns The decoded string
 *
 * @example
 * ```typescript
 * // Byte array with BOM: 0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52 -> "PR"
 * decodeFromUTF16BE(new Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52]))
 * // Returns "PR"
 * ```
 */
export function decodeFromUTF16BE(bytes) {
    // Only skip the first two bytes when they really are the BOM.
    const hasBom = bytes.length >= 2 && bytes[0] === 0xfe && bytes[1] === 0xff;
    const chars = [];
    for (let i = hasBom ? 2 : 0; i < bytes.length; i += 2) {
        const high = bytes[i];
        // A truncated trailing code unit is padded with a zero low byte.
        const low = bytes[i + 1] ?? 0;
        chars.push(String.fromCharCode((high << 8) | low));
    }
    return chars.join('');
}
@@ -0,0 +1,17 @@
1
+ import { ByteArray } from '../types.js';
2
+ /**
3
+ * Encodes a string as UTF-16BE with BOM for PDF
4
+ *
5
+ * PDF strings can use UTF-16BE encoding to represent Unicode characters.
6
+ * The encoding must start with the UTF-16BE BOM (0xFE 0xFF) to be recognized.
7
+ *
8
+ * @param str - The string to encode
9
+ * @returns Byte array with UTF-16BE BOM followed by the encoded string
10
+ *
11
+ * @example
12
+ * ```typescript
13
+ * encodeAsUTF16BE('PROSZĘ')
14
+ * // Returns Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52, ...])
15
+ * ```
16
+ */
17
+ export declare function encodeAsUTF16BE(str: string): ByteArray;
@@ -0,0 +1,26 @@
/**
 * Serialises a string as UTF-16BE bytes prefixed with a byte order mark.
 *
 * The output always begins with the UTF-16BE BOM (0xFE 0xFF). After it,
 * every UTF-16 code unit of the input is written as two bytes, most
 * significant byte first.
 *
 * @param str - The string to encode
 * @returns Byte array with UTF-16BE BOM followed by the encoded string
 *
 * @example
 * ```typescript
 * encodeAsUTF16BE('PROSZĘ')
 * // Returns Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52, ...])
 * ```
 */
export function encodeAsUTF16BE(str) {
    const BOM = [0xfe, 0xff];
    const bytes = [...BOM];
    let pos = 0;
    while (pos < str.length) {
        const unit = str.charCodeAt(pos++);
        // Big-endian order: high byte, then low byte.
        bytes.push((unit >>> 8) & 0xff, unit & 0xff);
    }
    return Uint8Array.from(bytes);
}
@@ -5,10 +5,13 @@ export * from './bytesToHex.js';
5
5
  export * from './bytesToHexBytes.js';
6
6
  export * from './bytesToString.js';
7
7
  export * from './concatUint8Arrays.js';
8
+ export * from './decodeFromUTF16BE.js';
9
+ export * from './encodeAsUTF16BE.js';
8
10
  export * from './escapeString.js';
9
11
  export * from './hexBytesToBytes.js';
10
12
  export * from './hexBytesToString.js';
11
13
  export * from './hexToBytes.js';
14
+ export * from './needsUnicodeEncoding.js';
12
15
  export * from './padBytes.js';
13
16
  export * from './predictors.js';
14
17
  export * from './replaceInBuffer.js';
@@ -5,10 +5,13 @@ export * from './bytesToHex.js';
5
5
  export * from './bytesToHexBytes.js';
6
6
  export * from './bytesToString.js';
7
7
  export * from './concatUint8Arrays.js';
8
+ export * from './decodeFromUTF16BE.js';
9
+ export * from './encodeAsUTF16BE.js';
8
10
  export * from './escapeString.js';
9
11
  export * from './hexBytesToBytes.js';
10
12
  export * from './hexBytesToString.js';
11
13
  export * from './hexToBytes.js';
14
+ export * from './needsUnicodeEncoding.js';
12
15
  export * from './padBytes.js';
13
16
  export * from './predictors.js';
14
17
  export * from './replaceInBuffer.js';
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Checks if a string contains non-ASCII characters that require UTF-16BE encoding
3
+ *
4
+ * @param str - The string to check
5
+ * @returns True if the string contains characters above ASCII range (code > 127)
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * needsUnicodeEncoding('Hello') // Returns false
10
+ * needsUnicodeEncoding('PROSZĘ') // Returns true
11
+ * ```
12
+ */
13
+ export declare function needsUnicodeEncoding(str: string): boolean;
@@ -0,0 +1,20 @@
/**
 * Reports whether a string holds any UTF-16 code unit outside the 7-bit
 * ASCII range, i.e. whether it contains non-ASCII characters.
 *
 * @param str - The string to check
 * @returns True if at least one code unit is greater than 127, false otherwise
 *
 * @example
 * ```typescript
 * needsUnicodeEncoding('Hello') // Returns false
 * needsUnicodeEncoding('PROSZĘ') // Returns true
 * ```
 */
export function needsUnicodeEncoding(str) {
    const ASCII_MAX = 127;
    // Scan direction does not affect the answer; walk from the end.
    let index = str.length;
    while (index-- > 0) {
        if (str.charCodeAt(index) > ASCII_MAX) {
            return true;
        }
    }
    return false;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pdf-lite",
3
- "version": "1.0.6",
3
+ "version": "1.0.8",
4
4
  "main": "dist/index.js",
5
5
  "type": "module",
6
6
  "exports": {