pdf-lite 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/dist/core/objects/pdf-string.js +24 -1
- package/dist/pdf/pdf-document.js +7 -3
- package/dist/utils/decodeFromUTF16BE.d.ts +18 -0
- package/dist/utils/decodeFromUTF16BE.js +27 -0
- package/dist/utils/encodeAsUTF16BE.d.ts +17 -0
- package/dist/utils/encodeAsUTF16BE.js +26 -0
- package/dist/utils/index.d.ts +3 -0
- package/dist/utils/index.js +3 -0
- package/dist/utils/needsUnicodeEncoding.d.ts +13 -0
- package/dist/utils/needsUnicodeEncoding.js +20 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -191,17 +191,17 @@ Long-Term Validation (LTV) support ensures that digital signatures remain valid
|
|
|
191
191
|
|
|
192
192
|
Supports filling out AcroForm forms within PDF documents, allowing for dynamic content generation and user interaction.
|
|
193
193
|
|
|
194
|
-
[x] Text fields
|
|
195
|
-
[x] Checkboxes
|
|
196
|
-
[x] Radio buttons
|
|
197
|
-
[x] Dropdowns
|
|
194
|
+
- [x] Text fields
|
|
195
|
+
- [x] Checkboxes
|
|
196
|
+
- [x] Radio buttons
|
|
197
|
+
- [x] Dropdowns
|
|
198
198
|
|
|
199
199
|
### XFA Forms
|
|
200
200
|
|
|
201
201
|
You can read/write XFA XML data from PDFs, but rendering and filling XFA forms is not supported.
|
|
202
202
|
|
|
203
|
-
[x] Read XFA XML
|
|
204
|
-
[x] Write XFA XML
|
|
203
|
+
- [x] Read XFA XML
|
|
204
|
+
- [x] Write XFA XML
|
|
205
205
|
|
|
206
206
|
## Future Plans
|
|
207
207
|
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { bytesToString } from '../../utils/bytesToString.js';
|
|
2
2
|
import { stringToBytes } from '../../utils/stringToBytes.js';
|
|
3
|
+
import { needsUnicodeEncoding } from '../../utils/needsUnicodeEncoding.js';
|
|
4
|
+
import { encodeAsUTF16BE } from '../../utils/encodeAsUTF16BE.js';
|
|
5
|
+
import { decodeFromUTF16BE } from '../../utils/decodeFromUTF16BE.js';
|
|
3
6
|
import { PdfStringToken } from '../tokens/string-token.js';
|
|
4
7
|
import { PdfObject } from './pdf-object.js';
|
|
5
8
|
export class PdfString extends PdfObject {
|
|
@@ -9,7 +12,20 @@ export class PdfString extends PdfObject {
|
|
|
9
12
|
_raw;
|
|
10
13
|
constructor(raw) {
|
|
11
14
|
super();
|
|
12
|
-
|
|
15
|
+
if (typeof raw === 'string') {
|
|
16
|
+
// Check if the string contains non-ASCII characters
|
|
17
|
+
if (needsUnicodeEncoding(raw)) {
|
|
18
|
+
// Use UTF-16BE encoding with BOM for Unicode strings
|
|
19
|
+
this._raw = encodeAsUTF16BE(raw);
|
|
20
|
+
}
|
|
21
|
+
else {
|
|
22
|
+
// Use PDFDocEncoding (ASCII-compatible) for simple strings
|
|
23
|
+
this._raw = stringToBytes(raw);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
else {
|
|
27
|
+
this._raw = raw;
|
|
28
|
+
}
|
|
13
29
|
}
|
|
14
30
|
get raw() {
|
|
15
31
|
return this._raw;
|
|
@@ -19,6 +35,13 @@ export class PdfString extends PdfObject {
|
|
|
19
35
|
this._raw = raw;
|
|
20
36
|
}
|
|
21
37
|
get value() {
|
|
38
|
+
// Check for UTF-16BE BOM (0xFE 0xFF)
|
|
39
|
+
if (this.raw.length >= 2 &&
|
|
40
|
+
this.raw[0] === 0xfe &&
|
|
41
|
+
this.raw[1] === 0xff) {
|
|
42
|
+
return decodeFromUTF16BE(this.raw);
|
|
43
|
+
}
|
|
44
|
+
// Default: use UTF-8 decoding
|
|
22
45
|
return bytesToString(this.raw);
|
|
23
46
|
}
|
|
24
47
|
tokenize() {
|
package/dist/pdf/pdf-document.js
CHANGED
|
@@ -83,8 +83,6 @@ export class PdfDocument extends PdfObject {
|
|
|
83
83
|
else {
|
|
84
84
|
this.setVersion(options?.version ?? '2.0');
|
|
85
85
|
}
|
|
86
|
-
this.securityHandler =
|
|
87
|
-
options?.securityHandler ?? this.getSecurityHandler();
|
|
88
86
|
if (options?.password) {
|
|
89
87
|
this.setPassword(options.password);
|
|
90
88
|
}
|
|
@@ -94,6 +92,8 @@ export class PdfDocument extends PdfObject {
|
|
|
94
92
|
this.signer = options?.signer ?? new PdfSigner();
|
|
95
93
|
this.linkRevisions();
|
|
96
94
|
this.calculateOffsets();
|
|
95
|
+
this.securityHandler =
|
|
96
|
+
options?.securityHandler ?? this.getSecurityHandler();
|
|
97
97
|
}
|
|
98
98
|
/**
|
|
99
99
|
* Creates a PdfDocument from an array of PDF objects.
|
|
@@ -262,6 +262,9 @@ export class PdfDocument extends PdfObject {
|
|
|
262
262
|
return undefined;
|
|
263
263
|
}
|
|
264
264
|
const encryptionDictObject = this.findUncompressedObject(encryptionDictionaryRef);
|
|
265
|
+
if (!encryptionDictObject) {
|
|
266
|
+
throw new Error('Encryption dictionary object not found');
|
|
267
|
+
}
|
|
265
268
|
if (!(encryptionDictObject?.content instanceof PdfDictionary)) {
|
|
266
269
|
throw new Error(`Encryption dictionary object ${encryptionDictionaryRef.objectNumber} ${encryptionDictionaryRef.generationNumber} is not a dictionary, it is a ${encryptionDictObject?.content.objectType}`);
|
|
267
270
|
}
|
|
@@ -446,11 +449,12 @@ export class PdfDocument extends PdfObject {
|
|
|
446
449
|
xrefEntry.generationNumber.value !== options.generationNumber)) {
|
|
447
450
|
return undefined;
|
|
448
451
|
}
|
|
449
|
-
|
|
452
|
+
const found = this.objects.find((obj) => obj instanceof PdfIndirectObject &&
|
|
450
453
|
obj.objectNumber === options.objectNumber &&
|
|
451
454
|
(options.generationNumber === undefined ||
|
|
452
455
|
obj.generationNumber === options.generationNumber) &&
|
|
453
456
|
obj.offset.equals(xrefEntry.byteOffset.ref));
|
|
457
|
+
return found;
|
|
454
458
|
}
|
|
455
459
|
/**
|
|
456
460
|
* Reads and optionally decrypts an object by its object number.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { ByteArray } from '../types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Decodes a UTF-16BE byte array to a string
|
|
4
|
+
*
|
|
5
|
+
* Assumes the byte array starts with UTF-16BE BOM (0xFE 0xFF) which is skipped.
|
|
6
|
+
* Each UTF-16 code unit is represented by 2 bytes (high byte, low byte).
|
|
7
|
+
*
|
|
8
|
+
* @param bytes - The byte array to decode (should start with BOM)
|
|
9
|
+
* @returns The decoded string
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* // Byte array with BOM: 0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52 -> "PR"
|
|
14
|
+
* decodeFromUTF16BE(new Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52]))
|
|
15
|
+
* // Returns "PR"
|
|
16
|
+
* ```
|
|
17
|
+
*/
|
|
18
|
+
export declare function decodeFromUTF16BE(bytes: ByteArray): string;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Decodes a UTF-16BE byte array to a string.
 *
 * A leading UTF-16BE BOM (0xFE 0xFF) is skipped when present. Input without a
 * BOM is decoded from its first byte, so no data is silently dropped.
 * Each pair of bytes (high byte, low byte) becomes one UTF-16 code unit, so
 * characters outside the Basic Multilingual Plane, which are stored as
 * surrogate pairs, round-trip unchanged.
 * A dangling final byte is treated as the high byte of a code unit whose low
 * byte is 0.
 *
 * @param bytes - The byte array to decode (normally starting with the BOM)
 * @returns The decoded string; empty when there is nothing after the BOM
 *
 * @example
 * ```typescript
 * // Byte array with BOM: 0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52 -> "PR"
 * decodeFromUTF16BE(new Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52]))
 * // Returns "PR"
 * ```
 */
export function decodeFromUTF16BE(bytes) {
    // Only skip the first two bytes when they really are the BOM.
    const hasBom = bytes.length >= 2 && bytes[0] === 0xfe && bytes[1] === 0xff;
    let decoded = '';
    for (let i = hasBom ? 2 : 0; i < bytes.length; i += 2) {
        const high = bytes[i];
        // An odd-length payload has no low byte for its last unit; use 0.
        const low = i + 1 < bytes.length ? bytes[i + 1] : 0;
        decoded += String.fromCharCode((high << 8) | low);
    }
    return decoded;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { ByteArray } from '../types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Encodes a string as UTF-16BE with BOM for PDF
|
|
4
|
+
*
|
|
5
|
+
* PDF strings can use UTF-16BE encoding to represent Unicode characters.
|
|
6
|
+
* The encoding must start with the UTF-16BE BOM (0xFE 0xFF) to be recognized.
|
|
7
|
+
*
|
|
8
|
+
* @param str - The string to encode
|
|
9
|
+
* @returns Byte array with UTF-16BE BOM followed by the encoded string
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* encodeAsUTF16BE('PROSZĘ')
|
|
14
|
+
* // Returns Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52, ...])
|
|
15
|
+
* ```
|
|
16
|
+
*/
|
|
17
|
+
export declare function encodeAsUTF16BE(str: string): ByteArray;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Encodes a string as UTF-16BE with BOM for PDF.
 *
 * The output starts with the UTF-16BE BOM (0xFE 0xFF), followed by two bytes
 * (high byte, then low byte) for every UTF-16 code unit of the input.
 * Characters outside the Basic Multilingual Plane are already surrogate pairs
 * in a JavaScript string, so they are emitted as four bytes.
 *
 * @param str - The string to encode
 * @returns Byte array with UTF-16BE BOM followed by the encoded string
 *
 * @example
 * ```typescript
 * encodeAsUTF16BE('PROSZĘ')
 * // Returns Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52, ...])
 * ```
 */
export function encodeAsUTF16BE(str) {
    const BOM_LENGTH = 2;
    // The final size is known up front, so write straight into the result
    // instead of growing a number[] and copying it afterwards.
    const encoded = new Uint8Array(BOM_LENGTH + str.length * 2);
    encoded[0] = 0xfe;
    encoded[1] = 0xff;
    for (let i = 0; i < str.length; i++) {
        const code = str.charCodeAt(i);
        const offset = BOM_LENGTH + i * 2;
        // UTF-16BE: high byte first, then low byte
        encoded[offset] = (code >> 8) & 0xff;
        encoded[offset + 1] = code & 0xff;
    }
    return encoded;
}
|
package/dist/utils/index.d.ts
CHANGED
|
@@ -5,10 +5,13 @@ export * from './bytesToHex.js';
|
|
|
5
5
|
export * from './bytesToHexBytes.js';
|
|
6
6
|
export * from './bytesToString.js';
|
|
7
7
|
export * from './concatUint8Arrays.js';
|
|
8
|
+
export * from './decodeFromUTF16BE.js';
|
|
9
|
+
export * from './encodeAsUTF16BE.js';
|
|
8
10
|
export * from './escapeString.js';
|
|
9
11
|
export * from './hexBytesToBytes.js';
|
|
10
12
|
export * from './hexBytesToString.js';
|
|
11
13
|
export * from './hexToBytes.js';
|
|
14
|
+
export * from './needsUnicodeEncoding.js';
|
|
12
15
|
export * from './padBytes.js';
|
|
13
16
|
export * from './predictors.js';
|
|
14
17
|
export * from './replaceInBuffer.js';
|
package/dist/utils/index.js
CHANGED
|
@@ -5,10 +5,13 @@ export * from './bytesToHex.js';
|
|
|
5
5
|
export * from './bytesToHexBytes.js';
|
|
6
6
|
export * from './bytesToString.js';
|
|
7
7
|
export * from './concatUint8Arrays.js';
|
|
8
|
+
export * from './decodeFromUTF16BE.js';
|
|
9
|
+
export * from './encodeAsUTF16BE.js';
|
|
8
10
|
export * from './escapeString.js';
|
|
9
11
|
export * from './hexBytesToBytes.js';
|
|
10
12
|
export * from './hexBytesToString.js';
|
|
11
13
|
export * from './hexToBytes.js';
|
|
14
|
+
export * from './needsUnicodeEncoding.js';
|
|
12
15
|
export * from './padBytes.js';
|
|
13
16
|
export * from './predictors.js';
|
|
14
17
|
export * from './replaceInBuffer.js';
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Checks if a string contains non-ASCII characters that require UTF-16BE encoding
|
|
3
|
+
*
|
|
4
|
+
* @param str - The string to check
|
|
5
|
+
* @returns True if the string contains characters above ASCII range (code > 127)
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* needsUnicodeEncoding('Hello') // Returns false
|
|
10
|
+
* needsUnicodeEncoding('PROSZĘ') // Returns true
|
|
11
|
+
* ```
|
|
12
|
+
*/
|
|
13
|
+
export declare function needsUnicodeEncoding(str: string): boolean;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Reports whether a string has any UTF-16 code unit outside the ASCII range,
 * which means it has to be written as UTF-16BE.
 *
 * @param str - The string to check
 * @returns True as soon as one code unit greater than 127 is found, otherwise false
 *
 * @example
 * ```typescript
 * needsUnicodeEncoding('Hello') // Returns false
 * needsUnicodeEncoding('PROSZĘ') // Returns true
 * ```
 */
export function needsUnicodeEncoding(str) {
    const ASCII_MAX = 127;
    let index = 0;
    while (index < str.length) {
        if (str.charCodeAt(index) > ASCII_MAX) {
            return true;
        }
        index += 1;
    }
    return false;
}
|