pdf-lite 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/objects/pdf-string.js +24 -1
- package/dist/utils/decodeFromUTF16BE.d.ts +18 -0
- package/dist/utils/decodeFromUTF16BE.js +27 -0
- package/dist/utils/encodeAsUTF16BE.d.ts +17 -0
- package/dist/utils/encodeAsUTF16BE.js +26 -0
- package/dist/utils/index.d.ts +3 -0
- package/dist/utils/index.js +3 -0
- package/dist/utils/needsUnicodeEncoding.d.ts +13 -0
- package/dist/utils/needsUnicodeEncoding.js +20 -0
- package/package.json +1 -1
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { bytesToString } from '../../utils/bytesToString.js';
|
|
2
2
|
import { stringToBytes } from '../../utils/stringToBytes.js';
|
|
3
|
+
import { needsUnicodeEncoding } from '../../utils/needsUnicodeEncoding.js';
|
|
4
|
+
import { encodeAsUTF16BE } from '../../utils/encodeAsUTF16BE.js';
|
|
5
|
+
import { decodeFromUTF16BE } from '../../utils/decodeFromUTF16BE.js';
|
|
3
6
|
import { PdfStringToken } from '../tokens/string-token.js';
|
|
4
7
|
import { PdfObject } from './pdf-object.js';
|
|
5
8
|
export class PdfString extends PdfObject {
|
|
@@ -9,7 +12,20 @@ export class PdfString extends PdfObject {
|
|
|
9
12
|
_raw;
|
|
10
13
|
constructor(raw) {
|
|
11
14
|
super();
|
|
12
|
-
|
|
15
|
+
if (typeof raw === 'string') {
|
|
16
|
+
// Check if the string contains non-ASCII characters
|
|
17
|
+
if (needsUnicodeEncoding(raw)) {
|
|
18
|
+
// Use UTF-16BE encoding with BOM for Unicode strings
|
|
19
|
+
this._raw = encodeAsUTF16BE(raw);
|
|
20
|
+
}
|
|
21
|
+
else {
|
|
22
|
+
// Use PDFDocEncoding (ASCII-compatible) for simple strings
|
|
23
|
+
this._raw = stringToBytes(raw);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
else {
|
|
27
|
+
this._raw = raw;
|
|
28
|
+
}
|
|
13
29
|
}
|
|
14
30
|
get raw() {
|
|
15
31
|
return this._raw;
|
|
@@ -19,6 +35,13 @@ export class PdfString extends PdfObject {
|
|
|
19
35
|
this._raw = raw;
|
|
20
36
|
}
|
|
21
37
|
get value() {
|
|
38
|
+
// Check for UTF-16BE BOM (0xFE 0xFF)
|
|
39
|
+
if (this.raw.length >= 2 &&
|
|
40
|
+
this.raw[0] === 0xfe &&
|
|
41
|
+
this.raw[1] === 0xff) {
|
|
42
|
+
return decodeFromUTF16BE(this.raw);
|
|
43
|
+
}
|
|
44
|
+
// Default: use UTF-8 decoding
|
|
22
45
|
return bytesToString(this.raw);
|
|
23
46
|
}
|
|
24
47
|
tokenize() {
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { ByteArray } from '../types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Decodes a UTF-16BE byte array to a string
|
|
4
|
+
*
|
|
5
|
+
* Assumes the byte array starts with UTF-16BE BOM (0xFE 0xFF) which is skipped.
|
|
6
|
+
* Each UTF-16 code unit is represented by 2 bytes (high byte, low byte); characters outside the BMP occupy two code units (a surrogate pair).
|
|
7
|
+
*
|
|
8
|
+
* @param bytes - The byte array to decode (should start with BOM)
|
|
9
|
+
* @returns The decoded string
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* // Byte array with BOM: 0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52 -> "PR"
|
|
14
|
+
* decodeFromUTF16BE(new Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52]))
|
|
15
|
+
* // Returns "PR"
|
|
16
|
+
* ```
|
|
17
|
+
*/
|
|
18
|
+
export declare function decodeFromUTF16BE(bytes: ByteArray): string;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Decodes a UTF-16BE byte array to a string.
 *
 * A leading UTF-16BE BOM (0xFE 0xFF) is skipped when present. Input without
 * a BOM is decoded from its first byte, so no leading code unit is lost.
 * Each UTF-16 code unit is read as 2 bytes (high byte, then low byte);
 * surrogate pairs are passed through as two consecutive code units.
 *
 * @param bytes - The byte array to decode (normally starting with the BOM)
 * @returns The decoded string
 *
 * @example
 * ```typescript
 * // Byte array with BOM: 0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52 -> "PR"
 * decodeFromUTF16BE(new Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52]))
 * // Returns "PR"
 * ```
 */
export function decodeFromUTF16BE(bytes) {
    // Only skip the first two bytes when they really are the BOM.
    const hasBom = bytes.length >= 2 && bytes[0] === 0xfe && bytes[1] === 0xff;
    let text = '';
    for (let i = hasBom ? 2 : 0; i < bytes.length; i += 2) {
        const high = bytes[i];
        // A dangling final byte is treated as a high byte whose low byte is 0.
        const low = i + 1 < bytes.length ? bytes[i + 1] : 0;
        text += String.fromCharCode((high << 8) | low);
    }
    return text;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { ByteArray } from '../types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Encodes a string as UTF-16BE with BOM for PDF
|
|
4
|
+
*
|
|
5
|
+
* PDF strings can use UTF-16BE encoding to represent Unicode characters.
|
|
6
|
+
* The encoding must start with the UTF-16BE BOM (0xFE 0xFF) to be recognized.
|
|
7
|
+
*
|
|
8
|
+
* @param str - The string to encode
|
|
9
|
+
* @returns Byte array with UTF-16BE BOM followed by the encoded string
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* encodeAsUTF16BE('PROSZĘ')
|
|
14
|
+
* // Returns Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52, ...])
|
|
15
|
+
* ```
|
|
16
|
+
*/
|
|
17
|
+
export declare function encodeAsUTF16BE(str: string): ByteArray;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Encodes a string as UTF-16BE with a leading BOM, for use as a PDF string.
 *
 * The output starts with the UTF-16BE BOM (0xFE 0xFF), followed by every
 * UTF-16 code unit of the input written high byte first. Characters outside
 * the BMP are emitted as their two surrogate code units (4 bytes).
 *
 * @param str - The string to encode
 * @returns Byte array with UTF-16BE BOM followed by the encoded string
 *
 * @example
 * ```typescript
 * encodeAsUTF16BE('PROSZĘ')
 * // Returns Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52, ...])
 * ```
 */
export function encodeAsUTF16BE(str) {
    // The final size is known up front: 2 BOM bytes + 2 bytes per code unit.
    const out = new Uint8Array(2 + str.length * 2);
    out[0] = 0xfe;
    out[1] = 0xff;
    for (let i = 0, offset = 2; i < str.length; i++, offset += 2) {
        const unit = str.charCodeAt(i);
        out[offset] = (unit >> 8) & 0xff;
        out[offset + 1] = unit & 0xff;
    }
    return out;
}
|
package/dist/utils/index.d.ts
CHANGED
|
@@ -5,10 +5,13 @@ export * from './bytesToHex.js';
|
|
|
5
5
|
export * from './bytesToHexBytes.js';
|
|
6
6
|
export * from './bytesToString.js';
|
|
7
7
|
export * from './concatUint8Arrays.js';
|
|
8
|
+
export * from './decodeFromUTF16BE.js';
|
|
9
|
+
export * from './encodeAsUTF16BE.js';
|
|
8
10
|
export * from './escapeString.js';
|
|
9
11
|
export * from './hexBytesToBytes.js';
|
|
10
12
|
export * from './hexBytesToString.js';
|
|
11
13
|
export * from './hexToBytes.js';
|
|
14
|
+
export * from './needsUnicodeEncoding.js';
|
|
12
15
|
export * from './padBytes.js';
|
|
13
16
|
export * from './predictors.js';
|
|
14
17
|
export * from './replaceInBuffer.js';
|
package/dist/utils/index.js
CHANGED
|
@@ -5,10 +5,13 @@ export * from './bytesToHex.js';
|
|
|
5
5
|
export * from './bytesToHexBytes.js';
|
|
6
6
|
export * from './bytesToString.js';
|
|
7
7
|
export * from './concatUint8Arrays.js';
|
|
8
|
+
export * from './decodeFromUTF16BE.js';
|
|
9
|
+
export * from './encodeAsUTF16BE.js';
|
|
8
10
|
export * from './escapeString.js';
|
|
9
11
|
export * from './hexBytesToBytes.js';
|
|
10
12
|
export * from './hexBytesToString.js';
|
|
11
13
|
export * from './hexToBytes.js';
|
|
14
|
+
export * from './needsUnicodeEncoding.js';
|
|
12
15
|
export * from './padBytes.js';
|
|
13
16
|
export * from './predictors.js';
|
|
14
17
|
export * from './replaceInBuffer.js';
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Checks if a string contains non-ASCII characters that require UTF-16BE encoding
|
|
3
|
+
*
|
|
4
|
+
* @param str - The string to check
|
|
5
|
+
* @returns True if any UTF-16 code unit in the string is above the ASCII range (code > 127)
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* needsUnicodeEncoding('Hello') // Returns false
|
|
10
|
+
* needsUnicodeEncoding('PROSZĘ') // Returns true
|
|
11
|
+
* ```
|
|
12
|
+
*/
|
|
13
|
+
export declare function needsUnicodeEncoding(str: string): boolean;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Reports whether a string holds anything outside the 7-bit ASCII range.
 *
 * The check is made per UTF-16 code unit: the result is true as soon as one
 * code unit is greater than 127, and false for the empty string.
 *
 * @param str - The string to check
 * @returns True if at least one code unit is above 127, otherwise false
 *
 * @example
 * ```typescript
 * needsUnicodeEncoding('Hello') // Returns false
 * needsUnicodeEncoding('PROSZĘ') // Returns true
 * ```
 */
export function needsUnicodeEncoding(str) {
    // No `u` flag: the class is matched against individual code units.
    return /[^\u0000-\u007f]/.test(str);
}
|