pdf-lite 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,8 @@
1
1
  import { bytesToString } from '../../utils/bytesToString.js';
2
2
  import { stringToBytes } from '../../utils/stringToBytes.js';
3
+ import { needsUnicodeEncoding } from '../../utils/needsUnicodeEncoding.js';
4
+ import { encodeAsUTF16BE } from '../../utils/encodeAsUTF16BE.js';
5
+ import { decodeFromUTF16BE } from '../../utils/decodeFromUTF16BE.js';
3
6
  import { PdfStringToken } from '../tokens/string-token.js';
4
7
  import { PdfObject } from './pdf-object.js';
5
8
  export class PdfString extends PdfObject {
@@ -9,7 +12,20 @@ export class PdfString extends PdfObject {
9
12
  _raw;
10
13
  constructor(raw) {
11
14
  super();
12
- this._raw = typeof raw === 'string' ? stringToBytes(raw) : raw;
15
+ if (typeof raw === 'string') {
16
+ // Check if the string contains non-ASCII characters
17
+ if (needsUnicodeEncoding(raw)) {
18
+ // Use UTF-16BE encoding with BOM for Unicode strings
19
+ this._raw = encodeAsUTF16BE(raw);
20
+ }
21
+ else {
22
+ // Use PDFDocEncoding (ASCII-compatible) for simple strings
23
+ this._raw = stringToBytes(raw);
24
+ }
25
+ }
26
+ else {
27
+ this._raw = raw;
28
+ }
13
29
  }
14
30
  get raw() {
15
31
  return this._raw;
@@ -19,6 +35,13 @@ export class PdfString extends PdfObject {
19
35
  this._raw = raw;
20
36
  }
21
37
  get value() {
38
+ // Check for UTF-16BE BOM (0xFE 0xFF)
39
+ if (this.raw.length >= 2 &&
40
+ this.raw[0] === 0xfe &&
41
+ this.raw[1] === 0xff) {
42
+ return decodeFromUTF16BE(this.raw);
43
+ }
44
+ // Default: use UTF-8 decoding
22
45
  return bytesToString(this.raw);
23
46
  }
24
47
  tokenize() {
@@ -0,0 +1,18 @@
1
+ import { ByteArray } from '../types.js';
2
+ /**
3
+ * Decodes a UTF-16BE byte array to a string
4
+ *
5
+ * Assumes the byte array starts with UTF-16BE BOM (0xFE 0xFF) which is skipped.
6
+ * Each character is represented by 2 bytes (high byte, low byte).
7
+ *
8
+ * @param bytes - The byte array to decode (should start with BOM)
9
+ * @returns The decoded string
10
+ *
11
+ * @example
12
+ * ```typescript
13
+ * // Byte array with BOM: 0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52 -> "PR"
14
+ * decodeFromUTF16BE(new Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52]))
15
+ * // Returns "PR"
16
+ * ```
17
+ */
18
+ export declare function decodeFromUTF16BE(bytes: ByteArray): string;
@@ -0,0 +1,27 @@
1
/**
 * Decodes a UTF-16BE byte array to a string.
 *
 * If the array starts with the UTF-16BE byte-order mark (0xFE 0xFF) the mark
 * is skipped; otherwise decoding starts at the first byte, so BOM-less input
 * no longer loses its first code unit.
 *
 * Every pair of bytes (high byte, then low byte) becomes one UTF-16 code
 * unit. Surrogate pairs are therefore preserved and combine into a single
 * character in the resulting JavaScript string. A dangling final byte (odd
 * payload length) is treated as a high byte with a low byte of 0.
 *
 * @param bytes - The byte array to decode, with or without a leading BOM
 * @returns The decoded string
 *
 * @example
 * ```typescript
 * // Byte array with BOM: 0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52 -> "PR"
 * decodeFromUTF16BE(new Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52]))
 * // Returns "PR"
 * ```
 */
export function decodeFromUTF16BE(bytes) {
    // Only skip the first two bytes when they really are the BOM.
    const hasBom = bytes.length >= 2 && bytes[0] === 0xfe && bytes[1] === 0xff;
    const chars = [];
    for (let i = hasBom ? 2 : 0; i < bytes.length; i += 2) {
        const high = bytes[i];
        // A truncated final pair is padded with a zero low byte.
        const low = i + 1 < bytes.length ? bytes[i + 1] : 0;
        chars.push(String.fromCharCode((high << 8) | low));
    }
    return chars.join('');
}
@@ -0,0 +1,17 @@
1
+ import { ByteArray } from '../types.js';
2
+ /**
3
+ * Encodes a string as UTF-16BE with BOM for PDF
4
+ *
5
+ * PDF strings can use UTF-16BE encoding to represent Unicode characters.
6
+ * The encoding must start with the UTF-16BE BOM (0xFE 0xFF) to be recognized.
7
+ *
8
+ * @param str - The string to encode
9
+ * @returns Byte array with UTF-16BE BOM followed by the encoded string
10
+ *
11
+ * @example
12
+ * ```typescript
13
+ * encodeAsUTF16BE('PROSZĘ')
14
+ * // Returns Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52, ...])
15
+ * ```
16
+ */
17
+ export declare function encodeAsUTF16BE(str: string): ByteArray;
@@ -0,0 +1,26 @@
1
/**
 * Serialises a string as UTF-16BE bytes prefixed with a byte-order mark.
 *
 * The output always begins with 0xFE 0xFF, followed by two bytes (high, then
 * low) for every UTF-16 code unit of the input, so its length is
 * `2 + 2 * str.length`.
 *
 * @param str - The string to encode
 * @returns Byte array holding the BOM followed by the encoded code units
 *
 * @example
 * ```typescript
 * encodeAsUTF16BE('PR')
 * // Returns Uint8Array([0xFE, 0xFF, 0x00, 0x50, 0x00, 0x52])
 * ```
 */
export function encodeAsUTF16BE(str) {
    const BOM_LENGTH = 2;
    const encoded = new Uint8Array(BOM_LENGTH + str.length * 2);
    encoded[0] = 0xfe;
    encoded[1] = 0xff;
    let offset = BOM_LENGTH;
    for (let index = 0; index < str.length; index++) {
        const unit = str.charCodeAt(index);
        // Big-endian: most significant byte goes first.
        encoded[offset++] = (unit >> 8) & 0xff;
        encoded[offset++] = unit & 0xff;
    }
    return encoded;
}
@@ -5,10 +5,13 @@ export * from './bytesToHex.js';
5
5
  export * from './bytesToHexBytes.js';
6
6
  export * from './bytesToString.js';
7
7
  export * from './concatUint8Arrays.js';
8
+ export * from './decodeFromUTF16BE.js';
9
+ export * from './encodeAsUTF16BE.js';
8
10
  export * from './escapeString.js';
9
11
  export * from './hexBytesToBytes.js';
10
12
  export * from './hexBytesToString.js';
11
13
  export * from './hexToBytes.js';
14
+ export * from './needsUnicodeEncoding.js';
12
15
  export * from './padBytes.js';
13
16
  export * from './predictors.js';
14
17
  export * from './replaceInBuffer.js';
@@ -5,10 +5,13 @@ export * from './bytesToHex.js';
5
5
  export * from './bytesToHexBytes.js';
6
6
  export * from './bytesToString.js';
7
7
  export * from './concatUint8Arrays.js';
8
+ export * from './decodeFromUTF16BE.js';
9
+ export * from './encodeAsUTF16BE.js';
8
10
  export * from './escapeString.js';
9
11
  export * from './hexBytesToBytes.js';
10
12
  export * from './hexBytesToString.js';
11
13
  export * from './hexToBytes.js';
14
+ export * from './needsUnicodeEncoding.js';
12
15
  export * from './padBytes.js';
13
16
  export * from './predictors.js';
14
17
  export * from './replaceInBuffer.js';
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Checks if a string contains non-ASCII characters that require UTF-16BE encoding
3
+ *
4
+ * @param str - The string to check
5
+ * @returns True if the string contains characters above ASCII range (code > 127)
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * needsUnicodeEncoding('Hello') // Returns false
10
+ * needsUnicodeEncoding('PROSZĘ') // Returns true
11
+ * ```
12
+ */
13
+ export declare function needsUnicodeEncoding(str: string): boolean;
@@ -0,0 +1,20 @@
1
/**
 * Reports whether a string has any UTF-16 code unit outside the 7-bit ASCII
 * range, i.e. whether at least one `charCodeAt` value is greater than 127.
 *
 * @param str - The string to inspect
 * @returns True when some code unit is above 127, false otherwise
 *   (including for the empty string)
 *
 * @example
 * ```typescript
 * needsUnicodeEncoding('Hello') // Returns false
 * needsUnicodeEncoding('PROSZĘ') // Returns true
 * ```
 */
export function needsUnicodeEncoding(str) {
    // Without the `u` flag the class is matched per code unit, so this is
    // equivalent to scanning for any charCodeAt(i) > 127.
    return /[\u0080-\uffff]/.test(str);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pdf-lite",
3
- "version": "1.0.7",
3
+ "version": "1.0.8",
4
4
  "main": "dist/index.js",
5
5
  "type": "module",
6
6
  "exports": {