@libpdf/core 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,5 @@
1
1
  import { t as __exportAll } from "./chunk-15K8U1wQ.mjs";
2
+ import { LRUCache } from "lru-cache";
2
3
  import pako, { deflate, inflate } from "pako";
3
4
  import { cbc, ecb } from "@noble/ciphers/aes.js";
4
5
  import { randomBytes } from "@noble/ciphers/utils.js";
@@ -10,7 +11,7 @@ import { createCMSECDSASignature } from "pkijs";
10
11
  import { base64 } from "@scure/base";
11
12
 
12
13
  //#region package.json
13
- var version = "0.2.7";
14
+ var version = "0.2.8";
14
15
 
15
16
  //#endregion
16
17
  //#region src/objects/pdf-array.ts
@@ -112,6 +113,80 @@ var PdfArray = class PdfArray {
112
113
  }
113
114
  };
114
115
 
116
+ //#endregion
117
+ //#region src/helpers/buffer.ts
118
+ /**
119
+ * Buffer utilities for working with ArrayBuffer and Uint8Array.
120
+ */
121
+ /**
122
+ * Ensure we have a proper ArrayBuffer (not SharedArrayBuffer or slice).
123
+ *
124
+ * Web Crypto APIs require a true ArrayBuffer, not a view into one.
125
+ *
126
+ * @param data - Uint8Array to convert
127
+ * @returns ArrayBuffer containing the data
128
+ */
129
+ function toArrayBuffer(data) {
130
+ if (data.buffer instanceof ArrayBuffer && data.byteOffset === 0 && data.byteLength === data.buffer.byteLength) return data.buffer;
131
+ return data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
132
+ }
133
+ /**
134
+ * Concatenate multiple Uint8Arrays into a single Uint8Array.
135
+ *
136
+ * @param arrays - Arrays to concatenate
137
+ * @returns Single Uint8Array containing all data
138
+ */
139
+ function concatBytes(arrays) {
140
+ const totalLength = arrays.reduce((sum, arr) => sum + arr.length, 0);
141
+ const result = new Uint8Array(totalLength);
142
+ let offset = 0;
143
+ for (const arr of arrays) {
144
+ result.set(arr, offset);
145
+ offset += arr.length;
146
+ }
147
+ return result;
148
+ }
149
+ /** Pre-computed hex lookup: byte value → "XX" uppercase string. */
150
+ const HEX_TABLE = new Array(256);
151
+ for (let i = 0; i < 256; i++) HEX_TABLE[i] = i.toString(16).toUpperCase().padStart(2, "0");
152
+ /**
153
+ * Convert bytes to uppercase hex string.
154
+ *
155
+ * @param bytes - Raw bytes
156
+ * @returns Hex string (e.g., "48656C6C6F")
157
+ *
158
+ * @example
159
+ * ```ts
160
+ * bytesToHex(new Uint8Array([72, 101, 108, 108, 111])) // "48656C6C6F"
161
+ * ```
162
+ */
163
+ function bytesToHex(bytes) {
164
+ let hex = "";
165
+ for (const byte of bytes) hex += HEX_TABLE[byte];
166
+ return hex;
167
+ }
168
+ /**
169
+ * Convert a hex string to bytes.
170
+ *
171
+ * Whitespace is ignored. Odd-length strings are padded with trailing 0.
172
+ *
173
+ * @param hex - Hex string (e.g., "48656C6C6F" or "48 65 6C 6C 6F")
174
+ * @returns Decoded bytes
175
+ *
176
+ * @example
177
+ * ```ts
178
+ * hexToBytes("48656C6C6F") // Uint8Array([72, 101, 108, 108, 111])
179
+ * hexToBytes("ABC") // Uint8Array([171, 192]) - padded to "ABC0"
180
+ * ```
181
+ */
182
+ function hexToBytes(hex) {
183
+ const clean = hex.replace(/\s/g, "");
184
+ const padded = clean.length % 2 === 1 ? `${clean}0` : clean;
185
+ const bytes = new Uint8Array(padded.length / 2);
186
+ for (let i = 0; i < bytes.length; i++) bytes[i] = Number.parseInt(padded.slice(i * 2, i * 2 + 2), 16);
187
+ return bytes;
188
+ }
189
+
115
190
  //#endregion
116
191
  //#region src/helpers/chars.ts
117
192
  /**
@@ -221,83 +296,6 @@ function hexValue(byte) {
221
296
  */
222
297
  const SINGLE_BYTE_MASK = 255;
223
298
 
224
- //#endregion
225
- //#region src/helpers/lru-cache.ts
226
- /**
227
- * Simple LRU (Least Recently Used) cache implementation.
228
- *
229
- * Used for interning frequently-used PDF objects (PdfName, PdfRef)
230
- * while preventing unbounded memory growth.
231
- */
232
- /**
233
- * A bounded cache that evicts least-recently-used entries when full.
234
- *
235
- * @typeParam K - Key type
236
- * @typeParam V - Value type
237
- */
238
- var LRUCache = class {
239
- maxSize;
240
- cache = /* @__PURE__ */ new Map();
241
- /**
242
- * Create a new LRU cache.
243
- *
244
- * @param maxSize - Maximum number of entries to retain (default: 10000)
245
- */
246
- constructor(maxSize = 1e4) {
247
- this.maxSize = maxSize;
248
- }
249
- /**
250
- * Get a value from the cache, updating its recency.
251
- *
252
- * @returns The cached value, or undefined if not present
253
- */
254
- get(key$1) {
255
- const value = this.cache.get(key$1);
256
- if (value !== void 0) {
257
- this.cache.delete(key$1);
258
- this.cache.set(key$1, value);
259
- }
260
- return value;
261
- }
262
- /**
263
- * Check if a key exists in the cache (without updating recency).
264
- */
265
- has(key$1) {
266
- return this.cache.has(key$1);
267
- }
268
- /**
269
- * Add or update a value in the cache.
270
- *
271
- * If the cache is at capacity, the least-recently-used entry is evicted.
272
- */
273
- set(key$1, value) {
274
- if (this.cache.has(key$1)) this.cache.delete(key$1);
275
- else if (this.cache.size >= this.maxSize) {
276
- const oldestKey = this.cache.keys().next().value;
277
- if (oldestKey !== void 0) this.cache.delete(oldestKey);
278
- }
279
- this.cache.set(key$1, value);
280
- }
281
- /**
282
- * Remove a value from the cache.
283
- */
284
- delete(key$1) {
285
- return this.cache.delete(key$1);
286
- }
287
- /**
288
- * Clear all entries from the cache.
289
- */
290
- clear() {
291
- this.cache.clear();
292
- }
293
- /**
294
- * Get the current number of entries in the cache.
295
- */
296
- get size() {
297
- return this.cache.size;
298
- }
299
- };
300
-
301
299
  //#endregion
302
300
  //#region src/objects/pdf-name.ts
303
301
  const NAME_NEEDS_ESCAPE = new Set([
@@ -305,11 +303,20 @@ const NAME_NEEDS_ESCAPE = new Set([
305
303
  ...DELIMITERS,
306
304
  CHAR_HASH
307
305
  ]);
306
+ /** Module-level encoder — avoids constructing one per escapeName call. */
307
+ const textEncoder = new TextEncoder();
308
308
  /**
309
- * Default cache size for PdfName interning.
310
- * Can be overridden via PdfName.setCacheSize().
309
+ * Check whether a name is pure "safe" ASCII — every char is printable ASCII
310
+ * (33–126) and not in the escape set. If so, no escaping is needed and we
311
+ * can skip the TextEncoder entirely.
311
312
  */
312
- const DEFAULT_NAME_CACHE_SIZE = 1e4;
313
+ function isSimpleAsciiName(name) {
314
+ for (let i = 0; i < name.length; i++) {
315
+ const c = name.charCodeAt(i);
316
+ if (c < 33 || c > 126 || NAME_NEEDS_ESCAPE.has(c)) return false;
317
+ }
318
+ return true;
319
+ }
313
320
  /**
314
321
  * Escape a PDF name for serialization.
315
322
  *
@@ -319,13 +326,19 @@ const DEFAULT_NAME_CACHE_SIZE = 1e4;
319
326
  * - The # character itself
320
327
  */
321
328
  function escapeName$1(name) {
322
- const bytes = new TextEncoder().encode(name);
329
+ if (isSimpleAsciiName(name)) return name;
330
+ const bytes = textEncoder.encode(name);
323
331
  let result = "";
324
- for (const byte of bytes) if (byte < 33 || byte > 126 || NAME_NEEDS_ESCAPE.has(byte)) result += `#${byte.toString(16).toUpperCase().padStart(2, "0")}`;
332
+ for (const byte of bytes) if (byte < 33 || byte > 126 || NAME_NEEDS_ESCAPE.has(byte)) result += `#${HEX_TABLE[byte]}`;
325
333
  else result += String.fromCharCode(byte);
326
334
  return result;
327
335
  }
328
336
  /**
337
+ * Default cache size for PdfName interning.
338
+ * Can be overridden via PdfName.setCacheSize().
339
+ */
340
+ const DEFAULT_NAME_CACHE_SIZE = 1e4;
341
+ /**
329
342
  * PDF name object (interned).
330
343
  *
331
344
  * In PDF: `/Type`, `/Page`, `/Length`
@@ -340,7 +353,7 @@ var PdfName = class PdfName {
340
353
  get type() {
341
354
  return "name";
342
355
  }
343
- static cache = new LRUCache(DEFAULT_NAME_CACHE_SIZE);
356
+ static cache = new LRUCache({ max: DEFAULT_NAME_CACHE_SIZE });
344
357
  /**
345
358
  * Pre-cached common names that should never be evicted.
346
359
  * These are stored separately from the LRU cache.
@@ -359,6 +372,8 @@ var PdfName = class PdfName {
359
372
  static Length = PdfName.createPermanent("Length");
360
373
  static Filter = PdfName.createPermanent("Filter");
361
374
  static FlateDecode = PdfName.createPermanent("FlateDecode");
375
+ /** Cached serialized form (e.g. "/Type"). Computed lazily on first toBytes(). */
376
+ cachedBytes = null;
362
377
  constructor(value) {
363
378
  this.value = value;
364
379
  }
@@ -394,7 +409,13 @@ var PdfName = class PdfName {
394
409
  return PdfName.cache.size;
395
410
  }
396
411
  toBytes(writer) {
397
- writer.writeAscii(`/${escapeName$1(this.value)}`);
412
+ let bytes = this.cachedBytes;
413
+ if (bytes === null) {
414
+ const escaped = escapeName$1(this.value);
415
+ bytes = textEncoder.encode(`/${escaped}`);
416
+ this.cachedBytes = bytes;
417
+ }
418
+ writer.writeBytes(bytes);
398
419
  }
399
420
  /**
400
421
  * Create a permanent (non-evictable) name.
@@ -427,7 +448,7 @@ var PdfRef = class PdfRef {
427
448
  get type() {
428
449
  return "ref";
429
450
  }
430
- static cache = new LRUCache(DEFAULT_REF_CACHE_SIZE);
451
+ static cache = new LRUCache({ max: DEFAULT_REF_CACHE_SIZE });
431
452
  constructor(objectNumber, generation) {
432
453
  this.objectNumber = objectNumber;
433
454
  this.generation = generation;
@@ -2044,77 +2065,6 @@ var PdfStream = class PdfStream extends PdfDict {
2044
2065
  }
2045
2066
  };
2046
2067
 
2047
- //#endregion
2048
- //#region src/helpers/buffer.ts
2049
- /**
2050
- * Buffer utilities for working with ArrayBuffer and Uint8Array.
2051
- */
2052
- /**
2053
- * Ensure we have a proper ArrayBuffer (not SharedArrayBuffer or slice).
2054
- *
2055
- * Web Crypto APIs require a true ArrayBuffer, not a view into one.
2056
- *
2057
- * @param data - Uint8Array to convert
2058
- * @returns ArrayBuffer containing the data
2059
- */
2060
- function toArrayBuffer(data) {
2061
- if (data.buffer instanceof ArrayBuffer && data.byteOffset === 0 && data.byteLength === data.buffer.byteLength) return data.buffer;
2062
- return data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
2063
- }
2064
- /**
2065
- * Concatenate multiple Uint8Arrays into a single Uint8Array.
2066
- *
2067
- * @param arrays - Arrays to concatenate
2068
- * @returns Single Uint8Array containing all data
2069
- */
2070
- function concatBytes(arrays) {
2071
- const totalLength = arrays.reduce((sum, arr) => sum + arr.length, 0);
2072
- const result = new Uint8Array(totalLength);
2073
- let offset = 0;
2074
- for (const arr of arrays) {
2075
- result.set(arr, offset);
2076
- offset += arr.length;
2077
- }
2078
- return result;
2079
- }
2080
- /**
2081
- * Convert bytes to uppercase hex string.
2082
- *
2083
- * @param bytes - Raw bytes
2084
- * @returns Hex string (e.g., "48656C6C6F")
2085
- *
2086
- * @example
2087
- * ```ts
2088
- * bytesToHex(new Uint8Array([72, 101, 108, 108, 111])) // "48656C6C6F"
2089
- * ```
2090
- */
2091
- function bytesToHex(bytes) {
2092
- let hex = "";
2093
- for (const byte of bytes) hex += byte.toString(16).toUpperCase().padStart(2, "0");
2094
- return hex;
2095
- }
2096
- /**
2097
- * Convert a hex string to bytes.
2098
- *
2099
- * Whitespace is ignored. Odd-length strings are padded with trailing 0.
2100
- *
2101
- * @param hex - Hex string (e.g., "48656C6C6F" or "48 65 6C 6C 6F")
2102
- * @returns Decoded bytes
2103
- *
2104
- * @example
2105
- * ```ts
2106
- * hexToBytes("48656C6C6F") // Uint8Array([72, 101, 108, 108, 111])
2107
- * hexToBytes("ABC") // Uint8Array([171, 192]) - padded to "ABC0"
2108
- * ```
2109
- */
2110
- function hexToBytes(hex) {
2111
- const clean = hex.replace(/\s/g, "");
2112
- const padded = clean.length % 2 === 1 ? `${clean}0` : clean;
2113
- const bytes = new Uint8Array(padded.length / 2);
2114
- for (let i = 0; i < bytes.length; i++) bytes[i] = Number.parseInt(padded.slice(i * 2, i * 2 + 2), 16);
2115
- return bytes;
2116
- }
2117
-
2118
2068
  //#endregion
2119
2069
  //#region src/content/operators.ts
2120
2070
  /**
@@ -24410,6 +24360,14 @@ function mergeBboxes(boxes) {
24410
24360
  //#endregion
24411
24361
  //#region src/text/line-grouper.ts
24412
24362
  /**
24363
+ * Minimum fraction of consecutive char pairs with decreasing x-positions
24364
+ * (in stream order) to classify a line as "RTL-placed".
24365
+ *
24366
+ * Figma/Canva exports produce ~100% decreasing pairs within words.
24367
+ * 80% tolerates small forward jumps at word boundaries.
24368
+ */
24369
+ const RTL_PLACED_THRESHOLD = .8;
24370
+ /**
24413
24371
  * Group extracted characters into lines and spans.
24414
24372
  *
24415
24373
  * @param chars - Array of extracted characters
@@ -24423,8 +24381,8 @@ function groupCharsIntoLines(chars, options = {}) {
24423
24381
  const lineGroups = groupByBaseline(chars, baselineTolerance);
24424
24382
  const lines = [];
24425
24383
  for (const group of lineGroups) {
24426
- const sorted = [...group].sort((a, b) => a.bbox.x - b.bbox.x);
24427
- const spans = groupIntoSpans(sorted, spaceThreshold);
24384
+ const { chars: sorted, rtlPlaced } = orderLineChars(group);
24385
+ const spans = groupIntoSpans(sorted, spaceThreshold, rtlPlaced);
24428
24386
  if (spans.length === 0) continue;
24429
24387
  const lineText = spans.map((s) => s.text).join("");
24430
24388
  const lineBbox = mergeBboxes(spans.map((s) => s.bbox));
@@ -24440,6 +24398,71 @@ function groupCharsIntoLines(chars, options = {}) {
24440
24398
  return lines;
24441
24399
  }
24442
24400
  /**
24401
+ * Determine the correct character order for a line.
24402
+ *
24403
+ * Design tools like Figma and Canva export PDFs where LTR characters are placed
24404
+ * right-to-left via TJ positioning adjustments (positive values move the pen left).
24405
+ * The font has near-zero glyph widths, so all positioning comes from TJ. Characters
24406
+ * appear in correct reading order in the content stream, but their x-positions
24407
+ * decrease monotonically.
24408
+ *
24409
+ * When this pattern is detected, we preserve content stream order instead of sorting
24410
+ * by x-position, which would reverse the text.
24411
+ *
24412
+ * **Limitation**: Detection requires `sequenceIndex` on every character. If any
24413
+ * character in the group lacks a `sequenceIndex`, we fall back to x-position sorting
24414
+ * because stream order cannot be reliably reconstructed.
24415
+ */
24416
+ function orderLineChars(group) {
24417
+ if (group.length <= 1) return {
24418
+ chars: [...group],
24419
+ rtlPlaced: false
24420
+ };
24421
+ if (!group.every((c) => c.sequenceIndex != null)) return {
24422
+ chars: [...group].sort((a, b) => a.bbox.x - b.bbox.x),
24423
+ rtlPlaced: false
24424
+ };
24425
+ const streamOrder = [...group].sort((a, b) => a.sequenceIndex - b.sequenceIndex);
24426
+ if (isRtlPlaced(streamOrder)) return {
24427
+ chars: streamOrder,
24428
+ rtlPlaced: true
24429
+ };
24430
+ return {
24431
+ chars: [...group].sort((a, b) => a.bbox.x - b.bbox.x),
24432
+ rtlPlaced: false
24433
+ };
24434
+ }
24435
+ /**
24436
+ * Detect whether characters are placed right-to-left in user space while
24437
+ * content stream order represents the correct reading order.
24438
+ *
24439
+ * Returns true when x-positions in stream order are predominantly decreasing
24440
+ * (≥ 80% of consecutive pairs). In that case, position-based sorting would
24441
+ * reverse the reading order, so we preserve stream order instead.
24442
+ *
24443
+ * This covers two real-world scenarios:
24444
+ * - **Design-tool PDFs** (Figma, Canva): LTR text placed right-to-left via
24445
+ * TJ positioning adjustments. Stream order = correct reading order.
24446
+ * - **Genuine RTL text** (Arabic, Hebrew): characters naturally placed
24447
+ * right-to-left. PDF producers typically emit them in reading order, so
24448
+ * stream order is again correct.
24449
+ *
24450
+ * In both cases, when x-positions decrease in stream order, preserving stream
24451
+ * order produces the correct reading order.
24452
+ *
24453
+ * **Known limitation**: mixed bidi text (e.g., Arabic with embedded English)
24454
+ * requires a full Unicode bidi algorithm, which is out of scope for this
24455
+ * heuristic. For mixed lines, neither stream order nor x-sort is fully
24456
+ * correct; a future bidi implementation should replace this heuristic.
24457
+ */
24458
+ function isRtlPlaced(streamOrder) {
24459
+ if (streamOrder.length < 2) return false;
24460
+ let decreasingCount = 0;
24461
+ for (let i = 1; i < streamOrder.length; i++) if (streamOrder[i].bbox.x < streamOrder[i - 1].bbox.x) decreasingCount++;
24462
+ const totalPairs = streamOrder.length - 1;
24463
+ return decreasingCount / totalPairs >= RTL_PLACED_THRESHOLD;
24464
+ }
24465
+ /**
24443
24466
  * Group characters by baseline Y coordinate.
24444
24467
  */
24445
24468
  function groupByBaseline(chars, tolerance) {
@@ -24461,7 +24484,7 @@ function groupByBaseline(chars, tolerance) {
24461
24484
  /**
24462
24485
  * Group characters into spans based on font/size and detect spaces.
24463
24486
  */
24464
- function groupIntoSpans(chars, spaceThreshold) {
24487
+ function groupIntoSpans(chars, spaceThreshold, rtlPlaced) {
24465
24488
  if (chars.length === 0) return [];
24466
24489
  const spans = [];
24467
24490
  let currentSpan = [chars[0]];
@@ -24471,14 +24494,14 @@ function groupIntoSpans(chars, spaceThreshold) {
24471
24494
  const prevChar = chars[i - 1];
24472
24495
  const char = chars[i];
24473
24496
  const fontChanged = char.fontName !== currentFontName || Math.abs(char.fontSize - currentFontSize) > .5;
24474
- const needsSpace = char.bbox.x - (prevChar.bbox.x + prevChar.bbox.width) > (prevChar.fontSize + char.fontSize) / 2 * spaceThreshold;
24497
+ const needsSpace = (rtlPlaced ? prevChar.bbox.x - (char.bbox.x + char.bbox.width) : char.bbox.x - (prevChar.bbox.x + prevChar.bbox.width)) > (prevChar.fontSize + char.fontSize) / 2 * spaceThreshold;
24475
24498
  if (fontChanged) {
24476
24499
  spans.push(buildSpan(currentSpan));
24477
24500
  currentSpan = [char];
24478
24501
  currentFontName = char.fontName;
24479
24502
  currentFontSize = char.fontSize;
24480
24503
  } else if (needsSpace) {
24481
- currentSpan.push(createSpaceChar(prevChar, char));
24504
+ currentSpan.push(createSpaceChar(prevChar, char, rtlPlaced));
24482
24505
  currentSpan.push(char);
24483
24506
  } else currentSpan.push(char);
24484
24507
  }
@@ -24503,9 +24526,9 @@ function buildSpan(chars) {
24503
24526
  /**
24504
24527
  * Create a synthetic space character between two characters.
24505
24528
  */
24506
- function createSpaceChar(before, after) {
24507
- const x = before.bbox.x + before.bbox.width;
24508
- const width = after.bbox.x - x;
24529
+ function createSpaceChar(before, after, rtlPlaced) {
24530
+ const x = rtlPlaced ? after.bbox.x + after.bbox.width : before.bbox.x + before.bbox.width;
24531
+ const width = rtlPlaced ? before.bbox.x - x : after.bbox.x - x;
24509
24532
  return {
24510
24533
  char: " ",
24511
24534
  bbox: {
@@ -24516,7 +24539,8 @@ function createSpaceChar(before, after) {
24516
24539
  },
24517
24540
  fontSize: (before.fontSize + after.fontSize) / 2,
24518
24541
  fontName: before.fontName,
24519
- baseline: (before.baseline + after.baseline) / 2
24542
+ baseline: (before.baseline + after.baseline) / 2,
24543
+ sequenceIndex: before.sequenceIndex != null ? before.sequenceIndex + .5 : void 0
24520
24544
  };
24521
24545
  }
24522
24546
  /**
@@ -26152,7 +26176,8 @@ var TextExtractor = class {
26152
26176
  },
26153
26177
  fontSize: this.state.effectiveFontSize,
26154
26178
  fontName: font.baseFontName,
26155
- baseline: bbox.baseline
26179
+ baseline: bbox.baseline,
26180
+ sequenceIndex: this.chars.length
26156
26181
  });
26157
26182
  const isSpace = char === " " || char === "\xA0";
26158
26183
  this.state.advanceChar(width, isSpace);
@@ -29131,7 +29156,7 @@ const INHERITABLE_PAGE_ATTRS = [
29131
29156
  * @example
29132
29157
  * ```typescript
29133
29158
  * const copier = new ObjectCopier(sourcePdf, destPdf);
29134
- * const copiedPageRef = await copier.copyPage(sourcePageRef);
29159
+ * const copiedPageRef = copier.copyPage(sourcePageRef);
29135
29160
  * destPdf.insertPage(0, copiedPageRef);
29136
29161
  * ```
29137
29162
  */
@@ -29161,14 +29186,14 @@ var ObjectCopier = class {
29161
29186
  * @param srcPageRef Reference to the page in source document
29162
29187
  * @returns Reference to the copied page in destination document
29163
29188
  */
29164
- async copyPage(srcPageRef) {
29189
+ copyPage(srcPageRef) {
29165
29190
  const srcPage = this.source.getObject(srcPageRef);
29166
29191
  if (!(srcPage instanceof PdfDict)) throw new Error(`Page object not found or not a dictionary: ${srcPageRef.objectNumber} ${srcPageRef.generation} R`);
29167
29192
  const cloned = srcPage.clone();
29168
29193
  for (const key$1 of INHERITABLE_PAGE_ATTRS) if (!cloned.has(key$1)) {
29169
29194
  const inherited = this.getInheritedAttribute(srcPage, key$1);
29170
29195
  if (inherited) {
29171
- const copied = await this.copyObject(inherited);
29196
+ const copied = this.copyObject(inherited);
29172
29197
  cloned.set(key$1, copied);
29173
29198
  }
29174
29199
  }
@@ -29177,17 +29202,17 @@ var ObjectCopier = class {
29177
29202
  if (!this.options.includeThumbnails) cloned.delete("Thumb");
29178
29203
  if (!this.options.includeStructure) cloned.delete("StructParents");
29179
29204
  cloned.delete("Parent");
29180
- const copiedPage = await this.copyDictValues(cloned);
29205
+ const copiedPage = this.copyDictValues(cloned);
29181
29206
  return this.dest.register(copiedPage);
29182
29207
  }
29183
29208
  /**
29184
29209
  * Deep copy any PDF object, remapping references to destination.
29185
29210
  */
29186
- async copyObject(obj) {
29187
- if (obj instanceof PdfRef) return await this.copyRef(obj);
29188
- if (obj instanceof PdfStream) return await this.copyStream(obj);
29189
- if (obj instanceof PdfDict) return await this.copyDict(obj);
29190
- if (obj instanceof PdfArray) return await this.copyArray(obj);
29211
+ copyObject(obj) {
29212
+ if (obj instanceof PdfRef) return this.copyRef(obj);
29213
+ if (obj instanceof PdfStream) return this.copyStream(obj);
29214
+ if (obj instanceof PdfDict) return this.copyDict(obj);
29215
+ if (obj instanceof PdfArray) return this.copyArray(obj);
29191
29216
  return obj;
29192
29217
  }
29193
29218
  /**
@@ -29196,7 +29221,7 @@ var ObjectCopier = class {
29196
29221
  * Handles circular references by registering a placeholder before
29197
29222
  * recursively copying the referenced object's contents.
29198
29223
  */
29199
- async copyRef(ref) {
29224
+ copyRef(ref) {
29200
29225
  const key$1 = `${ref.objectNumber}:${ref.generation}`;
29201
29226
  const existing = this.refMap.get(key$1);
29202
29227
  if (existing) return existing;
@@ -29210,7 +29235,7 @@ var ObjectCopier = class {
29210
29235
  if (srcObj instanceof PdfDict) return this.copyDictRef(key$1, srcObj);
29211
29236
  if (srcObj instanceof PdfArray) {
29212
29237
  const items = [];
29213
- for (const item of srcObj) items.push(await this.copyObject(item));
29238
+ for (const item of srcObj) items.push(this.copyObject(item));
29214
29239
  const copiedArr = new PdfArray(items);
29215
29240
  const destRef$1 = this.dest.register(copiedArr);
29216
29241
  this.refMap.set(key$1, destRef$1);
@@ -29223,17 +29248,17 @@ var ObjectCopier = class {
29223
29248
  /**
29224
29249
  * Copy a dict reference, handling circular references.
29225
29250
  */
29226
- async copyDictRef(key$1, srcDict) {
29251
+ copyDictRef(key$1, srcDict) {
29227
29252
  const cloned = srcDict.clone();
29228
29253
  const destRef = this.dest.register(cloned);
29229
29254
  this.refMap.set(key$1, destRef);
29230
- await this.copyDictValues(cloned);
29255
+ this.copyDictValues(cloned);
29231
29256
  return destRef;
29232
29257
  }
29233
29258
  /**
29234
29259
  * Copy a stream reference, handling circular references and encryption.
29235
29260
  */
29236
- async copyStreamRef(key$1, srcStream) {
29261
+ copyStreamRef(key$1, srcStream) {
29237
29262
  const sourceWasEncrypted = this.source.isEncrypted;
29238
29263
  const clonedDict = srcStream.clone();
29239
29264
  let streamData;
@@ -29268,7 +29293,7 @@ var ObjectCopier = class {
29268
29293
  const destRef = this.dest.register(copiedStream);
29269
29294
  this.refMap.set(key$1, destRef);
29270
29295
  for (const [entryKey, value] of clonedDict) {
29271
- const copied = await this.copyObject(value);
29296
+ const copied = this.copyObject(value);
29272
29297
  copiedStream.set(entryKey.value, copied);
29273
29298
  }
29274
29299
  return destRef;
@@ -29276,7 +29301,7 @@ var ObjectCopier = class {
29276
29301
  /**
29277
29302
  * Copy a dictionary, remapping all reference values.
29278
29303
  */
29279
- async copyDict(dict) {
29304
+ copyDict(dict) {
29280
29305
  const cloned = dict.clone();
29281
29306
  return this.copyDictValues(cloned);
29282
29307
  }
@@ -29284,9 +29309,9 @@ var ObjectCopier = class {
29284
29309
  * Copy all values in a dictionary, remapping references.
29285
29310
  * Modifies the dict in place and returns it.
29286
29311
  */
29287
- async copyDictValues(dict) {
29312
+ copyDictValues(dict) {
29288
29313
  for (const [key$1, value] of dict) {
29289
- const copied = await this.copyObject(value);
29314
+ const copied = this.copyObject(value);
29290
29315
  dict.set(key$1.value, copied);
29291
29316
  }
29292
29317
  return dict;
@@ -29294,9 +29319,9 @@ var ObjectCopier = class {
29294
29319
  /**
29295
29320
  * Copy an array, remapping all reference elements.
29296
29321
  */
29297
- async copyArray(arr) {
29322
+ copyArray(arr) {
29298
29323
  const items = [];
29299
- for (const item of arr) items.push(await this.copyObject(item));
29324
+ for (const item of arr) items.push(this.copyObject(item));
29300
29325
  return new PdfArray(items);
29301
29326
  }
29302
29327
  /**
@@ -29305,10 +29330,10 @@ var ObjectCopier = class {
29305
29330
  * If source wasn't encrypted, copies raw encoded bytes (fastest).
29306
29331
  * If source was encrypted, decodes and re-encodes with same filters.
29307
29332
  */
29308
- async copyStream(stream) {
29333
+ copyStream(stream) {
29309
29334
  const sourceWasEncrypted = this.source.isEncrypted;
29310
29335
  const clonedDict = stream.clone();
29311
- await this.copyDictValues(clonedDict);
29336
+ this.copyDictValues(clonedDict);
29312
29337
  if (!sourceWasEncrypted) return new PdfStream(clonedDict, stream.data);
29313
29338
  try {
29314
29339
  const decodedData = stream.getDecodedData();
@@ -30672,15 +30697,21 @@ function aesEncrypt(key$1, plaintext) {
30672
30697
  * @param key - 16 bytes (AES-128) or 32 bytes (AES-256)
30673
30698
  * @param data - IV (16 bytes) + ciphertext
30674
30699
  * @returns Decrypted plaintext
30675
- * @throws {Error} if data is too short or padding is invalid
30700
+ * @throws {Error} if data is too short to contain an IV
30676
30701
  */
30677
30702
  function aesDecrypt(key$1, data) {
30678
30703
  validateAesKey(key$1);
30679
30704
  if (data.length < AES_BLOCK_SIZE) throw new Error(`AES ciphertext too short: expected at least ${AES_BLOCK_SIZE} bytes for IV`);
30680
30705
  if (data.length === AES_BLOCK_SIZE) return new Uint8Array(0);
30681
30706
  const iv = data.subarray(0, AES_BLOCK_SIZE);
30682
- const ciphertext = data.subarray(AES_BLOCK_SIZE);
30683
- if (ciphertext.length % AES_BLOCK_SIZE !== 0) throw new Error(`AES ciphertext length must be multiple of ${AES_BLOCK_SIZE}, got ${ciphertext.length}`);
30707
+ let ciphertext = data.subarray(AES_BLOCK_SIZE);
30708
+ if (ciphertext.length % AES_BLOCK_SIZE !== 0) {
30709
+ const remainder = ciphertext.length % AES_BLOCK_SIZE;
30710
+ const aligned = ciphertext.length - remainder;
30711
+ console.warn(`AES ciphertext length (${ciphertext.length}) is not a multiple of ${AES_BLOCK_SIZE}, truncating ${remainder} trailing bytes`);
30712
+ if (aligned === 0) return new Uint8Array(0);
30713
+ ciphertext = ciphertext.subarray(0, aligned);
30714
+ }
30684
30715
  return cbc(key$1, iv).decrypt(ciphertext);
30685
30716
  }
30686
30717
  /**
@@ -33281,29 +33312,34 @@ var DocumentParser = class {
33281
33312
  * Decrypt an object's strings and stream data.
33282
33313
  */
33283
33314
  const decryptObject = (obj, objNum, genNum) => {
33284
- if (!securityHandler?.isAuthenticated) return obj;
33285
- if (obj instanceof PdfString) return new PdfString(securityHandler.decryptString(obj.bytes, objNum, genNum), obj.format);
33286
- if (obj instanceof PdfArray) {
33287
- const decryptedItems = [];
33288
- for (const item of obj) decryptedItems.push(decryptObject(item, objNum, genNum));
33289
- return new PdfArray(decryptedItems);
33290
- }
33291
- if (obj instanceof PdfStream) {
33292
- const streamType = obj.getName("Type")?.value;
33293
- if (!securityHandler.shouldEncryptStream(streamType)) return obj;
33294
- const newStream = new PdfStream(obj, securityHandler.decryptStream(obj.data, objNum, genNum));
33295
- for (const [key$1, value] of obj) {
33296
- const decryptedValue = decryptObject(value, objNum, genNum);
33297
- if (decryptedValue !== value) newStream.set(key$1.value, decryptedValue);
33315
+ try {
33316
+ if (!securityHandler?.isAuthenticated) return obj;
33317
+ if (obj instanceof PdfString) return new PdfString(securityHandler.decryptString(obj.bytes, objNum, genNum), obj.format);
33318
+ if (obj instanceof PdfArray) {
33319
+ const decryptedItems = [];
33320
+ for (const item of obj) decryptedItems.push(decryptObject(item, objNum, genNum));
33321
+ return new PdfArray(decryptedItems);
33298
33322
  }
33299
- return newStream;
33300
- }
33301
- if (obj instanceof PdfDict) {
33302
- const decryptedDict = new PdfDict();
33303
- for (const [key$1, value] of obj) decryptedDict.set(key$1.value, decryptObject(value, objNum, genNum));
33304
- return decryptedDict;
33323
+ if (obj instanceof PdfStream) {
33324
+ const streamType = obj.getName("Type")?.value;
33325
+ if (!securityHandler.shouldEncryptStream(streamType)) return obj;
33326
+ const newStream = new PdfStream(obj, securityHandler.decryptStream(obj.data, objNum, genNum));
33327
+ for (const [key$1, value] of obj) {
33328
+ const decryptedValue = decryptObject(value, objNum, genNum);
33329
+ if (decryptedValue !== value) newStream.set(key$1.value, decryptedValue);
33330
+ }
33331
+ return newStream;
33332
+ }
33333
+ if (obj instanceof PdfDict) {
33334
+ const decryptedDict = new PdfDict();
33335
+ for (const [key$1, value] of obj) decryptedDict.set(key$1.value, decryptObject(value, objNum, genNum));
33336
+ return decryptedDict;
33337
+ }
33338
+ return obj;
33339
+ } catch (error) {
33340
+ console.warn(`Failed to decrypt object ${objNum} ${genNum}:`, error);
33341
+ return obj;
33305
33342
  }
33306
- return obj;
33307
33343
  };
33308
33344
  const getObject = (ref) => {
33309
33345
  const key$1 = `${ref.objectNumber} ${ref.generation}`;
@@ -33781,11 +33817,12 @@ function writeIndirectObject(writer, ref, obj) {
33781
33817
  * Streams that already have filters are returned unchanged - this includes
33782
33818
  * image formats (DCTDecode, JPXDecode, etc.) that are already compressed.
33783
33819
  */
33784
- function prepareObjectForWrite(obj, compress) {
33820
+ const DEFAULT_COMPRESSION_THRESHOLD = 512;
33821
+ function prepareObjectForWrite(obj, compress, compressionThreshold) {
33785
33822
  if (!(obj instanceof PdfStream)) return obj;
33786
33823
  if (obj.has("Filter")) return obj;
33787
33824
  if (!compress) return obj;
33788
- if (obj.data.length === 0) return obj;
33825
+ if (obj.data.length < compressionThreshold) return obj;
33789
33826
  const compressed = FilterPipeline.encode(obj.data, { name: "FlateDecode" });
33790
33827
  if (compressed.length >= obj.data.length) return obj;
33791
33828
  const compressedStream = new PdfStream(obj, compressed);
@@ -33886,6 +33923,7 @@ function collectReachableRefs(registry, root, info, encrypt) {
33886
33923
  function writeComplete(registry, options) {
33887
33924
  const writer = new ByteWriter();
33888
33925
  const compress = options.compressStreams ?? true;
33926
+ const threshold = options.compressionThreshold ?? DEFAULT_COMPRESSION_THRESHOLD;
33889
33927
  const version$1 = options.version ?? "1.7";
33890
33928
  writer.writeAscii(`%PDF-${version$1}\n`);
33891
33929
  writer.writeBytes(new Uint8Array([
@@ -33901,7 +33939,7 @@ function writeComplete(registry, options) {
33901
33939
  for (const [ref, obj] of registry.entries()) {
33902
33940
  const key$1 = `${ref.objectNumber} ${ref.generation}`;
33903
33941
  if (!reachableKeys.has(key$1)) continue;
33904
- let prepared = prepareObjectForWrite(obj, compress);
33942
+ let prepared = prepareObjectForWrite(obj, compress, threshold);
33905
33943
  if (options.securityHandler && options.encrypt && ref !== options.encrypt) prepared = encryptObject(prepared, {
33906
33944
  handler: options.securityHandler,
33907
33945
  objectNumber: ref.objectNumber,
@@ -33986,12 +34024,13 @@ function writeIncremental(registry, options) {
33986
34024
  xrefOffset: options.originalXRefOffset
33987
34025
  };
33988
34026
  const compress = options.compressStreams ?? true;
34027
+ const threshold = options.compressionThreshold ?? DEFAULT_COMPRESSION_THRESHOLD;
33989
34028
  const writer = new ByteWriter(options.originalBytes);
33990
34029
  const lastByte = options.originalBytes[options.originalBytes.length - 1];
33991
34030
  if (lastByte !== LF && lastByte !== CR) writer.writeByte(10);
33992
34031
  const offsets = /* @__PURE__ */ new Map();
33993
34032
  for (const [ref, obj] of changes.modified) {
33994
- let prepared = prepareObjectForWrite(obj, compress);
34033
+ let prepared = prepareObjectForWrite(obj, compress, threshold);
33995
34034
  if (options.securityHandler && options.encrypt && ref !== options.encrypt) prepared = encryptObject(prepared, {
33996
34035
  handler: options.securityHandler,
33997
34036
  objectNumber: ref.objectNumber,
@@ -34004,7 +34043,7 @@ function writeIncremental(registry, options) {
34004
34043
  writeIndirectObject(writer, ref, prepared);
34005
34044
  }
34006
34045
  for (const [ref, obj] of changes.created) {
34007
- let prepared = prepareObjectForWrite(obj, compress);
34046
+ let prepared = prepareObjectForWrite(obj, compress, threshold);
34008
34047
  if (options.securityHandler && options.encrypt && ref !== options.encrypt) prepared = encryptObject(prepared, {
34009
34048
  handler: options.securityHandler,
34010
34049
  objectNumber: ref.objectNumber,
@@ -40849,7 +40888,7 @@ var PDF = class PDF {
40849
40888
  for (const index of indices) {
40850
40889
  const srcPage = source.getPage(index);
40851
40890
  if (!srcPage) throw new Error(`Source page ${index} not found`);
40852
- const copiedPageRef = await copier.copyPage(srcPage.ref);
40891
+ const copiedPageRef = copier.copyPage(srcPage.ref);
40853
40892
  copiedRefs.push(copiedPageRef);
40854
40893
  }
40855
40894
  let insertIndex = options.insertAt ?? this.getPageCount();
@@ -40925,7 +40964,7 @@ var PDF = class PDF {
40925
40964
  const srcResources = srcPage.dict.getDict("Resources", source.getObject.bind(source));
40926
40965
  let resources;
40927
40966
  if (srcResources) {
40928
- const copied = await copier.copyObject(srcResources);
40967
+ const copied = copier.copyObject(srcResources);
40929
40968
  resources = copied instanceof PdfDict ? copied : new PdfDict();
40930
40969
  } else resources = new PdfDict();
40931
40970
  const mediaBox = srcPage.getMediaBox();
@@ -41903,7 +41942,9 @@ var PDF = class PDF {
41903
41942
  encrypt: encryptRef,
41904
41943
  id: fileId,
41905
41944
  useXRefStream,
41906
- securityHandler
41945
+ securityHandler,
41946
+ compressStreams: options.compressStreams,
41947
+ compressionThreshold: options.compressionThreshold
41907
41948
  });
41908
41949
  this._pendingSecurity = { action: "none" };
41909
41950
  return result$1;
@@ -41915,7 +41956,9 @@ var PDF = class PDF {
41915
41956
  encrypt: encryptRef,
41916
41957
  id: fileId,
41917
41958
  useXRefStream,
41918
- securityHandler
41959
+ securityHandler,
41960
+ compressStreams: options.compressStreams,
41961
+ compressionThreshold: options.compressionThreshold
41919
41962
  });
41920
41963
  this._pendingSecurity = { action: "none" };
41921
41964
  return result;