@cj-tech-master/excelts 9.1.0 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. package/README.md +16 -1
  2. package/dist/browser/modules/archive/compression/crc32.js +1 -1
  3. package/dist/browser/modules/archive/crypto/aes.d.ts +0 -8
  4. package/dist/browser/modules/archive/crypto/aes.js +1 -20
  5. package/dist/browser/modules/archive/crypto/index.d.ts +2 -1
  6. package/dist/browser/modules/archive/crypto/index.js +3 -1
  7. package/dist/browser/modules/csv/parse/row-processor.d.ts +1 -1
  8. package/dist/browser/modules/csv/worker/worker-script.generated.js +1 -1
  9. package/dist/browser/modules/excel/utils/cell-matrix.js +1 -0
  10. package/dist/browser/modules/excel/utils/encryptor.browser.d.ts +4 -5
  11. package/dist/browser/modules/excel/utils/encryptor.browser.js +7 -12
  12. package/dist/browser/modules/excel/utils/encryptor.d.ts +1 -1
  13. package/dist/browser/modules/excel/utils/encryptor.js +4 -7
  14. package/dist/browser/modules/pdf/builder/document-builder.d.ts +517 -0
  15. package/dist/browser/modules/pdf/builder/document-builder.js +1493 -0
  16. package/dist/browser/modules/pdf/builder/form-appearance.d.ts +56 -0
  17. package/dist/browser/modules/pdf/builder/form-appearance.js +140 -0
  18. package/dist/browser/modules/pdf/builder/image-utils.d.ts +39 -0
  19. package/dist/browser/modules/pdf/builder/image-utils.js +129 -0
  20. package/dist/browser/modules/pdf/builder/pdf-editor.d.ts +230 -0
  21. package/dist/browser/modules/pdf/builder/pdf-editor.js +1574 -0
  22. package/dist/browser/modules/pdf/builder/resource-merger.d.ts +41 -0
  23. package/dist/browser/modules/pdf/builder/resource-merger.js +258 -0
  24. package/dist/browser/modules/pdf/core/digital-signature.d.ts +109 -0
  25. package/dist/browser/modules/pdf/core/digital-signature.js +659 -0
  26. package/dist/browser/modules/pdf/core/encryption.js +8 -7
  27. package/dist/browser/modules/pdf/core/pdf-object.d.ts +11 -0
  28. package/dist/browser/modules/pdf/core/pdf-object.js +38 -0
  29. package/dist/browser/modules/pdf/core/pdf-stream.d.ts +32 -0
  30. package/dist/browser/modules/pdf/core/pdf-stream.js +66 -0
  31. package/dist/browser/modules/pdf/core/pdf-writer.d.ts +55 -1
  32. package/dist/browser/modules/pdf/core/pdf-writer.js +271 -6
  33. package/dist/browser/modules/pdf/core/pdfa.d.ts +62 -0
  34. package/dist/browser/modules/pdf/core/pdfa.js +261 -0
  35. package/dist/browser/modules/pdf/index.d.ts +11 -0
  36. package/dist/browser/modules/pdf/index.js +9 -0
  37. package/dist/browser/modules/pdf/reader/bookmark-extractor.d.ts +35 -0
  38. package/dist/browser/modules/pdf/reader/bookmark-extractor.js +324 -0
  39. package/dist/browser/modules/pdf/reader/pdf-decrypt.js +6 -5
  40. package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +17 -0
  41. package/dist/browser/modules/pdf/reader/pdf-reader.js +26 -2
  42. package/dist/browser/modules/pdf/reader/table-extractor.d.ts +69 -0
  43. package/dist/browser/modules/pdf/reader/table-extractor.js +365 -0
  44. package/dist/browser/modules/pdf/render/layout-engine.d.ts +21 -1
  45. package/dist/browser/modules/pdf/render/layout-engine.js +112 -5
  46. package/dist/browser/modules/pdf/render/page-renderer.d.ts +2 -9
  47. package/dist/browser/modules/pdf/render/page-renderer.js +62 -103
  48. package/dist/browser/modules/pdf/render/pdf-exporter.js +2 -61
  49. package/dist/browser/modules/pdf/render/style-converter.d.ts +4 -0
  50. package/dist/browser/modules/pdf/render/style-converter.js +1 -1
  51. package/dist/browser/modules/pdf/types.d.ts +14 -1
  52. package/dist/browser/modules/stream/browser/readable.js +8 -2
  53. package/dist/browser/utils/crypto.browser.d.ts +64 -0
  54. package/dist/browser/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +91 -101
  55. package/dist/browser/utils/crypto.d.ts +97 -0
  56. package/dist/browser/utils/crypto.js +209 -0
  57. package/dist/cjs/modules/archive/compression/crc32.js +1 -1
  58. package/dist/cjs/modules/archive/crypto/aes.js +2 -23
  59. package/dist/cjs/modules/archive/crypto/index.js +3 -1
  60. package/dist/cjs/modules/csv/worker/worker-script.generated.js +1 -1
  61. package/dist/cjs/modules/excel/utils/cell-matrix.js +1 -0
  62. package/dist/cjs/modules/excel/utils/encryptor.browser.js +7 -12
  63. package/dist/cjs/modules/excel/utils/encryptor.js +4 -10
  64. package/dist/cjs/modules/pdf/builder/document-builder.js +1532 -0
  65. package/dist/cjs/modules/pdf/builder/form-appearance.js +145 -0
  66. package/dist/cjs/modules/pdf/builder/image-utils.js +135 -0
  67. package/dist/cjs/modules/pdf/builder/pdf-editor.js +1612 -0
  68. package/dist/cjs/modules/pdf/builder/resource-merger.js +263 -0
  69. package/dist/cjs/modules/pdf/core/digital-signature.js +667 -0
  70. package/dist/cjs/modules/pdf/core/encryption.js +8 -7
  71. package/dist/cjs/modules/pdf/core/pdf-object.js +38 -0
  72. package/dist/cjs/modules/pdf/core/pdf-stream.js +66 -0
  73. package/dist/cjs/modules/pdf/core/pdf-writer.js +272 -6
  74. package/dist/cjs/modules/pdf/core/pdfa.js +266 -0
  75. package/dist/cjs/modules/pdf/index.js +19 -1
  76. package/dist/cjs/modules/pdf/reader/bookmark-extractor.js +327 -0
  77. package/dist/cjs/modules/pdf/reader/pdf-decrypt.js +6 -5
  78. package/dist/cjs/modules/pdf/reader/pdf-reader.js +26 -2
  79. package/dist/cjs/modules/pdf/reader/table-extractor.js +368 -0
  80. package/dist/cjs/modules/pdf/render/layout-engine.js +113 -4
  81. package/dist/cjs/modules/pdf/render/page-renderer.js +63 -105
  82. package/dist/cjs/modules/pdf/render/pdf-exporter.js +3 -62
  83. package/dist/cjs/modules/pdf/render/style-converter.js +1 -0
  84. package/dist/cjs/modules/stream/browser/readable.js +8 -2
  85. package/dist/cjs/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +95 -102
  86. package/dist/cjs/utils/crypto.js +228 -0
  87. package/dist/esm/modules/archive/compression/crc32.js +1 -1
  88. package/dist/esm/modules/archive/crypto/aes.js +1 -20
  89. package/dist/esm/modules/archive/crypto/index.js +3 -1
  90. package/dist/esm/modules/csv/worker/worker-script.generated.js +1 -1
  91. package/dist/esm/modules/excel/utils/cell-matrix.js +1 -0
  92. package/dist/esm/modules/excel/utils/encryptor.browser.js +7 -12
  93. package/dist/esm/modules/excel/utils/encryptor.js +4 -7
  94. package/dist/esm/modules/pdf/builder/document-builder.js +1493 -0
  95. package/dist/esm/modules/pdf/builder/form-appearance.js +140 -0
  96. package/dist/esm/modules/pdf/builder/image-utils.js +129 -0
  97. package/dist/esm/modules/pdf/builder/pdf-editor.js +1574 -0
  98. package/dist/esm/modules/pdf/builder/resource-merger.js +258 -0
  99. package/dist/esm/modules/pdf/core/digital-signature.js +659 -0
  100. package/dist/esm/modules/pdf/core/encryption.js +8 -7
  101. package/dist/esm/modules/pdf/core/pdf-object.js +38 -0
  102. package/dist/esm/modules/pdf/core/pdf-stream.js +66 -0
  103. package/dist/esm/modules/pdf/core/pdf-writer.js +271 -6
  104. package/dist/esm/modules/pdf/core/pdfa.js +261 -0
  105. package/dist/esm/modules/pdf/index.js +9 -0
  106. package/dist/esm/modules/pdf/reader/bookmark-extractor.js +324 -0
  107. package/dist/esm/modules/pdf/reader/pdf-decrypt.js +6 -5
  108. package/dist/esm/modules/pdf/reader/pdf-reader.js +26 -2
  109. package/dist/esm/modules/pdf/reader/table-extractor.js +365 -0
  110. package/dist/esm/modules/pdf/render/layout-engine.js +112 -5
  111. package/dist/esm/modules/pdf/render/page-renderer.js +62 -103
  112. package/dist/esm/modules/pdf/render/pdf-exporter.js +2 -61
  113. package/dist/esm/modules/pdf/render/style-converter.js +1 -1
  114. package/dist/esm/modules/stream/browser/readable.js +8 -2
  115. package/dist/esm/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +91 -101
  116. package/dist/esm/utils/crypto.js +209 -0
  117. package/dist/iife/excelts.iife.js +1248 -1074
  118. package/dist/iife/excelts.iife.js.map +1 -1
  119. package/dist/iife/excelts.iife.min.js +53 -54
  120. package/dist/types/modules/archive/crypto/aes.d.ts +0 -8
  121. package/dist/types/modules/archive/crypto/index.d.ts +2 -1
  122. package/dist/types/modules/csv/parse/row-processor.d.ts +1 -1
  123. package/dist/types/modules/excel/utils/encryptor.browser.d.ts +4 -5
  124. package/dist/types/modules/excel/utils/encryptor.d.ts +1 -1
  125. package/dist/types/modules/pdf/builder/document-builder.d.ts +517 -0
  126. package/dist/types/modules/pdf/builder/form-appearance.d.ts +56 -0
  127. package/dist/types/modules/pdf/builder/image-utils.d.ts +39 -0
  128. package/dist/types/modules/pdf/builder/pdf-editor.d.ts +230 -0
  129. package/dist/types/modules/pdf/builder/resource-merger.d.ts +41 -0
  130. package/dist/types/modules/pdf/core/digital-signature.d.ts +109 -0
  131. package/dist/types/modules/pdf/core/pdf-object.d.ts +11 -0
  132. package/dist/types/modules/pdf/core/pdf-stream.d.ts +32 -0
  133. package/dist/types/modules/pdf/core/pdf-writer.d.ts +55 -1
  134. package/dist/types/modules/pdf/core/pdfa.d.ts +62 -0
  135. package/dist/types/modules/pdf/index.d.ts +11 -0
  136. package/dist/types/modules/pdf/reader/bookmark-extractor.d.ts +35 -0
  137. package/dist/types/modules/pdf/reader/pdf-reader.d.ts +17 -0
  138. package/dist/types/modules/pdf/reader/table-extractor.d.ts +69 -0
  139. package/dist/types/modules/pdf/render/layout-engine.d.ts +21 -1
  140. package/dist/types/modules/pdf/render/page-renderer.d.ts +2 -9
  141. package/dist/types/modules/pdf/render/style-converter.d.ts +4 -0
  142. package/dist/types/modules/pdf/types.d.ts +14 -1
  143. package/dist/types/utils/crypto.browser.d.ts +64 -0
  144. package/dist/types/utils/crypto.d.ts +97 -0
  145. package/package.json +110 -111
  146. package/dist/browser/modules/pdf/core/crypto.d.ts +0 -65
  147. package/dist/types/modules/pdf/core/crypto.d.ts +0 -65
@@ -142,12 +142,37 @@ function encodePdfUtf16String(value) {
142
142
  export class PdfDict {
143
143
  constructor() {
144
144
  this.entries = [];
145
+ /** When set, toString() returns this raw string. set() appends/replaces entries within it. */
146
+ this._raw = null;
147
+ }
148
+ /**
149
+ * Create a PdfDict that wraps a pre-serialized dictionary string.
150
+ * toString() returns the raw string (with any set() overrides applied).
151
+ */
152
+ static fromRawString(raw) {
153
+ const d = new PdfDict();
154
+ d._raw = raw;
155
+ return d;
145
156
  }
146
157
  /**
147
158
  * Set a dictionary entry. The key should NOT include the leading /.
148
159
  * The value should be a pre-serialized PDF value string.
149
160
  */
150
161
  set(key, value) {
162
+ if (this._raw !== null) {
163
+ // Update or append the entry in the raw string.
164
+ // Use a regex that matches /Key followed by a simple token value (number, name, ref).
165
+ // This is safe for keys like /Length, /Filter which have simple values.
166
+ const keyPattern = new RegExp(`(/${key})\\s+\\S+(?:\\s+\\d+\\s+R)?`);
167
+ if (keyPattern.test(this._raw)) {
168
+ this._raw = this._raw.replace(keyPattern, `/${key} ${value}`);
169
+ }
170
+ else {
171
+ // Append before the closing >>
172
+ this._raw = this._raw.replace(/>>$/, `\n${pdfName(key)} ${value}\n>>`);
173
+ }
174
+ return this;
175
+ }
151
176
  const idx = this.entries.findIndex(([k]) => k === key);
152
177
  if (idx >= 0) {
153
178
  this.entries[idx] = [key, value];
@@ -166,10 +191,23 @@ export class PdfDict {
166
191
  }
167
192
  return this;
168
193
  }
194
+ /**
195
+ * Remove a dictionary entry by key.
196
+ */
197
+ delete(key) {
198
+ const idx = this.entries.findIndex(([k]) => k === key);
199
+ if (idx >= 0) {
200
+ this.entries.splice(idx, 1);
201
+ }
202
+ return this;
203
+ }
169
204
  /**
170
205
  * Serialize to a PDF dictionary string.
171
206
  */
172
207
  toString() {
208
+ if (this._raw !== null) {
209
+ return this._raw;
210
+ }
173
211
  const parts = ["<<"];
174
212
  for (const [key, value] of this.entries) {
175
213
  parts.push(`${pdfName(key)} ${value}`);
@@ -129,6 +129,30 @@ export class PdfContentStream {
129
129
  this.parts.push(`${pdfNumber(x)} ${pdfNumber(y)} ${pdfNumber(width)} ${pdfNumber(height)} re`);
130
130
  return this;
131
131
  }
132
+ /**
133
+ * Append a cubic Bezier curve to the current path.
134
+ * From current point to (x3, y3), with control points (x1, y1) and (x2, y2).
135
+ */
136
+ curveTo(x1, y1, x2, y2, x3, y3) {
137
+ this.parts.push(`${pdfNumber(x1)} ${pdfNumber(y1)} ${pdfNumber(x2)} ${pdfNumber(y2)} ${pdfNumber(x3)} ${pdfNumber(y3)} c`);
138
+ return this;
139
+ }
140
+ /**
141
+ * Append a cubic Bezier curve where the first control point is the current point.
142
+ * From current point to (x3, y3), with control points (current point) and (x2, y2).
143
+ */
144
+ curveToV(x2, y2, x3, y3) {
145
+ this.parts.push(`${pdfNumber(x2)} ${pdfNumber(y2)} ${pdfNumber(x3)} ${pdfNumber(y3)} v`);
146
+ return this;
147
+ }
148
+ /**
149
+ * Append a cubic Bezier curve where the second control point equals (x3, y3).
150
+ * From current point to (x3, y3), with control point (x1, y1).
151
+ */
152
+ curveToY(x1, y1, x3, y3) {
153
+ this.parts.push(`${pdfNumber(x1)} ${pdfNumber(y1)} ${pdfNumber(x3)} ${pdfNumber(y3)} y`);
154
+ return this;
155
+ }
132
156
  // ===========================================================================
133
157
  // Path Painting
134
158
  // ===========================================================================
@@ -354,6 +378,48 @@ export class PdfContentStream {
354
378
  }
355
379
  return this.moveTo(x1, y1).lineTo(x2, y2).stroke().restore();
356
380
  }
381
+ /**
382
+ * Append an ellipse to the current path using 4 cubic Bezier curves.
383
+ * (cx, cy) is the center; rx, ry are the radii.
384
+ *
385
+ * Uses the standard kappa = 4 * (sqrt(2) - 1) / 3 ≈ 0.5522847 approximation.
386
+ */
387
+ ellipse(cx, cy, rx, ry) {
388
+ const k = 0.5522847;
389
+ const kx = k * rx;
390
+ const ky = k * ry;
391
+ this.moveTo(cx + rx, cy);
392
+ this.curveTo(cx + rx, cy + ky, cx + kx, cy + ry, cx, cy + ry);
393
+ this.curveTo(cx - kx, cy + ry, cx - rx, cy + ky, cx - rx, cy);
394
+ this.curveTo(cx - rx, cy - ky, cx - kx, cy - ry, cx, cy - ry);
395
+ this.curveTo(cx + kx, cy - ry, cx + rx, cy - ky, cx + rx, cy);
396
+ return this;
397
+ }
398
+ /**
399
+ * Append a circle to the current path.
400
+ * (cx, cy) is the center; r is the radius.
401
+ */
402
+ circle(cx, cy, r) {
403
+ return this.ellipse(cx, cy, r, r);
404
+ }
405
+ /**
406
+ * Append a rounded rectangle to the current path.
407
+ * (x, y) is the lower-left corner; r is the corner radius.
408
+ */
409
+ roundedRect(x, y, width, height, r) {
410
+ const k = 0.5522847;
411
+ const kr = k * r;
412
+ this.moveTo(x + r, y);
413
+ this.lineTo(x + width - r, y);
414
+ this.curveTo(x + width - r + kr, y, x + width, y + r - kr, x + width, y + r);
415
+ this.lineTo(x + width, y + height - r);
416
+ this.curveTo(x + width, y + height - r + kr, x + width - r + kr, y + height, x + width - r, y + height);
417
+ this.lineTo(x + r, y + height);
418
+ this.curveTo(x + r - kr, y + height, x, y + height - r + kr, x, y + height - r);
419
+ this.lineTo(x, y + r);
420
+ this.curveTo(x, y + r - kr, x + r - kr, y, x + r, y);
421
+ return this;
422
+ }
357
423
  // ===========================================================================
358
424
  // Serialization
359
425
  // ===========================================================================
@@ -8,6 +8,9 @@
8
8
  * 3. Cross-reference table
9
9
  * 4. Trailer (with document catalog reference)
10
10
  *
11
+ * Also provides {@link buildIncremental} for appending incremental updates
12
+ * to an existing PDF without rewriting the original bytes.
13
+ *
11
14
  * Encryption uses AES-256 (V=5, R=5) per ISO 32000-2:2020.
12
15
  *
13
16
  * @see ISO 32000-2:2020, Chapter 7.5 — File Structure
@@ -35,6 +38,14 @@ export class PdfWriter {
35
38
  this.catalogRef = 0;
36
39
  this.infoRef = 0;
37
40
  this.encryption = null;
41
+ this.pdfVersion = "2.0";
42
+ }
43
+ /**
44
+ * Set the PDF version string (e.g. "1.4", "1.7", "2.0").
45
+ * Default is "2.0".
46
+ */
47
+ setVersion(version) {
48
+ this.pdfVersion = version;
38
49
  }
39
50
  /**
40
51
  * Enable encryption for this document.
@@ -64,10 +75,11 @@ export class PdfWriter {
64
75
  content: typeof dict === "string" ? dict : dict.toString()
65
76
  });
66
77
  }
67
- addStreamObject(objectNumber, dict, data) {
78
+ addStreamObject(objectNumber, dict, data, options) {
68
79
  let streamData = data instanceof Uint8Array ? data : data.toUint8Array();
80
+ const compress = options?.compress ?? true;
69
81
  // Compress with zlib (RFC 1950) for PDF /FlateDecode
70
- if (streamData.length > 256 && !dict.toString().includes("/Filter")) {
82
+ if (compress && streamData.length > 256 && !dict.toString().includes("/Filter")) {
71
83
  const compressed = zlibSync(streamData, { level: 6 });
72
84
  if (compressed.length < streamData.length) {
73
85
  dict.set("Filter", "/FlateDecode");
@@ -86,6 +98,22 @@ export class PdfWriter {
86
98
  streamData
87
99
  });
88
100
  }
101
+ /**
102
+ * Return all stored objects for inspection (e.g., incremental update remapping).
103
+ * Stream objects include their binary data.
104
+ */
105
+ getObjects() {
106
+ return this.objects.map(o => {
107
+ const result = {
108
+ objectNumber: o.objectNumber,
109
+ content: o.content
110
+ };
111
+ if ("streamData" in o) {
112
+ result.streamData = o.streamData;
113
+ }
114
+ return result;
115
+ });
116
+ }
89
117
  /**
90
118
  * Set the document catalog object number.
91
119
  * This is required and references the root of the document structure.
@@ -144,14 +172,25 @@ export class PdfWriter {
144
172
  }
145
173
  /**
146
174
  * Create and add the Catalog dictionary.
175
+ *
176
+ * @param pagesRef - Object number of the Pages tree root
177
+ * @param optionsOrOutlinesRef - Either an outlinesRef number (legacy) or an options object
147
178
  */
148
- addCatalog(pagesRef, outlinesRef) {
179
+ addCatalog(pagesRef, optionsOrOutlinesRef) {
180
+ const resolvedOptions = typeof optionsOrOutlinesRef === "number"
181
+ ? { outlinesRef: optionsOrOutlinesRef }
182
+ : (optionsOrOutlinesRef ?? {});
149
183
  const objNum = this.allocObject();
150
184
  const dict = new PdfDict().set("Type", "/Catalog").set("Pages", pdfRef(pagesRef));
151
- if (outlinesRef) {
152
- dict.set("Outlines", pdfRef(outlinesRef));
185
+ if (resolvedOptions.outlinesRef) {
186
+ dict.set("Outlines", pdfRef(resolvedOptions.outlinesRef));
153
187
  dict.set("PageMode", "/UseOutlines");
154
188
  }
189
+ if (resolvedOptions.extraEntries) {
190
+ for (const [key, value] of resolvedOptions.extraEntries) {
191
+ dict.set(key, value);
192
+ }
193
+ }
155
194
  this.addObject(objNum, dict);
156
195
  this.setCatalog(objNum);
157
196
  return objNum;
@@ -171,7 +210,7 @@ export class PdfWriter {
171
210
  let byteOffset = 0;
172
211
  // --- Header ---
173
212
  // Include a comment with high bytes to signal binary content per PDF spec §3.4.1
174
- const headerStr = "%PDF-2.0\n";
213
+ const headerStr = `%PDF-${this.pdfVersion}\n`;
175
214
  const headerStrBytes = encoder.encode(headerStr);
176
215
  chunks.push(headerStrBytes);
177
216
  byteOffset += headerStrBytes.length;
@@ -460,3 +499,229 @@ function unescapePdfString(value) {
460
499
  }
461
500
  return result;
462
501
  }
502
+ // =============================================================================
503
+ // Incremental Update
504
+ // =============================================================================
505
+ /**
506
+ * Build an incremental update that appends new/modified objects to an
507
+ * existing PDF without rewriting the original bytes.
508
+ *
509
+ * The result is `originalData + "\n" + new objects + xref + trailer + startxref + %%EOF`.
510
+ * The new trailer's `/Prev` points to the original xref offset so that PDF
511
+ * readers can follow the chain of incremental updates.
512
+ *
513
+ * @param originalData - The original, unmodified PDF bytes (preserved byte-for-byte)
514
+ * @param modifiedObjects - Map of object number → serialized content.
515
+ * Values are either a plain string (for non-stream objects) or
516
+ * `{ dict, data }` for stream objects.
517
+ * @param newTrailerEntries - Additional/override entries for the new trailer.
518
+ * Keys like `/Root`, `/Info`, `/Encrypt`, `/ID` are preserved from the
519
+ * original trailer by default but can be overridden here.
520
+ *
521
+ * @see ISO 32000-2:2020, §7.5.6 — Incremental Updates
522
+ */
523
+ export function buildIncremental(originalData, modifiedObjects, newTrailerEntries) {
524
+ if (modifiedObjects.size === 0) {
525
+ return originalData;
526
+ }
527
+ const encoder = new TextEncoder();
528
+ // --- Locate the original startxref offset ---
529
+ const oldXrefOffset = findOriginalXrefOffset(originalData);
530
+ // --- Extract original trailer entries we want to preserve ---
531
+ const originalTrailerEntries = extractOriginalTrailerEntries(originalData);
532
+ // --- Determine /Size for the new trailer ---
533
+ // /Size must be one more than the highest object number across original + new
534
+ const originalSize = originalTrailerEntries.get("Size") ?? "0";
535
+ let maxObjNum = parseInt(originalSize, 10) - 1;
536
+ for (const objNum of modifiedObjects.keys()) {
537
+ if (objNum > maxObjNum) {
538
+ maxObjNum = objNum;
539
+ }
540
+ }
541
+ const newSize = maxObjNum + 1;
542
+ // --- Build the appended body ---
543
+ const chunks = [];
544
+ let byteOffset = originalData.length;
545
+ // Start with a newline separator after the original %%EOF
546
+ const separator = encoder.encode("\n");
547
+ chunks.push(separator);
548
+ byteOffset += separator.length;
549
+ // Sort modified objects by object number for deterministic output
550
+ const sortedObjects = [...modifiedObjects.entries()].sort((a, b) => a[0] - b[0]);
551
+ // Track offsets for the xref entries
552
+ const objectOffsets = new Map();
553
+ for (const [objNum, content] of sortedObjects) {
554
+ objectOffsets.set(objNum, byteOffset);
555
+ const objHeader = encoder.encode(`${objNum} 0 obj\n`);
556
+ chunks.push(objHeader);
557
+ byteOffset += objHeader.length;
558
+ if (typeof content === "string") {
559
+ // Non-stream object
560
+ const contentBytes = encoder.encode(content + "\n");
561
+ chunks.push(contentBytes);
562
+ byteOffset += contentBytes.length;
563
+ }
564
+ else {
565
+ // Stream object: dict + stream data
566
+ let streamData = content.data;
567
+ const dict = content.dict;
568
+ // Compress if beneficial and not already filtered
569
+ if (streamData.length > 256 && !dict.toString().includes("/Filter")) {
570
+ const compressed = zlibSync(streamData, { level: 6 });
571
+ if (compressed.length < streamData.length) {
572
+ dict.set("Filter", "/FlateDecode");
573
+ streamData = compressed;
574
+ }
575
+ }
576
+ dict.set("Length", pdfNumber(streamData.length));
577
+ const dictBytes = encoder.encode(dict.toString() + "\n");
578
+ chunks.push(dictBytes);
579
+ byteOffset += dictBytes.length;
580
+ const streamStart = encoder.encode("stream\n");
581
+ chunks.push(streamStart);
582
+ byteOffset += streamStart.length;
583
+ chunks.push(streamData);
584
+ byteOffset += streamData.length;
585
+ const streamEnd = encoder.encode("\nendstream\n");
586
+ chunks.push(streamEnd);
587
+ byteOffset += streamEnd.length;
588
+ }
589
+ const objFooter = encoder.encode("endobj\n");
590
+ chunks.push(objFooter);
591
+ byteOffset += objFooter.length;
592
+ }
593
+ // --- Build the new xref section ---
594
+ const xrefOffset = byteOffset;
595
+ // Group consecutive object numbers into subsections
596
+ const objNums = [...objectOffsets.keys()].sort((a, b) => a - b);
597
+ const subsections = [];
598
+ for (const objNum of objNums) {
599
+ const last = subsections[subsections.length - 1];
600
+ if (last && objNum === last.start + last.entries.length) {
601
+ // Consecutive — extend current subsection
602
+ last.entries.push({ objNum, offset: objectOffsets.get(objNum) });
603
+ }
604
+ else {
605
+ // New subsection
606
+ subsections.push({
607
+ start: objNum,
608
+ entries: [{ objNum, offset: objectOffsets.get(objNum) }]
609
+ });
610
+ }
611
+ }
612
+ let xrefStr = "xref\n";
613
+ for (const sub of subsections) {
614
+ xrefStr += `${sub.start} ${sub.entries.length}\n`;
615
+ for (const entry of sub.entries) {
616
+ const offsetStr = entry.offset.toString().padStart(10, "0");
617
+ xrefStr += `${offsetStr} 00000 n \n`;
618
+ }
619
+ }
620
+ const xrefBytes = encoder.encode(xrefStr);
621
+ chunks.push(xrefBytes);
622
+ // --- Build the new trailer ---
623
+ let trailerStr = "trailer\n<<\n";
624
+ trailerStr += `/Size ${newSize}\n`;
625
+ // Preserve original trailer keys: Root, Info, Encrypt, ID
626
+ for (const key of ["Root", "Info", "Encrypt", "ID"]) {
627
+ if (newTrailerEntries.has(key)) {
628
+ trailerStr += `/${key} ${newTrailerEntries.get(key)}\n`;
629
+ }
630
+ else if (originalTrailerEntries.has(key)) {
631
+ trailerStr += `/${key} ${originalTrailerEntries.get(key)}\n`;
632
+ }
633
+ }
634
+ // Add any extra new trailer entries not already handled
635
+ for (const [key, value] of newTrailerEntries) {
636
+ if (key === "Root" || key === "Info" || key === "Encrypt" || key === "ID" || key === "Size") {
637
+ continue; // Already handled above
638
+ }
639
+ trailerStr += `/${key} ${value}\n`;
640
+ }
641
+ // /Prev points to the original xref offset
642
+ trailerStr += `/Prev ${oldXrefOffset}\n`;
643
+ trailerStr += ">>\n";
644
+ trailerStr += "startxref\n";
645
+ trailerStr += `${xrefOffset}\n`;
646
+ trailerStr += "%%EOF\n";
647
+ const trailerBytes = encoder.encode(trailerStr);
648
+ chunks.push(trailerBytes);
649
+ // --- Concatenate: originalData + appended chunks ---
650
+ return concatUint8Arrays([originalData, ...chunks]);
651
+ }
652
+ /**
653
+ * Find the xref offset stored after the last `startxref` keyword in the PDF.
654
+ */
655
+ function findOriginalXrefOffset(data) {
656
+ // Scan backward from the end to find "startxref"
657
+ const keyword = "startxref";
658
+ const decoder = new TextDecoder("latin1");
659
+ // Search in the last 1024 bytes (%%EOF + startxref are typically near the end)
660
+ const searchStart = Math.max(0, data.length - 1024);
661
+ const tail = decoder.decode(data.subarray(searchStart));
662
+ const idx = tail.lastIndexOf(keyword);
663
+ if (idx < 0) {
664
+ throw new PdfStructureError("Could not find startxref in original PDF");
665
+ }
666
+ // Extract the number after "startxref"
667
+ const afterKeyword = tail.substring(idx + keyword.length).trim();
668
+ const match = afterKeyword.match(/^(\d+)/);
669
+ if (!match) {
670
+ throw new PdfStructureError("Invalid startxref offset in original PDF");
671
+ }
672
+ return parseInt(match[1], 10);
673
+ }
674
+ /**
675
+ * Extract key trailer entries from the original PDF as serialized strings.
676
+ * This is a lightweight scan — it doesn't fully parse the trailer, just
677
+ * extracts the values we need for preservation.
678
+ */
679
+ function extractOriginalTrailerEntries(data) {
680
+ const entries = new Map();
681
+ const decoder = new TextDecoder("latin1");
682
+ // Decode the whole file so the last "trailer" keyword can be located
683
+ const text = decoder.decode(data);
684
+ // Find the last trailer dict. For PDFs with incremental updates,
685
+ // we want the most recent (last) trailer.
686
+ const trailerIdx = text.lastIndexOf("trailer");
687
+ if (trailerIdx < 0) {
688
+ // Could be an xref stream PDF — no traditional trailer
689
+ return entries;
690
+ }
691
+ // Find the << >> dict after "trailer"
692
+ const afterTrailer = text.substring(trailerIdx + 7);
693
+ const dictStart = afterTrailer.indexOf("<<");
694
+ if (dictStart < 0) {
695
+ return entries;
696
+ }
697
+ // Find the matching >>
698
+ let depth = 0;
699
+ let dictEnd = -1;
700
+ for (let i = dictStart; i < afterTrailer.length - 1; i++) {
701
+ if (afterTrailer[i] === "<" && afterTrailer[i + 1] === "<") {
702
+ depth++;
703
+ i++;
704
+ }
705
+ else if (afterTrailer[i] === ">" && afterTrailer[i + 1] === ">") {
706
+ depth--;
707
+ i++;
708
+ if (depth === 0) {
709
+ dictEnd = i + 1;
710
+ break;
711
+ }
712
+ }
713
+ }
714
+ if (dictEnd < 0) {
715
+ return entries;
716
+ }
717
+ const dictStr = afterTrailer.substring(dictStart, dictEnd);
718
+ // Extract known keys with a simple regex-based approach
719
+ for (const key of ["Root", "Info", "Encrypt", "ID", "Size"]) {
720
+ const keyPattern = new RegExp(`/${key}\\s+(.+?)(?=\\s*/[A-Z]|\\s*>>)`, "s");
721
+ const match = dictStr.match(keyPattern);
722
+ if (match) {
723
+ entries.set(key, match[1].trim());
724
+ }
725
+ }
726
+ return entries;
727
+ }