@cj-tech-master/excelts 8.0.0 → 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/README.md +14 -1
  2. package/README_zh.md +6 -0
  3. package/dist/browser/modules/archive/zip/stream.d.ts +4 -0
  4. package/dist/browser/modules/archive/zip/stream.js +53 -0
  5. package/dist/browser/modules/pdf/core/crypto.d.ts +65 -0
  6. package/dist/browser/modules/pdf/core/crypto.js +637 -0
  7. package/dist/browser/modules/pdf/core/encryption.d.ts +23 -20
  8. package/dist/browser/modules/pdf/core/encryption.js +88 -261
  9. package/dist/browser/modules/pdf/core/pdf-writer.d.ts +6 -4
  10. package/dist/browser/modules/pdf/core/pdf-writer.js +19 -10
  11. package/dist/browser/modules/pdf/index.d.ts +23 -2
  12. package/dist/browser/modules/pdf/index.js +21 -3
  13. package/dist/browser/modules/pdf/reader/annotation-extractor.d.ts +63 -0
  14. package/dist/browser/modules/pdf/reader/annotation-extractor.js +155 -0
  15. package/dist/browser/modules/pdf/reader/cmap-parser.d.ts +70 -0
  16. package/dist/browser/modules/pdf/reader/cmap-parser.js +321 -0
  17. package/dist/browser/modules/pdf/reader/content-interpreter.d.ts +57 -0
  18. package/dist/browser/modules/pdf/reader/content-interpreter.js +715 -0
  19. package/dist/browser/modules/pdf/reader/font-decoder.d.ts +58 -0
  20. package/dist/browser/modules/pdf/reader/font-decoder.js +1513 -0
  21. package/dist/browser/modules/pdf/reader/form-extractor.d.ts +48 -0
  22. package/dist/browser/modules/pdf/reader/form-extractor.js +355 -0
  23. package/dist/browser/modules/pdf/reader/image-extractor.d.ts +55 -0
  24. package/dist/browser/modules/pdf/reader/image-extractor.js +220 -0
  25. package/dist/browser/modules/pdf/reader/metadata-reader.d.ts +56 -0
  26. package/dist/browser/modules/pdf/reader/metadata-reader.js +275 -0
  27. package/dist/browser/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
  28. package/dist/browser/modules/pdf/reader/pdf-decrypt.js +443 -0
  29. package/dist/browser/modules/pdf/reader/pdf-document.d.ts +191 -0
  30. package/dist/browser/modules/pdf/reader/pdf-document.js +818 -0
  31. package/dist/browser/modules/pdf/reader/pdf-parser.d.ts +65 -0
  32. package/dist/browser/modules/pdf/reader/pdf-parser.js +285 -0
  33. package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +143 -0
  34. package/dist/browser/modules/pdf/reader/pdf-reader.js +200 -0
  35. package/dist/browser/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
  36. package/dist/browser/modules/pdf/reader/pdf-tokenizer.js +543 -0
  37. package/dist/browser/modules/pdf/reader/reader-utils.d.ts +15 -0
  38. package/dist/browser/modules/pdf/reader/reader-utils.js +27 -0
  39. package/dist/browser/modules/pdf/reader/stream-filters.d.ts +20 -0
  40. package/dist/browser/modules/pdf/reader/stream-filters.js +456 -0
  41. package/dist/browser/modules/pdf/reader/text-reconstruction.d.ts +44 -0
  42. package/dist/browser/modules/pdf/reader/text-reconstruction.js +463 -0
  43. package/dist/cjs/modules/archive/zip/stream.js +53 -0
  44. package/dist/cjs/modules/pdf/core/crypto.js +649 -0
  45. package/dist/cjs/modules/pdf/core/encryption.js +88 -263
  46. package/dist/cjs/modules/pdf/core/pdf-writer.js +19 -10
  47. package/dist/cjs/modules/pdf/index.js +23 -4
  48. package/dist/cjs/modules/pdf/reader/annotation-extractor.js +158 -0
  49. package/dist/cjs/modules/pdf/reader/cmap-parser.js +326 -0
  50. package/dist/cjs/modules/pdf/reader/content-interpreter.js +718 -0
  51. package/dist/cjs/modules/pdf/reader/font-decoder.js +1518 -0
  52. package/dist/cjs/modules/pdf/reader/form-extractor.js +358 -0
  53. package/dist/cjs/modules/pdf/reader/image-extractor.js +223 -0
  54. package/dist/cjs/modules/pdf/reader/metadata-reader.js +278 -0
  55. package/dist/cjs/modules/pdf/reader/pdf-decrypt.js +447 -0
  56. package/dist/cjs/modules/pdf/reader/pdf-document.js +822 -0
  57. package/dist/cjs/modules/pdf/reader/pdf-parser.js +301 -0
  58. package/dist/cjs/modules/pdf/reader/pdf-reader.js +203 -0
  59. package/dist/cjs/modules/pdf/reader/pdf-tokenizer.js +517 -0
  60. package/dist/cjs/modules/pdf/reader/reader-utils.js +30 -0
  61. package/dist/cjs/modules/pdf/reader/stream-filters.js +459 -0
  62. package/dist/cjs/modules/pdf/reader/text-reconstruction.js +467 -0
  63. package/dist/esm/modules/archive/zip/stream.js +53 -0
  64. package/dist/esm/modules/pdf/core/crypto.js +637 -0
  65. package/dist/esm/modules/pdf/core/encryption.js +88 -261
  66. package/dist/esm/modules/pdf/core/pdf-writer.js +19 -10
  67. package/dist/esm/modules/pdf/index.js +21 -3
  68. package/dist/esm/modules/pdf/reader/annotation-extractor.js +155 -0
  69. package/dist/esm/modules/pdf/reader/cmap-parser.js +321 -0
  70. package/dist/esm/modules/pdf/reader/content-interpreter.js +715 -0
  71. package/dist/esm/modules/pdf/reader/font-decoder.js +1513 -0
  72. package/dist/esm/modules/pdf/reader/form-extractor.js +355 -0
  73. package/dist/esm/modules/pdf/reader/image-extractor.js +220 -0
  74. package/dist/esm/modules/pdf/reader/metadata-reader.js +275 -0
  75. package/dist/esm/modules/pdf/reader/pdf-decrypt.js +443 -0
  76. package/dist/esm/modules/pdf/reader/pdf-document.js +818 -0
  77. package/dist/esm/modules/pdf/reader/pdf-parser.js +285 -0
  78. package/dist/esm/modules/pdf/reader/pdf-reader.js +200 -0
  79. package/dist/esm/modules/pdf/reader/pdf-tokenizer.js +543 -0
  80. package/dist/esm/modules/pdf/reader/reader-utils.js +27 -0
  81. package/dist/esm/modules/pdf/reader/stream-filters.js +456 -0
  82. package/dist/esm/modules/pdf/reader/text-reconstruction.js +463 -0
  83. package/dist/iife/excelts.iife.js +703 -267
  84. package/dist/iife/excelts.iife.js.map +1 -1
  85. package/dist/iife/excelts.iife.min.js +35 -35
  86. package/dist/types/modules/archive/zip/stream.d.ts +4 -0
  87. package/dist/types/modules/pdf/core/crypto.d.ts +65 -0
  88. package/dist/types/modules/pdf/core/encryption.d.ts +23 -20
  89. package/dist/types/modules/pdf/core/pdf-writer.d.ts +6 -4
  90. package/dist/types/modules/pdf/index.d.ts +23 -2
  91. package/dist/types/modules/pdf/reader/annotation-extractor.d.ts +63 -0
  92. package/dist/types/modules/pdf/reader/cmap-parser.d.ts +70 -0
  93. package/dist/types/modules/pdf/reader/content-interpreter.d.ts +57 -0
  94. package/dist/types/modules/pdf/reader/font-decoder.d.ts +58 -0
  95. package/dist/types/modules/pdf/reader/form-extractor.d.ts +48 -0
  96. package/dist/types/modules/pdf/reader/image-extractor.d.ts +55 -0
  97. package/dist/types/modules/pdf/reader/metadata-reader.d.ts +56 -0
  98. package/dist/types/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
  99. package/dist/types/modules/pdf/reader/pdf-document.d.ts +191 -0
  100. package/dist/types/modules/pdf/reader/pdf-parser.d.ts +65 -0
  101. package/dist/types/modules/pdf/reader/pdf-reader.d.ts +143 -0
  102. package/dist/types/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
  103. package/dist/types/modules/pdf/reader/reader-utils.d.ts +15 -0
  104. package/dist/types/modules/pdf/reader/stream-filters.d.ts +20 -0
  105. package/dist/types/modules/pdf/reader/text-reconstruction.d.ts +44 -0
  106. package/package.json +1 -1
package/README.md CHANGED
@@ -14,6 +14,12 @@ ExcelTS is a zero-dependency TypeScript toolkit for spreadsheets and documents:
14
14
  - **Cross-Platform** — Node.js 22+, Bun, Chrome 89+, Firefox 102+, Safari 14.1+
15
15
  - **ESM First** — Native ES Modules with CommonJS compatibility and full tree-shaking
16
16
 
17
+ ## Motivation
18
+
19
+ The TypeScript ecosystem is heavily fragmented when it comes to document and data processing. To handle Excel, PDF, CSV, XML, ZIP, and streaming, developers often need to pull in a different package for each task — and then yet another set of packages to make them work in the browser, plus separate streaming wrappers on top. These libraries vary in API style, quality, and maintenance status, creating a tax on every project that needs more than one of them.
20
+
21
+ ExcelTS was built to fix this. One package, one consistent API, one codebase — working identically across Node.js, Bun, and browsers. Streaming is a first-class citizen in every module, not an afterthought bolted on through a third-party adapter. The goal is simple: install once, import what you need, and get the same reliable behavior everywhere — with maximum streaming performance out of the box.
22
+
17
23
  ## Modules
18
24
 
19
25
  ExcelTS is organized into seven standalone modules. Each module has its own documentation and runnable examples.
@@ -27,7 +33,7 @@ Create, read, and modify Excel spreadsheets with full styling, formulas, images,
27
33
 
28
34
  ### PDF — Zero-Dependency PDF Engine
29
35
 
30
- Full-featured PDF generation with font embedding, encryption, images, and Excel-to-PDF conversion.
36
+ Full-featured PDF generation and reading. Write PDFs with font embedding, AES-256 encryption, images, and Excel-to-PDF conversion. Read any PDF with text, image, annotation, form field, and metadata extraction.
31
37
 
32
38
  - [Documentation](src/modules/pdf/README.md) | [中文](src/modules/pdf/README_zh.md)
33
39
  - [Examples](src/modules/pdf/examples/)
@@ -99,6 +105,12 @@ const pdfBytes = pdf([
99
105
  ["Widget", 1000]
100
106
  ]);
101
107
 
108
+ // PDF — read text, images, and metadata from any PDF
109
+ import { readPdf } from "@cj-tech-master/excelts/pdf";
110
+ const result = readPdf(pdfBytes);
111
+ console.log(result.text); // extracted text
112
+ console.log(result.metadata); // title, author, etc.
113
+
102
114
  // CSV — parse and format
103
115
  import { parseCsv, formatCsv } from "@cj-tech-master/excelts/csv";
104
116
  const rows = parseCsv("name,age\nAlice,30", { headers: true });
@@ -123,6 +135,7 @@ Each module is available as a standalone subpath export:
123
135
 
124
136
  ```typescript
125
137
  import { Workbook, WorkbookWriter } from "@cj-tech-master/excelts";
138
+ import { pdf, readPdf, excelToPdf } from "@cj-tech-master/excelts/pdf";
126
139
  import { SaxParser, parseXml, XmlWriter, query } from "@cj-tech-master/excelts/xml";
127
140
  import { zip, unzip, ZipArchive, compress } from "@cj-tech-master/excelts/zip";
128
141
  import { parseCsv, formatCsv, CsvParserStream } from "@cj-tech-master/excelts/csv";
package/README_zh.md CHANGED
@@ -14,6 +14,12 @@ ExcelTS 是一个零依赖的 TypeScript 电子表格和文档工具包:
14
14
  - **跨平台** — Node.js 22+、Bun、Chrome 89+、Firefox 102+、Safari 14.1+
15
15
  - **ESM 优先** — 原生 ES Modules,兼容 CommonJS,完整的 tree-shaking 支持
16
16
 
17
+ ## 项目初衷
18
+
19
+ TypeScript 生态在文档和数据处理领域长期存在碎片化问题。处理 Excel、PDF、CSV、XML、ZIP 和流式操作,往往需要分别引入不同的包;到了浏览器端又要换一套方案;流式处理还得再额外接入一个适配库。这些库的 API 风格、质量和维护状态参差不齐,给每个需要组合使用它们的项目都带来了额外的负担。
20
+
21
+ ExcelTS 正是为了解决这个问题而生。一个包、一套 API、一份代码 — 在 Node.js、Bun 和浏览器中行为完全一致。流式处理是每个模块的一等公民,而非通过第三方适配器后期拼装的附属品。目标很简单:安装一次,按需导入,在任何环境下都获得相同的可靠体验 — 同时将流式处理的性能发挥到极致。
22
+
17
23
  ## 模块
18
24
 
19
25
  ExcelTS 由七个独立模块组成,每个模块都有自己的文档和可运行示例。
@@ -60,6 +60,8 @@ export declare class ZipDeflateFile {
60
60
  private _dataQueue;
61
61
  private _finalQueued;
62
62
  private _pushChain;
63
+ private _inputBuf;
64
+ private _inputPos;
63
65
  private _syncDeflater;
64
66
  private _syncZlibReady;
65
67
  readonly name: string;
@@ -159,6 +161,8 @@ export declare class ZipDeflateFile {
159
161
  * memory growth when callers push data in a tight synchronous loop.
160
162
  */
161
163
  push(data: Uint8Array, final?: boolean, callback?: (err?: Error | null) => void): Promise<void>;
164
+ /** Enqueue an async push through the _pushChain serialization. */
165
+ private _pushAsync;
162
166
  /**
163
167
  * Synchronous push path — compresses and emits data without any Promises.
164
168
  *
@@ -19,6 +19,12 @@ import { createAbortError, toError } from "../shared/errors.js";
19
19
  import { measureCentralDirectoryAndEocd, writeCentralDirectoryAndEocdInto } from "./writer-core.js";
20
20
  import { buildDataDescriptor, buildDataDescriptorZip64, concatExtraFields, UINT16_MAX, UINT32_MAX, buildLocalFileHeader, VERSION_ZIP64, VERSION_NEEDED, FLAG_ENCRYPTED, FLAG_DATA_DESCRIPTOR, FLAG_UTF8, COMPRESSION_AES, getUnixModeFromExternalAttributes, isSymlinkMode } from "../zip-spec/zip-records.js";
21
21
  const SMART_STORE_DECIDE_BYTES = 16 * 1024;
22
+ /** Input batching threshold for push(). Small chunks are accumulated in an
23
+ * internal buffer and flushed to the compression pipeline once this size is
24
+ * reached. 64 KB (twice the 32 KB deflate window) keeps the number
25
+ * of async push() calls — each of which creates a full Promise chain in the
26
+ * browser CompressionStream path — down to a manageable level. */
27
+ const INPUT_BATCH_BYTES = 65536;
22
28
  /**
23
29
  * True Streaming ZIP File - compresses chunk by chunk
24
30
  */
@@ -62,6 +68,12 @@ export class ZipDeflateFile {
62
68
  this._finalQueued = false;
63
69
  // Serialize push() calls so callers don't need to await to preserve ordering.
64
70
  this._pushChain = Promise.resolve();
71
+ // Input batching: accumulate small chunks before feeding the compression
72
+ // pipeline. This collapses thousands of tiny push() calls (each creating a
73
+ // full async Promise chain on browsers) into a handful of large pushes.
74
+ // Threshold is 64 KB, twice the maximum deflate window size (32 KB).
75
+ this._inputBuf = null;
76
+ this._inputPos = 0;
65
77
  // Synchronous compression state for pushSync() path.
66
78
  this._syncDeflater = null;
67
79
  this._syncZlibReady = false;
@@ -639,6 +651,47 @@ export class ZipDeflateFile {
639
651
  }
640
652
  return Promise.resolve();
641
653
  }
654
+ // --- Async path: batch small chunks to reduce Promise-chain overhead ---
655
+ // Each real push through the async pipeline creates a full Promise chain
656
+ // (push → _pushChain → _pushUnchained → AsyncStreamCodec.writeChain →
657
+ // CompressionStream.writer.write). By accumulating small chunks into a
658
+ // 64 KB buffer we reduce the number of async round-trips by ~100x for
659
+ // typical XML workloads without sacrificing streaming semantics.
660
+ if (!final && data.length > 0 && data.length < INPUT_BATCH_BYTES) {
661
+ // Lazy-allocate the batch buffer.
662
+ if (!this._inputBuf) {
663
+ this._inputBuf = new Uint8Array(INPUT_BATCH_BYTES);
664
+ this._inputPos = 0;
665
+ }
666
+ // If the chunk fits in the remaining space, just copy it in.
667
+ if (this._inputPos + data.length <= INPUT_BATCH_BYTES) {
668
+ this._inputBuf.set(data, this._inputPos);
669
+ this._inputPos += data.length;
670
+ // Chunk fit without overflowing — return resolved promise, no async work.
671
+ callback?.();
672
+ return Promise.resolve();
673
+ }
674
+ // Buffer would overflow — flush everything (buffered + new data) together.
675
+ const combined = new Uint8Array(this._inputPos + data.length);
676
+ combined.set(this._inputBuf.subarray(0, this._inputPos));
677
+ combined.set(data, this._inputPos);
678
+ this._inputPos = 0;
679
+ return this._pushAsync(combined, false, callback);
680
+ }
681
+ // Large chunk or final — flush any buffered data first, then push.
682
+ if (this._inputPos > 0) {
683
+ const flushData = this._inputBuf.slice(0, this._inputPos);
684
+ this._inputPos = 0;
685
+ // Chain: flush buffered → push current
686
+ const flushPromise = this._pushAsync(flushData, false);
687
+ const promise = (this._pushChain = flushPromise.then(() => this._pushUnchained(data, final, callback), () => this._pushUnchained(data, final, callback)));
688
+ promise.catch(() => { });
689
+ return promise;
690
+ }
691
+ return this._pushAsync(data, final, callback);
692
+ }
693
+ /** Enqueue an async push through the _pushChain serialization. */
694
+ _pushAsync(data, final, callback) {
642
695
  // Chain the async push so calls are serialized. Use a recovery wrapper
643
696
  // so that a single failed push does not break the chain for subsequent
644
697
  // pushes — errors are surfaced via onerror/rejectComplete instead.
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Shared cryptographic primitives for PDF encryption/decryption.
3
+ *
4
+ * Zero-dependency, pure JavaScript implementations of:
5
+ * - AES (128/256-bit) CBC encrypt and decrypt
6
+ * - SHA-256
7
+ * - MD5
8
+ * - RC4 (for reading legacy PDFs only)
9
+ *
10
+ * @see FIPS 197 — AES
11
+ * @see FIPS 180-4 — SHA-256
12
+ * @see RFC 1321 — MD5
13
+ */
14
+ /**
15
+ * AES-CBC encryption with PKCS#7 padding.
16
+ * Supports AES-128 (16-byte key) and AES-256 (32-byte key).
17
+ */
18
+ export declare function aesCbcEncrypt(plaintext: Uint8Array, key: Uint8Array, iv: Uint8Array): Uint8Array;
19
+ /**
20
+ * AES-CBC decryption with PKCS#7 padding removal.
21
+ * Supports AES-128 (16-byte key) and AES-256 (32-byte key).
22
+ */
23
+ export declare function aesCbcDecrypt(ciphertext: Uint8Array, key: Uint8Array, iv: Uint8Array): Uint8Array;
24
+ /**
25
+ * AES-CBC decryption WITHOUT PKCS#7 padding removal.
26
+ * Used for key derivation in V=5 where the output length is known.
27
+ */
28
+ export declare function aesCbcDecryptRaw(ciphertext: Uint8Array, key: Uint8Array, iv: Uint8Array): Uint8Array;
29
+ /**
30
+ * AES-CBC encryption WITHOUT PKCS#7 padding.
31
+ * Used when the plaintext is already block-aligned (e.g., encrypting
32
+ * the 32-byte file encryption key in V=5).
33
+ *
34
+ * @throws if plaintext length is not a multiple of 16.
35
+ */
36
+ export declare function aesCbcEncryptRaw(plaintext: Uint8Array, key: Uint8Array, iv: Uint8Array): Uint8Array;
37
+ /**
38
+ * AES-ECB encryption of a single 16-byte block (no padding, no IV).
39
+ * Used for the /Perms value in V=5 encryption.
40
+ */
41
+ export declare function aesEcbEncrypt(block: Uint8Array, key: Uint8Array): Uint8Array;
42
+ /**
43
+ * SHA-256 hash function.
44
+ * @returns 32-byte digest
45
+ */
46
+ export declare function sha256(input: Uint8Array): Uint8Array;
47
+ /**
48
+ * MD5 hash function (RFC 1321).
49
+ * @returns 16-byte digest
50
+ */
51
+ export declare function md5(input: Uint8Array): Uint8Array;
52
+ /**
53
+ * RC4 stream cipher.
54
+ * @deprecated Only used for reading legacy encrypted PDFs. Writer uses AES-256.
55
+ */
56
+ export declare function rc4(key: Uint8Array, data: Uint8Array): Uint8Array;
57
+ /**
58
+ * Generate pseudo-random bytes.
59
+ * Uses Math.random — adequate for PDF IVs but not cryptographically secure.
60
+ */
61
+ export declare function randomBytes(length: number): Uint8Array;
62
+ /**
63
+ * Concatenate multiple Uint8Arrays.
64
+ */
65
+ export declare function concatArrays(...arrays: Uint8Array[]): Uint8Array;