@cj-tech-master/excelts 8.0.0 → 8.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -1
- package/README_zh.md +6 -0
- package/dist/browser/modules/archive/zip/stream.d.ts +4 -0
- package/dist/browser/modules/archive/zip/stream.js +53 -0
- package/dist/browser/modules/pdf/core/crypto.d.ts +65 -0
- package/dist/browser/modules/pdf/core/crypto.js +637 -0
- package/dist/browser/modules/pdf/core/encryption.d.ts +23 -20
- package/dist/browser/modules/pdf/core/encryption.js +88 -261
- package/dist/browser/modules/pdf/core/pdf-writer.d.ts +6 -4
- package/dist/browser/modules/pdf/core/pdf-writer.js +19 -10
- package/dist/browser/modules/pdf/index.d.ts +23 -2
- package/dist/browser/modules/pdf/index.js +21 -3
- package/dist/browser/modules/pdf/reader/annotation-extractor.d.ts +63 -0
- package/dist/browser/modules/pdf/reader/annotation-extractor.js +155 -0
- package/dist/browser/modules/pdf/reader/cmap-parser.d.ts +70 -0
- package/dist/browser/modules/pdf/reader/cmap-parser.js +321 -0
- package/dist/browser/modules/pdf/reader/content-interpreter.d.ts +57 -0
- package/dist/browser/modules/pdf/reader/content-interpreter.js +715 -0
- package/dist/browser/modules/pdf/reader/font-decoder.d.ts +58 -0
- package/dist/browser/modules/pdf/reader/font-decoder.js +1513 -0
- package/dist/browser/modules/pdf/reader/form-extractor.d.ts +48 -0
- package/dist/browser/modules/pdf/reader/form-extractor.js +355 -0
- package/dist/browser/modules/pdf/reader/image-extractor.d.ts +55 -0
- package/dist/browser/modules/pdf/reader/image-extractor.js +220 -0
- package/dist/browser/modules/pdf/reader/metadata-reader.d.ts +56 -0
- package/dist/browser/modules/pdf/reader/metadata-reader.js +275 -0
- package/dist/browser/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
- package/dist/browser/modules/pdf/reader/pdf-decrypt.js +443 -0
- package/dist/browser/modules/pdf/reader/pdf-document.d.ts +191 -0
- package/dist/browser/modules/pdf/reader/pdf-document.js +818 -0
- package/dist/browser/modules/pdf/reader/pdf-parser.d.ts +65 -0
- package/dist/browser/modules/pdf/reader/pdf-parser.js +285 -0
- package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +143 -0
- package/dist/browser/modules/pdf/reader/pdf-reader.js +200 -0
- package/dist/browser/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
- package/dist/browser/modules/pdf/reader/pdf-tokenizer.js +543 -0
- package/dist/browser/modules/pdf/reader/reader-utils.d.ts +15 -0
- package/dist/browser/modules/pdf/reader/reader-utils.js +27 -0
- package/dist/browser/modules/pdf/reader/stream-filters.d.ts +20 -0
- package/dist/browser/modules/pdf/reader/stream-filters.js +456 -0
- package/dist/browser/modules/pdf/reader/text-reconstruction.d.ts +44 -0
- package/dist/browser/modules/pdf/reader/text-reconstruction.js +463 -0
- package/dist/cjs/modules/archive/zip/stream.js +53 -0
- package/dist/cjs/modules/pdf/core/crypto.js +649 -0
- package/dist/cjs/modules/pdf/core/encryption.js +88 -263
- package/dist/cjs/modules/pdf/core/pdf-writer.js +19 -10
- package/dist/cjs/modules/pdf/index.js +23 -4
- package/dist/cjs/modules/pdf/reader/annotation-extractor.js +158 -0
- package/dist/cjs/modules/pdf/reader/cmap-parser.js +326 -0
- package/dist/cjs/modules/pdf/reader/content-interpreter.js +718 -0
- package/dist/cjs/modules/pdf/reader/font-decoder.js +1518 -0
- package/dist/cjs/modules/pdf/reader/form-extractor.js +358 -0
- package/dist/cjs/modules/pdf/reader/image-extractor.js +223 -0
- package/dist/cjs/modules/pdf/reader/metadata-reader.js +278 -0
- package/dist/cjs/modules/pdf/reader/pdf-decrypt.js +447 -0
- package/dist/cjs/modules/pdf/reader/pdf-document.js +822 -0
- package/dist/cjs/modules/pdf/reader/pdf-parser.js +301 -0
- package/dist/cjs/modules/pdf/reader/pdf-reader.js +203 -0
- package/dist/cjs/modules/pdf/reader/pdf-tokenizer.js +517 -0
- package/dist/cjs/modules/pdf/reader/reader-utils.js +30 -0
- package/dist/cjs/modules/pdf/reader/stream-filters.js +459 -0
- package/dist/cjs/modules/pdf/reader/text-reconstruction.js +467 -0
- package/dist/esm/modules/archive/zip/stream.js +53 -0
- package/dist/esm/modules/pdf/core/crypto.js +637 -0
- package/dist/esm/modules/pdf/core/encryption.js +88 -261
- package/dist/esm/modules/pdf/core/pdf-writer.js +19 -10
- package/dist/esm/modules/pdf/index.js +21 -3
- package/dist/esm/modules/pdf/reader/annotation-extractor.js +155 -0
- package/dist/esm/modules/pdf/reader/cmap-parser.js +321 -0
- package/dist/esm/modules/pdf/reader/content-interpreter.js +715 -0
- package/dist/esm/modules/pdf/reader/font-decoder.js +1513 -0
- package/dist/esm/modules/pdf/reader/form-extractor.js +355 -0
- package/dist/esm/modules/pdf/reader/image-extractor.js +220 -0
- package/dist/esm/modules/pdf/reader/metadata-reader.js +275 -0
- package/dist/esm/modules/pdf/reader/pdf-decrypt.js +443 -0
- package/dist/esm/modules/pdf/reader/pdf-document.js +818 -0
- package/dist/esm/modules/pdf/reader/pdf-parser.js +285 -0
- package/dist/esm/modules/pdf/reader/pdf-reader.js +200 -0
- package/dist/esm/modules/pdf/reader/pdf-tokenizer.js +543 -0
- package/dist/esm/modules/pdf/reader/reader-utils.js +27 -0
- package/dist/esm/modules/pdf/reader/stream-filters.js +456 -0
- package/dist/esm/modules/pdf/reader/text-reconstruction.js +463 -0
- package/dist/iife/excelts.iife.js +703 -267
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +35 -35
- package/dist/types/modules/archive/zip/stream.d.ts +4 -0
- package/dist/types/modules/pdf/core/crypto.d.ts +65 -0
- package/dist/types/modules/pdf/core/encryption.d.ts +23 -20
- package/dist/types/modules/pdf/core/pdf-writer.d.ts +6 -4
- package/dist/types/modules/pdf/index.d.ts +23 -2
- package/dist/types/modules/pdf/reader/annotation-extractor.d.ts +63 -0
- package/dist/types/modules/pdf/reader/cmap-parser.d.ts +70 -0
- package/dist/types/modules/pdf/reader/content-interpreter.d.ts +57 -0
- package/dist/types/modules/pdf/reader/font-decoder.d.ts +58 -0
- package/dist/types/modules/pdf/reader/form-extractor.d.ts +48 -0
- package/dist/types/modules/pdf/reader/image-extractor.d.ts +55 -0
- package/dist/types/modules/pdf/reader/metadata-reader.d.ts +56 -0
- package/dist/types/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
- package/dist/types/modules/pdf/reader/pdf-document.d.ts +191 -0
- package/dist/types/modules/pdf/reader/pdf-parser.d.ts +65 -0
- package/dist/types/modules/pdf/reader/pdf-reader.d.ts +143 -0
- package/dist/types/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
- package/dist/types/modules/pdf/reader/reader-utils.d.ts +15 -0
- package/dist/types/modules/pdf/reader/stream-filters.d.ts +20 -0
- package/dist/types/modules/pdf/reader/text-reconstruction.d.ts +44 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -14,6 +14,12 @@ ExcelTS is a zero-dependency TypeScript toolkit for spreadsheets and documents:
|
|
|
14
14
|
- **Cross-Platform** — Node.js 22+, Bun, Chrome 89+, Firefox 102+, Safari 14.1+
|
|
15
15
|
- **ESM First** — Native ES Modules with CommonJS compatibility and full tree-shaking
|
|
16
16
|
|
|
17
|
+
## Motivation
|
|
18
|
+
|
|
19
|
+
The TypeScript ecosystem is heavily fragmented when it comes to document and data processing. To handle Excel, PDF, CSV, XML, ZIP, and streaming, developers often need to pull in a different package for each task — and then yet another set of packages to make them work in the browser, plus separate streaming wrappers on top. These libraries vary in API style, quality, and maintenance status, creating a tax on every project that needs more than one of them.
|
|
20
|
+
|
|
21
|
+
ExcelTS was built to fix this. One package, one consistent API, one codebase — working identically across Node.js, Bun, and browsers. Streaming is a first-class citizen in every module, not an afterthought bolted on through a third-party adapter. The goal is simple: install once, import what you need, and get the same reliable behavior everywhere — with maximum streaming performance out of the box.
|
|
22
|
+
|
|
17
23
|
## Modules
|
|
18
24
|
|
|
19
25
|
ExcelTS is organized into seven standalone modules. Each module has its own documentation and runnable examples.
|
|
@@ -27,7 +33,7 @@ Create, read, and modify Excel spreadsheets with full styling, formulas, images,
|
|
|
27
33
|
|
|
28
34
|
### PDF — Zero-Dependency PDF Engine
|
|
29
35
|
|
|
30
|
-
Full-featured PDF generation with font embedding, encryption, images, and Excel-to-PDF conversion.
|
|
36
|
+
Full-featured PDF generation and reading. Write PDFs with font embedding, AES-256 encryption, images, and Excel-to-PDF conversion. Read any PDF with text, image, annotation, form field, and metadata extraction.
|
|
31
37
|
|
|
32
38
|
- [Documentation](src/modules/pdf/README.md) | [中文](src/modules/pdf/README_zh.md)
|
|
33
39
|
- [Examples](src/modules/pdf/examples/)
|
|
@@ -99,6 +105,12 @@ const pdfBytes = pdf([
|
|
|
99
105
|
["Widget", 1000]
|
|
100
106
|
]);
|
|
101
107
|
|
|
108
|
+
// PDF — read text, images, and metadata from any PDF
|
|
109
|
+
import { readPdf } from "@cj-tech-master/excelts/pdf";
|
|
110
|
+
const result = readPdf(pdfBytes);
|
|
111
|
+
console.log(result.text); // extracted text
|
|
112
|
+
console.log(result.metadata); // title, author, etc.
|
|
113
|
+
|
|
102
114
|
// CSV — parse and format
|
|
103
115
|
import { parseCsv, formatCsv } from "@cj-tech-master/excelts/csv";
|
|
104
116
|
const rows = parseCsv("name,age\nAlice,30", { headers: true });
|
|
@@ -123,6 +135,7 @@ Each module is available as a standalone subpath export:
|
|
|
123
135
|
|
|
124
136
|
```typescript
|
|
125
137
|
import { Workbook, WorkbookWriter } from "@cj-tech-master/excelts";
|
|
138
|
+
import { pdf, readPdf, excelToPdf } from "@cj-tech-master/excelts/pdf";
|
|
126
139
|
import { SaxParser, parseXml, XmlWriter, query } from "@cj-tech-master/excelts/xml";
|
|
127
140
|
import { zip, unzip, ZipArchive, compress } from "@cj-tech-master/excelts/zip";
|
|
128
141
|
import { parseCsv, formatCsv, CsvParserStream } from "@cj-tech-master/excelts/csv";
|
package/README_zh.md
CHANGED
|
@@ -14,6 +14,12 @@ ExcelTS 是一个零依赖的 TypeScript 电子表格和文档工具包:
|
|
|
14
14
|
- **跨平台** — Node.js 22+、Bun、Chrome 89+、Firefox 102+、Safari 14.1+
|
|
15
15
|
- **ESM 优先** — 原生 ES Modules,兼容 CommonJS,完整的 tree-shaking 支持
|
|
16
16
|
|
|
17
|
+
## 项目初衷
|
|
18
|
+
|
|
19
|
+
TypeScript 生态在文档和数据处理领域长期存在碎片化问题。处理 Excel、PDF、CSV、XML、ZIP 和流式操作,往往需要分别引入不同的包;到了浏览器端又要换一套方案;流式处理还得再额外接入一个适配库。这些库的 API 风格、质量和维护状态参差不齐,给每个需要组合使用它们的项目都带来了额外的负担。
|
|
20
|
+
|
|
21
|
+
ExcelTS 正是为了解决这个问题而生。一个包、一套 API、一份代码 — 在 Node.js、Bun 和浏览器中行为完全一致。流式处理是每个模块的一等公民,而非通过第三方适配器后期拼装的附属品。目标很简单:安装一次,按需导入,在任何环境下都获得相同的可靠体验 — 同时将流式处理的性能发挥到极致。
|
|
22
|
+
|
|
17
23
|
## 模块
|
|
18
24
|
|
|
19
25
|
ExcelTS 由七个独立模块组成,每个模块都有自己的文档和可运行示例。
|
|
@@ -60,6 +60,8 @@ export declare class ZipDeflateFile {
|
|
|
60
60
|
private _dataQueue;
|
|
61
61
|
private _finalQueued;
|
|
62
62
|
private _pushChain;
|
|
63
|
+
private _inputBuf;
|
|
64
|
+
private _inputPos;
|
|
63
65
|
private _syncDeflater;
|
|
64
66
|
private _syncZlibReady;
|
|
65
67
|
readonly name: string;
|
|
@@ -159,6 +161,8 @@ export declare class ZipDeflateFile {
|
|
|
159
161
|
* memory growth when callers push data in a tight synchronous loop.
|
|
160
162
|
*/
|
|
161
163
|
push(data: Uint8Array, final?: boolean, callback?: (err?: Error | null) => void): Promise<void>;
|
|
164
|
+
/** Enqueue an async push through the _pushChain serialization. */
|
|
165
|
+
private _pushAsync;
|
|
162
166
|
/**
|
|
163
167
|
* Synchronous push path — compresses and emits data without any Promises.
|
|
164
168
|
*
|
|
@@ -19,6 +19,12 @@ import { createAbortError, toError } from "../shared/errors.js";
|
|
|
19
19
|
import { measureCentralDirectoryAndEocd, writeCentralDirectoryAndEocdInto } from "./writer-core.js";
|
|
20
20
|
import { buildDataDescriptor, buildDataDescriptorZip64, concatExtraFields, UINT16_MAX, UINT32_MAX, buildLocalFileHeader, VERSION_ZIP64, VERSION_NEEDED, FLAG_ENCRYPTED, FLAG_DATA_DESCRIPTOR, FLAG_UTF8, COMPRESSION_AES, getUnixModeFromExternalAttributes, isSymlinkMode } from "../zip-spec/zip-records.js";
|
|
21
21
|
const SMART_STORE_DECIDE_BYTES = 16 * 1024;
|
|
22
|
+
/** Input batching threshold for push(). Small chunks are accumulated in an
|
|
23
|
+
* internal buffer and flushed to the compression pipeline once this size is
|
|
24
|
+
* reached. 64 KB is twice the 32 KB maximum deflate window and keeps the number
|
|
25
|
+
* of async push() calls — each of which creates a full Promise chain in the
|
|
26
|
+
* browser CompressionStream path — down to a manageable level. */
|
|
27
|
+
const INPUT_BATCH_BYTES = 65536;
|
|
22
28
|
/**
|
|
23
29
|
* True Streaming ZIP File - compresses chunk by chunk
|
|
24
30
|
*/
|
|
@@ -62,6 +68,12 @@ export class ZipDeflateFile {
|
|
|
62
68
|
this._finalQueued = false;
|
|
63
69
|
// Serialize push() calls so callers don't need to await to preserve ordering.
|
|
64
70
|
this._pushChain = Promise.resolve();
|
|
71
|
+
// Input batching: accumulate small chunks before feeding the compression
|
|
72
|
+
// pipeline. This collapses thousands of tiny push() calls (each creating a
|
|
73
|
+
// full async Promise chain on browsers) into a handful of large pushes.
|
|
74
|
+
// Threshold is 64 KB, twice the maximum deflate window size (32 KB).
|
|
75
|
+
this._inputBuf = null;
|
|
76
|
+
this._inputPos = 0;
|
|
65
77
|
// Synchronous compression state for pushSync() path.
|
|
66
78
|
this._syncDeflater = null;
|
|
67
79
|
this._syncZlibReady = false;
|
|
@@ -639,6 +651,47 @@ export class ZipDeflateFile {
|
|
|
639
651
|
}
|
|
640
652
|
return Promise.resolve();
|
|
641
653
|
}
|
|
654
|
+
// --- Async path: batch small chunks to reduce Promise-chain overhead ---
|
|
655
|
+
// Each real push through the async pipeline creates a full Promise chain
|
|
656
|
+
// (push → _pushChain → _pushUnchained → AsyncStreamCodec.writeChain →
|
|
657
|
+
// CompressionStream.writer.write). By accumulating small chunks into a
|
|
658
|
+
// 64 KB buffer we reduce the number of async round-trips by ~100x for
|
|
659
|
+
// typical XML workloads without sacrificing streaming semantics.
|
|
660
|
+
if (!final && data.length > 0 && data.length < INPUT_BATCH_BYTES) {
|
|
661
|
+
// Lazy-allocate the batch buffer.
|
|
662
|
+
if (!this._inputBuf) {
|
|
663
|
+
this._inputBuf = new Uint8Array(INPUT_BATCH_BYTES);
|
|
664
|
+
this._inputPos = 0;
|
|
665
|
+
}
|
|
666
|
+
// If the chunk fits in the remaining space, just copy it in.
|
|
667
|
+
if (this._inputPos + data.length <= INPUT_BATCH_BYTES) {
|
|
668
|
+
this._inputBuf.set(data, this._inputPos);
|
|
669
|
+
this._inputPos += data.length;
|
|
670
|
+
// Not full yet — return resolved promise, no async work.
|
|
671
|
+
callback?.();
|
|
672
|
+
return Promise.resolve();
|
|
673
|
+
}
|
|
674
|
+
// Buffer would overflow — flush everything (buffered + new data) together.
|
|
675
|
+
const combined = new Uint8Array(this._inputPos + data.length);
|
|
676
|
+
combined.set(this._inputBuf.subarray(0, this._inputPos));
|
|
677
|
+
combined.set(data, this._inputPos);
|
|
678
|
+
this._inputPos = 0;
|
|
679
|
+
return this._pushAsync(combined, false, callback);
|
|
680
|
+
}
|
|
681
|
+
// Large chunk or final — flush any buffered data first, then push.
|
|
682
|
+
if (this._inputPos > 0) {
|
|
683
|
+
const flushData = this._inputBuf.slice(0, this._inputPos);
|
|
684
|
+
this._inputPos = 0;
|
|
685
|
+
// Chain: flush buffered → push current
|
|
686
|
+
const flushPromise = this._pushAsync(flushData, false);
|
|
687
|
+
const promise = (this._pushChain = flushPromise.then(() => this._pushUnchained(data, final, callback), () => this._pushUnchained(data, final, callback)));
|
|
688
|
+
promise.catch(() => { });
|
|
689
|
+
return promise;
|
|
690
|
+
}
|
|
691
|
+
return this._pushAsync(data, final, callback);
|
|
692
|
+
}
|
|
693
|
+
/** Enqueue an async push through the _pushChain serialization. */
|
|
694
|
+
_pushAsync(data, final, callback) {
|
|
642
695
|
// Chain the async push so calls are serialized. Use a recovery wrapper
|
|
643
696
|
// so that a single failed push does not break the chain for subsequent
|
|
644
697
|
// pushes — errors are surfaced via onerror/rejectComplete instead.
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared cryptographic primitives for PDF encryption/decryption.
|
|
3
|
+
*
|
|
4
|
+
* Zero-dependency, pure JavaScript implementations of:
|
|
5
|
+
* - AES (128/256-bit) CBC encrypt and decrypt
|
|
6
|
+
* - SHA-256
|
|
7
|
+
* - MD5
|
|
8
|
+
* - RC4 (for reading legacy PDFs only)
|
|
9
|
+
*
|
|
10
|
+
* @see FIPS 197 — AES
|
|
11
|
+
* @see FIPS 180-4 — SHA-256
|
|
12
|
+
* @see RFC 1321 — MD5
|
|
13
|
+
*/
|
|
14
|
+
/**
|
|
15
|
+
* AES-CBC encryption with PKCS#7 padding.
|
|
16
|
+
* Supports AES-128 (16-byte key) and AES-256 (32-byte key).
|
|
17
|
+
*/
|
|
18
|
+
export declare function aesCbcEncrypt(plaintext: Uint8Array, key: Uint8Array, iv: Uint8Array): Uint8Array;
|
|
19
|
+
/**
|
|
20
|
+
* AES-CBC decryption with PKCS#7 padding removal.
|
|
21
|
+
* Supports AES-128 (16-byte key) and AES-256 (32-byte key).
|
|
22
|
+
*/
|
|
23
|
+
export declare function aesCbcDecrypt(ciphertext: Uint8Array, key: Uint8Array, iv: Uint8Array): Uint8Array;
|
|
24
|
+
/**
|
|
25
|
+
* AES-CBC decryption WITHOUT PKCS#7 padding removal.
|
|
26
|
+
* Used for key derivation in V=5 where the output length is known.
|
|
27
|
+
*/
|
|
28
|
+
export declare function aesCbcDecryptRaw(ciphertext: Uint8Array, key: Uint8Array, iv: Uint8Array): Uint8Array;
|
|
29
|
+
/**
|
|
30
|
+
* AES-CBC encryption WITHOUT PKCS#7 padding.
|
|
31
|
+
* Used when the plaintext is already block-aligned (e.g., encrypting
|
|
32
|
+
* the 32-byte file encryption key in V=5).
|
|
33
|
+
*
|
|
34
|
+
* @throws if plaintext length is not a multiple of 16.
|
|
35
|
+
*/
|
|
36
|
+
export declare function aesCbcEncryptRaw(plaintext: Uint8Array, key: Uint8Array, iv: Uint8Array): Uint8Array;
|
|
37
|
+
/**
|
|
38
|
+
* AES-ECB encryption of a single 16-byte block (no padding, no IV).
|
|
39
|
+
* Used for the /Perms value in V=5 encryption.
|
|
40
|
+
*/
|
|
41
|
+
export declare function aesEcbEncrypt(block: Uint8Array, key: Uint8Array): Uint8Array;
|
|
42
|
+
/**
|
|
43
|
+
* SHA-256 hash function.
|
|
44
|
+
* @returns 32-byte digest
|
|
45
|
+
*/
|
|
46
|
+
export declare function sha256(input: Uint8Array): Uint8Array;
|
|
47
|
+
/**
|
|
48
|
+
* MD5 hash function (RFC 1321).
|
|
49
|
+
* @returns 16-byte digest
|
|
50
|
+
*/
|
|
51
|
+
export declare function md5(input: Uint8Array): Uint8Array;
|
|
52
|
+
/**
|
|
53
|
+
* RC4 stream cipher.
|
|
54
|
+
* @deprecated Only used for reading legacy encrypted PDFs. Writer uses AES-256.
|
|
55
|
+
*/
|
|
56
|
+
export declare function rc4(key: Uint8Array, data: Uint8Array): Uint8Array;
|
|
57
|
+
/**
|
|
58
|
+
* Generate pseudo-random bytes.
|
|
59
|
+
* Uses Math.random — adequate for PDF IVs but not cryptographically secure.
|
|
60
|
+
*/
|
|
61
|
+
export declare function randomBytes(length: number): Uint8Array;
|
|
62
|
+
/**
|
|
63
|
+
* Concatenate multiple Uint8Arrays.
|
|
64
|
+
*/
|
|
65
|
+
export declare function concatArrays(...arrays: Uint8Array[]): Uint8Array;
|