@cj-tech-master/excelts 7.6.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/README.md +99 -577
  2. package/README_zh.md +101 -577
  3. package/dist/browser/index.browser.d.ts +3 -0
  4. package/dist/browser/index.browser.js +2 -0
  5. package/dist/browser/index.d.ts +3 -0
  6. package/dist/browser/index.js +2 -0
  7. package/dist/browser/modules/archive/compression/compress.browser.js +4 -4
  8. package/dist/browser/modules/archive/compression/deflate-fallback.d.ts +24 -22
  9. package/dist/browser/modules/archive/compression/deflate-fallback.js +664 -360
  10. package/dist/browser/modules/archive/compression/streaming-compress.browser.d.ts +7 -0
  11. package/dist/browser/modules/archive/compression/streaming-compress.browser.js +15 -3
  12. package/dist/browser/modules/archive/compression/streaming-compress.d.ts +5 -0
  13. package/dist/browser/modules/archive/compression/streaming-compress.js +7 -0
  14. package/dist/browser/modules/archive/zip/stream.js +27 -3
  15. package/dist/browser/modules/excel/workbook.browser.d.ts +72 -0
  16. package/dist/browser/modules/excel/workbook.browser.js +226 -0
  17. package/dist/browser/modules/excel/workbook.d.ts +32 -1
  18. package/dist/browser/modules/excel/workbook.js +47 -2
  19. package/dist/browser/modules/excel/xlsx/xlsx.browser.js +42 -4
  20. package/dist/browser/modules/markdown/constants.d.ts +30 -0
  21. package/dist/browser/modules/markdown/constants.js +30 -0
  22. package/dist/browser/modules/markdown/errors.d.ts +21 -0
  23. package/dist/browser/modules/markdown/errors.js +23 -0
  24. package/dist/browser/modules/markdown/format/index.d.ts +54 -0
  25. package/dist/browser/modules/markdown/format/index.js +307 -0
  26. package/dist/browser/modules/markdown/index.d.ts +15 -0
  27. package/dist/browser/modules/markdown/index.js +22 -0
  28. package/dist/browser/modules/markdown/parse/index.d.ts +70 -0
  29. package/dist/browser/modules/markdown/parse/index.js +428 -0
  30. package/dist/browser/modules/markdown/types.d.ts +130 -0
  31. package/dist/browser/modules/markdown/types.js +6 -0
  32. package/dist/cjs/index.js +5 -1
  33. package/dist/cjs/modules/archive/compression/compress.browser.js +4 -4
  34. package/dist/cjs/modules/archive/compression/deflate-fallback.js +664 -360
  35. package/dist/cjs/modules/archive/compression/streaming-compress.browser.js +15 -2
  36. package/dist/cjs/modules/archive/compression/streaming-compress.js +8 -0
  37. package/dist/cjs/modules/archive/zip/stream.js +26 -2
  38. package/dist/cjs/modules/excel/workbook.browser.js +226 -0
  39. package/dist/cjs/modules/excel/workbook.js +46 -1
  40. package/dist/cjs/modules/excel/xlsx/xlsx.browser.js +42 -4
  41. package/dist/cjs/modules/markdown/constants.js +33 -0
  42. package/dist/cjs/modules/markdown/errors.js +28 -0
  43. package/dist/cjs/modules/markdown/format/index.js +310 -0
  44. package/dist/cjs/modules/markdown/index.js +30 -0
  45. package/dist/cjs/modules/markdown/parse/index.js +432 -0
  46. package/dist/cjs/modules/markdown/types.js +7 -0
  47. package/dist/esm/index.browser.js +2 -0
  48. package/dist/esm/index.js +2 -0
  49. package/dist/esm/modules/archive/compression/compress.browser.js +4 -4
  50. package/dist/esm/modules/archive/compression/deflate-fallback.js +664 -360
  51. package/dist/esm/modules/archive/compression/streaming-compress.browser.js +15 -3
  52. package/dist/esm/modules/archive/compression/streaming-compress.js +7 -0
  53. package/dist/esm/modules/archive/zip/stream.js +27 -3
  54. package/dist/esm/modules/excel/workbook.browser.js +226 -0
  55. package/dist/esm/modules/excel/workbook.js +47 -2
  56. package/dist/esm/modules/excel/xlsx/xlsx.browser.js +42 -4
  57. package/dist/esm/modules/markdown/constants.js +30 -0
  58. package/dist/esm/modules/markdown/errors.js +23 -0
  59. package/dist/esm/modules/markdown/format/index.js +307 -0
  60. package/dist/esm/modules/markdown/index.js +22 -0
  61. package/dist/esm/modules/markdown/parse/index.js +428 -0
  62. package/dist/esm/modules/markdown/types.js +6 -0
  63. package/dist/iife/excelts.iife.js +1342 -283
  64. package/dist/iife/excelts.iife.js.map +1 -1
  65. package/dist/iife/excelts.iife.min.js +38 -34
  66. package/dist/types/index.browser.d.ts +3 -0
  67. package/dist/types/index.d.ts +3 -0
  68. package/dist/types/modules/archive/compression/deflate-fallback.d.ts +24 -22
  69. package/dist/types/modules/archive/compression/streaming-compress.browser.d.ts +7 -0
  70. package/dist/types/modules/archive/compression/streaming-compress.d.ts +5 -0
  71. package/dist/types/modules/excel/workbook.browser.d.ts +72 -0
  72. package/dist/types/modules/excel/workbook.d.ts +32 -1
  73. package/dist/types/modules/markdown/constants.d.ts +30 -0
  74. package/dist/types/modules/markdown/errors.d.ts +21 -0
  75. package/dist/types/modules/markdown/format/index.d.ts +54 -0
  76. package/dist/types/modules/markdown/index.d.ts +15 -0
  77. package/dist/types/modules/markdown/parse/index.d.ts +70 -0
  78. package/dist/types/modules/markdown/types.d.ts +130 -0
  79. package/package.json +56 -32
@@ -0,0 +1,428 @@
1
+ /**
2
+ * Markdown Table Parser
3
+ *
4
+ * Parses Markdown tables into structured data.
5
+ *
6
+ * Supports:
7
+ * - Standard GFM (GitHub Flavored Markdown) table syntax
8
+ * - Column alignment detection via separator row
9
+ * - Escaped pipes (`\|`) in cell content
10
+ * - Tables with or without leading/trailing pipes
11
+ * - Tolerant parsing (mismatched column counts, extra whitespace)
12
+ * - Multiline cell content via `<br>` / `<br/>` / `<br />` tags
13
+ *
14
+ * @example
15
+ * ```ts
16
+ * const result = parseMarkdown("| Name | Age |\n| --- | --- |\n| Alice | 30 |");
17
+ * // result.headers = ["Name", "Age"]
18
+ * // result.rows = [["Alice", "30"]]
19
+ * // result.alignments = ["none", "none"]
20
+ * ```
21
+ */
22
+ import { BR_TAG_REGEX, LINEBREAK_REGEX, UNESCAPE_REGEX } from "../constants.js";
23
+ import { MarkdownParseError } from "../errors.js";
// =============================================================================
// Character Codes
// =============================================================================
// UTF-16 code units for the characters the scanners below look for. Giving
// them names lets the `charCodeAt()` comparisons read as intent, not numbers.
const CH_PIPE = 124; // "|"
const CH_BACKSLASH = 92; // "\"
const CH_COLON = 58; // ":"
const CH_DASH = 45; // "-"
const CH_SPACE = 32; // " "
const CH_TAB = 9; // "\t"
/**
 * Normalize caller-supplied parse options into the internal option record.
 *
 * `trim`, `unescape` and `skipEmptyRows` are on unless explicitly set to
 * `false`; `convertBr` is off unless explicitly set to `true`.
 *
 * @param options - User options; `null`/`undefined` is treated as `{}`
 * @returns Record with `trim`, `unescape`, `skipEmpty`, `maxRows`, `convertBr`
 */
function resolveParseOpts(options) {
    // A default parameter only covers `undefined`, so an explicit `null`
    // would otherwise crash on the first property read.
    const source = options == null ? {} : options;
    return {
        trim: source.trim !== false,
        unescape: source.unescape !== false,
        skipEmpty: source.skipEmptyRows !== false,
        // The row-limit check downstream only treats `undefined` as "no limit";
        // a `null` would compare as `rows.length >= null` (always true) and
        // silently drop every data row, so fold it into `undefined` here.
        maxRows: source.maxRows == null ? undefined : source.maxRows,
        convertBr: source.convertBr === true
    };
}
// =============================================================================
// Internal Helpers
// =============================================================================
/**
 * Break one table line into its raw cell strings.
 *
 * A single leading pipe and a single unescaped trailing pipe are treated as
 * the row's outer border and are not part of any cell. A backslash followed by
 * another character is an escape pair: both characters stay in the cell text
 * verbatim and the second one can never act as a cell boundary.
 *
 * Cells are returned exactly as written (no trimming, no unescaping).
 *
 * @param line - One table line
 * @returns The cell strings, always at least one element
 */
function splitRow(line) {
    const total = line.length;
    // Decide whether the final pipe is a border. It is one only when it is
    // preceded by an even number of backslashes (an odd run escapes the pipe).
    let stop = total;
    if (total > 1 && line.charCodeAt(total - 1) === CH_PIPE) {
        let probe = total - 2;
        while (probe >= 0 && line.charCodeAt(probe) === CH_BACKSLASH) {
            probe -= 1;
        }
        const slashRun = total - 2 - probe;
        if (slashRun % 2 === 0) {
            stop = total - 1;
        }
    }
    const cells = [];
    // Step over the leading border pipe, if present.
    let pos = total > 0 && line.charCodeAt(0) === CH_PIPE ? 1 : 0;
    let cellStart = pos;
    while (pos < stop) {
        const code = line.charCodeAt(pos);
        if (code === CH_BACKSLASH && pos + 1 < stop) {
            // Escape pair: keep both characters inside the current cell.
            pos += 2;
            continue;
        }
        if (code === CH_PIPE) {
            cells.push(line.slice(cellStart, pos));
            cellStart = pos + 1;
        }
        pos += 1;
    }
    // Whatever follows the last boundary is the final cell (possibly empty).
    cells.push(line.slice(cellStart, stop));
    return cells;
}
/**
 * Derive a column's alignment from its separator-row cell.
 *
 * Only the first and last characters of the trimmed cell are inspected:
 * - colon on both ends → "center"
 * - colon on the left  → "left"
 * - colon on the right → "right"
 * - otherwise (including an empty cell) → "none"
 *
 * @param cell - Raw separator cell text
 * @returns "left", "center", "right" or "none"
 */
function parseAlignment(cell) {
    const marker = cell.trim();
    if (marker === "") {
        return "none";
    }
    const opensWithColon = marker.charCodeAt(0) === CH_COLON;
    const closesWithColon = marker.charCodeAt(marker.length - 1) === CH_COLON;
    if (opensWithColon) {
        return closesWithColon ? "center" : "left";
    }
    return closesWithColon ? "right" : "none";
}
/**
 * Decide whether a cell is a well-formed separator cell.
 *
 * Accepted shape, scanned left to right without a regex:
 * spaces/tabs, an optional colon, one or more dashes, an optional colon,
 * spaces/tabs. Anything else makes the cell invalid.
 *
 * @param cell - Raw cell text from the candidate separator row
 * @returns `true` when the whole cell matches the shape above
 */
function isSeparatorCell(cell) {
    const size = cell.length;
    const isBlank = (code) => code === CH_SPACE || code === CH_TAB;
    let pos = 0;
    // Leading padding.
    while (pos < size && isBlank(cell.charCodeAt(pos))) {
        pos += 1;
    }
    // Optional left alignment colon.
    if (pos < size && cell.charCodeAt(pos) === CH_COLON) {
        pos += 1;
    }
    // The dash run is mandatory.
    const firstDash = pos;
    while (pos < size && cell.charCodeAt(pos) === CH_DASH) {
        pos += 1;
    }
    if (pos === firstDash) {
        return false;
    }
    // Optional right alignment colon.
    if (pos < size && cell.charCodeAt(pos) === CH_COLON) {
        pos += 1;
    }
    // Only padding may remain.
    for (; pos < size; pos += 1) {
        if (!isBlank(cell.charCodeAt(pos))) {
            return false;
        }
    }
    return true;
}
/**
 * Decide whether a list of cells forms a separator row.
 *
 * The row qualifies only when it has at least one cell and every cell passes
 * `isSeparatorCell`.
 *
 * @param cells - Cells produced by splitting the candidate separator line
 * @returns `true` when all cells are separator cells
 */
function isSeparatorRow(cells) {
    return cells.length > 0 && cells.every((cell) => isSeparatorCell(cell));
}
/**
 * Apply the enabled clean-up steps to one cell, in a fixed order:
 * trim, then unescape, then `<br>` conversion.
 *
 * @param value - Raw cell text
 * @param opts - Resolved options; reads `trim`, `unescape` and `convertBr`
 * @returns The processed cell text
 */
function processCell(value, opts) {
    let text = value;
    if (opts.trim) {
        text = text.trim();
    }
    if (opts.unescape) {
        // Each match is replaced by its first capture group.
        text = text.replace(UNESCAPE_REGEX, "$1");
    }
    if (opts.convertBr) {
        text = text.replace(BR_TAG_REGEX, "\n");
    }
    return text;
}
/**
 * Fit a data row to the table's column count.
 *
 * Cells that exist are run through `processCell`; missing trailing cells
 * become empty strings, and cells beyond `columnCount` are dropped.
 *
 * @param cells - Raw cells of the data row
 * @param columnCount - Number of columns the result must have
 * @param opts - Resolved options forwarded to `processCell`
 * @returns A new array of exactly `columnCount` strings
 */
function normalizeRow(cells, columnCount, opts) {
    const row = [];
    const shared = Math.min(cells.length, columnCount);
    for (let col = 0; col < shared; col += 1) {
        row.push(processCell(cells[col], opts));
    }
    // Pad short rows up to the expected width.
    while (row.length < columnCount) {
        row.push("");
    }
    return row;
}
/**
 * Report whether every cell in a row is the empty string.
 * A row with no cells counts as empty.
 *
 * @param row - Row of cell strings
 * @returns `true` when no cell holds any text
 */
function isEmptyRow(row) {
    return row.every((cell) => cell === "");
}
/**
 * Cheap pre-filter for table lines: a line qualifies as soon as it contains
 * a pipe character anywhere.
 *
 * @param line - Line to test
 * @returns `true` when the line contains `|`
 */
function isTableLine(line) {
    return line.includes("|");
}
/**
 * Report whether the first character after any leading spaces/tabs is a pipe.
 * The table parser uses this to tell "piped" rows from bare ones.
 *
 * @param line - Line to test
 * @returns `true` when the first non-blank character is `|`; `false` for
 *          blank or empty lines
 */
function startsWithPipe(line) {
    for (let pos = 0; pos < line.length; pos += 1) {
        const code = line.charCodeAt(pos);
        if (code === CH_SPACE || code === CH_TAB) {
            continue;
        }
        // First significant character decides the answer.
        return code === CH_PIPE;
    }
    return false;
}
/**
 * Cheap pre-filter for separator rows: a line can only be a separator if it
 * contains at least one dash.
 *
 * @param line - Line to test
 * @returns `true` when the line contains `-`
 */
function hasDash(line) {
    return line.includes("-");
}
// =============================================================================
// Core Table Parser (shared between parseMarkdown and parseMarkdownAll)
// =============================================================================
/**
 * Try to read a table whose header row is `lines[startLine]`.
 *
 * A table is recognised when the (trimmed) header line contains a pipe and
 * the following line is a valid separator row. The column count is taken
 * from the header; columns without a matching separator cell get alignment
 * "none". Data rows are consumed until a blank line, a line without a pipe,
 * or, for tables whose header starts with `|`, a line that does not.
 *
 * @param lines - All lines of the document
 * @param startLine - Index of the candidate header line
 * @param lineCount - Number of lines to consider
 * @param opts - Resolved options; reads `maxRows` and `skipEmpty` directly
 *               and forwards the record to the cell helpers
 * @returns `{ result: { headers, rows, alignments }, endLine }`, where
 *          `endLine` is the index of the first line after the table, or
 *          `null` when no table starts at `startLine`
 */
function parseTableAt(lines, startLine, lineCount, opts) {
    // A table needs at least a header line and a separator line.
    if (startLine >= lineCount - 1) {
        return null;
    }
    const headerLine = lines[startLine].trim();
    if (headerLine === "" || !isTableLine(headerLine)) {
        return null;
    }
    const headerCells = splitRow(headerLine);
    if (headerCells.length < 1) {
        return null;
    }
    const delimiterLine = lines[startLine + 1].trim();
    if (delimiterLine === "" || !hasDash(delimiterLine)) {
        return null;
    }
    const delimiterCells = splitRow(delimiterLine);
    if (!isSeparatorRow(delimiterCells)) {
        return null;
    }
    // From here on the table is accepted; the header fixes the column count.
    const columnCount = headerCells.length;
    const headers = [];
    const alignments = [];
    headerCells.forEach((cell, col) => {
        headers.push(processCell(cell, opts));
        alignments.push(col < delimiterCells.length ? parseAlignment(delimiterCells[col]) : "none");
    });
    // When the header is written with a leading pipe, require the same of
    // data rows so that trailing prose containing a pipe is not absorbed.
    const piped = startsWithPipe(headerLine);
    const belongsToTable = (index) => {
        const text = lines[index].trim();
        if (text === "" || !isTableLine(text)) {
            return false;
        }
        return !piped || startsWithPipe(text);
    };
    const rows = [];
    let cursor = startLine + 2;
    while (cursor < lineCount && belongsToTable(cursor)) {
        if (opts.maxRows !== undefined && rows.length >= opts.maxRows) {
            // Limit reached: walk past the rest of the table without parsing it
            // so the reported end line still points after the whole table.
            do {
                cursor += 1;
            } while (cursor < lineCount && belongsToTable(cursor));
            break;
        }
        const row = normalizeRow(splitRow(lines[cursor].trim()), columnCount, opts);
        if (!(opts.skipEmpty && isEmptyRow(row))) {
            rows.push(row);
        }
        cursor += 1;
    }
    return { result: { headers, rows, alignments }, endLine: cursor };
}
// =============================================================================
// Main Parser
// =============================================================================
/**
 * Parse the first Markdown table found in `input`.
 *
 * The input is split into lines and each line is tried, top to bottom, as a
 * table header (header row, then separator row, then zero or more data rows).
 * The first position that yields a table wins; surrounding non-table content
 * is ignored.
 *
 * @param input - Markdown string containing a table
 * @param options - Parse options
 * @returns Parsed table data with headers, rows, and alignments
 *
 * @throws {MarkdownParseError} When no valid table is found in the input
 *
 * @example
 * ```ts
 * // Basic table
 * const result = parseMarkdown("| Name | Age |\n| --- | --- |\n| Alice | 30 |");
 *
 * // With alignment
 * const aligned = parseMarkdown("| Left | Center | Right |\n|:---|:---:|---:|\n|a|b|c|");
 * // aligned.alignments = ["left", "center", "right"]
 *
 * // With options
 * const limited = parseMarkdown(input, { trim: false, maxRows: 100 });
 * ```
 */
export function parseMarkdown(input, options = {}) {
    const opts = resolveParseOpts(options);
    const lines = input.split(LINEBREAK_REGEX);
    const lineCount = lines.length;
    // The last line can never be a header (no room for a separator), so stop
    // one short of the end.
    let start = 0;
    while (start < lineCount - 1) {
        const found = parseTableAt(lines, start, lineCount, opts);
        if (found) {
            return found.result;
        }
        start += 1;
    }
    const reportedLine = lineCount > 0 ? lineCount : 1;
    throw new MarkdownParseError("No valid Markdown table found in input", reportedLine);
}
/**
 * Collect every Markdown table in a document, in order of appearance.
 *
 * After a table is found, scanning resumes at the line the table parser
 * reports as its end, so lines inside one table are never retried as the
 * header of another. Unlike `parseMarkdown`, finding no table is not an
 * error: the result is simply an empty array.
 *
 * @param input - Markdown string containing one or more tables
 * @param options - Parse options (maxRows applies per table)
 * @returns Array of parsed tables
 *
 * @example
 * ```ts
 * const tables = parseMarkdownAll(markdownDoc);
 * console.log(`Found ${tables.length} tables`);
 * tables.forEach((t, i) => console.log(`Table ${i}: ${t.headers.join(", ")}`));
 * ```
 */
export function parseMarkdownAll(input, options = {}) {
    const opts = resolveParseOpts(options);
    const lines = input.split(LINEBREAK_REGEX);
    const lineCount = lines.length;
    const tables = [];
    for (let cursor = 0; cursor < lineCount - 1;) {
        const found = parseTableAt(lines, cursor, lineCount, opts);
        if (!found) {
            cursor += 1;
            continue;
        }
        tables.push(found.result);
        // Jump past the table that was just consumed.
        cursor = found.endLine;
    }
    return tables;
}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Markdown Types
3
+ *
4
+ * Centralized type definitions for the Markdown module.
5
+ */
6
+ export {};