@cj-tech-master/excelts 7.6.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +99 -577
- package/README_zh.md +101 -577
- package/dist/browser/index.browser.d.ts +3 -0
- package/dist/browser/index.browser.js +2 -0
- package/dist/browser/index.d.ts +3 -0
- package/dist/browser/index.js +2 -0
- package/dist/browser/modules/archive/compression/compress.browser.js +4 -4
- package/dist/browser/modules/archive/compression/deflate-fallback.d.ts +24 -22
- package/dist/browser/modules/archive/compression/deflate-fallback.js +664 -360
- package/dist/browser/modules/archive/compression/streaming-compress.browser.d.ts +7 -0
- package/dist/browser/modules/archive/compression/streaming-compress.browser.js +15 -3
- package/dist/browser/modules/archive/compression/streaming-compress.d.ts +5 -0
- package/dist/browser/modules/archive/compression/streaming-compress.js +7 -0
- package/dist/browser/modules/archive/zip/stream.js +27 -3
- package/dist/browser/modules/excel/workbook.browser.d.ts +72 -0
- package/dist/browser/modules/excel/workbook.browser.js +226 -0
- package/dist/browser/modules/excel/workbook.d.ts +32 -1
- package/dist/browser/modules/excel/workbook.js +47 -2
- package/dist/browser/modules/excel/xlsx/xlsx.browser.js +42 -4
- package/dist/browser/modules/markdown/constants.d.ts +30 -0
- package/dist/browser/modules/markdown/constants.js +30 -0
- package/dist/browser/modules/markdown/errors.d.ts +21 -0
- package/dist/browser/modules/markdown/errors.js +23 -0
- package/dist/browser/modules/markdown/format/index.d.ts +54 -0
- package/dist/browser/modules/markdown/format/index.js +307 -0
- package/dist/browser/modules/markdown/index.d.ts +15 -0
- package/dist/browser/modules/markdown/index.js +22 -0
- package/dist/browser/modules/markdown/parse/index.d.ts +70 -0
- package/dist/browser/modules/markdown/parse/index.js +428 -0
- package/dist/browser/modules/markdown/types.d.ts +130 -0
- package/dist/browser/modules/markdown/types.js +6 -0
- package/dist/cjs/index.js +5 -1
- package/dist/cjs/modules/archive/compression/compress.browser.js +4 -4
- package/dist/cjs/modules/archive/compression/deflate-fallback.js +664 -360
- package/dist/cjs/modules/archive/compression/streaming-compress.browser.js +15 -2
- package/dist/cjs/modules/archive/compression/streaming-compress.js +8 -0
- package/dist/cjs/modules/archive/zip/stream.js +26 -2
- package/dist/cjs/modules/excel/workbook.browser.js +226 -0
- package/dist/cjs/modules/excel/workbook.js +46 -1
- package/dist/cjs/modules/excel/xlsx/xlsx.browser.js +42 -4
- package/dist/cjs/modules/markdown/constants.js +33 -0
- package/dist/cjs/modules/markdown/errors.js +28 -0
- package/dist/cjs/modules/markdown/format/index.js +310 -0
- package/dist/cjs/modules/markdown/index.js +30 -0
- package/dist/cjs/modules/markdown/parse/index.js +432 -0
- package/dist/cjs/modules/markdown/types.js +7 -0
- package/dist/esm/index.browser.js +2 -0
- package/dist/esm/index.js +2 -0
- package/dist/esm/modules/archive/compression/compress.browser.js +4 -4
- package/dist/esm/modules/archive/compression/deflate-fallback.js +664 -360
- package/dist/esm/modules/archive/compression/streaming-compress.browser.js +15 -3
- package/dist/esm/modules/archive/compression/streaming-compress.js +7 -0
- package/dist/esm/modules/archive/zip/stream.js +27 -3
- package/dist/esm/modules/excel/workbook.browser.js +226 -0
- package/dist/esm/modules/excel/workbook.js +47 -2
- package/dist/esm/modules/excel/xlsx/xlsx.browser.js +42 -4
- package/dist/esm/modules/markdown/constants.js +30 -0
- package/dist/esm/modules/markdown/errors.js +23 -0
- package/dist/esm/modules/markdown/format/index.js +307 -0
- package/dist/esm/modules/markdown/index.js +22 -0
- package/dist/esm/modules/markdown/parse/index.js +428 -0
- package/dist/esm/modules/markdown/types.js +6 -0
- package/dist/iife/excelts.iife.js +1342 -283
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +38 -34
- package/dist/types/index.browser.d.ts +3 -0
- package/dist/types/index.d.ts +3 -0
- package/dist/types/modules/archive/compression/deflate-fallback.d.ts +24 -22
- package/dist/types/modules/archive/compression/streaming-compress.browser.d.ts +7 -0
- package/dist/types/modules/archive/compression/streaming-compress.d.ts +5 -0
- package/dist/types/modules/excel/workbook.browser.d.ts +72 -0
- package/dist/types/modules/excel/workbook.d.ts +32 -1
- package/dist/types/modules/markdown/constants.d.ts +30 -0
- package/dist/types/modules/markdown/errors.d.ts +21 -0
- package/dist/types/modules/markdown/format/index.d.ts +54 -0
- package/dist/types/modules/markdown/index.d.ts +15 -0
- package/dist/types/modules/markdown/parse/index.d.ts +70 -0
- package/dist/types/modules/markdown/types.d.ts +130 -0
- package/package.json +56 -32
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown Table Parser
|
|
3
|
+
*
|
|
4
|
+
* Parses Markdown tables into structured data.
|
|
5
|
+
*
|
|
6
|
+
* Supports:
|
|
7
|
+
* - Standard GFM (GitHub Flavored Markdown) table syntax
|
|
8
|
+
* - Column alignment detection via separator row
|
|
9
|
+
* - Escaped pipes (`\|`) in cell content
|
|
10
|
+
* - Tables with or without leading/trailing pipes
|
|
11
|
+
* - Tolerant parsing (mismatched column counts, extra whitespace)
|
|
12
|
+
* - Multiline cell content via `<br>` / `<br/>` / `<br />` tags
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```ts
|
|
16
|
+
* const result = parseMarkdown("| Name | Age |\n| --- | --- |\n| Alice | 30 |");
|
|
17
|
+
* // result.headers = ["Name", "Age"]
|
|
18
|
+
* // result.rows = [["Alice", "30"]]
|
|
19
|
+
* // result.alignments = ["none", "none"]
|
|
20
|
+
* ```
|
|
21
|
+
*/
|
|
22
|
+
import { BR_TAG_REGEX, LINEBREAK_REGEX, UNESCAPE_REGEX } from "../constants.js";
|
|
23
|
+
import { MarkdownParseError } from "../errors.js";
|
|
24
|
+
// =============================================================================
|
|
25
|
+
// Character Codes (avoid repeated charCodeAt comparisons with magic numbers)
|
|
26
|
+
// =============================================================================
|
|
27
|
+
// Each constant is the UTF-16 code unit of the character it is named after,
// derived from the character itself so the value cannot drift from its label.
const CH_PIPE = "|".charCodeAt(0); // cell delimiter
const CH_BACKSLASH = "\\".charCodeAt(0); // escape introducer
const CH_COLON = ":".charCodeAt(0); // alignment marker in separator cells
const CH_DASH = "-".charCodeAt(0); // separator-cell filler
const CH_SPACE = " ".charCodeAt(0); // horizontal whitespace
const CH_TAB = "\t".charCodeAt(0); // horizontal whitespace
|
|
33
|
+
/**
 * Resolve caller-supplied parse options into the fully-populated internal form.
 *
 * - `trim`, `unescape` and `skipEmptyRows` are enabled unless explicitly `false`.
 * - `convertBr` is enabled only when explicitly `true`.
 * - `maxRows` is passed through untouched (`undefined` means "no limit" to the caller).
 *
 * A `null` or `undefined` argument is treated as an empty options object, so
 * callers that forward an optional value never hit a property access on `null`.
 *
 * @param options - Raw parse options; may be `null`/`undefined`
 * @returns `{ trim, unescape, skipEmpty, maxRows, convertBr }`
 */
function resolveParseOpts(options) {
    // Default parameters only cover `undefined`; `null` must be handled here.
    const source = options ?? {};
    return {
        trim: source.trim !== false,
        unescape: source.unescape !== false,
        skipEmpty: source.skipEmptyRows !== false,
        maxRows: source.maxRows,
        convertBr: source.convertBr === true
    };
}
|
|
42
|
+
// =============================================================================
|
|
43
|
+
// Internal Helpers
|
|
44
|
+
// =============================================================================
|
|
45
|
+
/**
 * Split one Markdown table row into its raw cell strings.
 *
 * - A pipe at index 0 is treated as the row's left border and skipped.
 * - A pipe at the last index is treated as the right border, unless it is
 *   preceded by an odd number of backslashes (then it is an escaped pipe and
 *   belongs to the last cell).
 * - Inside the row, a backslash followed by another character forms an
 *   escape pair that is never a cell boundary. The pair is kept verbatim in
 *   the returned cell; no unescaping or trimming happens here.
 *
 * Every cell is one contiguous slice of `line`, so only the start index of
 * the current cell has to be tracked.
 *
 * @param line - A single table row
 * @returns The cell strings, in order (always at least one element)
 */
function splitRow(line) {
    const length = line.length;
    // Skip the left border pipe, if any.
    let pos = length > 0 && line.charCodeAt(0) === CH_PIPE ? 1 : 0;
    // Decide whether the final pipe is a border (excluded) or escaped content.
    let stop = length;
    if (length > 1 && line.charCodeAt(length - 1) === CH_PIPE) {
        let probe = length - 2;
        while (probe >= 0 && line.charCodeAt(probe) === CH_BACKSLASH) {
            probe--;
        }
        const precedingBackslashes = length - 2 - probe;
        // Even count: the backslashes escape each other, so the pipe is real.
        if (precedingBackslashes % 2 === 0) {
            stop = length - 1;
        }
    }
    const cells = [];
    let cellStart = pos;
    while (pos < stop) {
        const code = line.charCodeAt(pos);
        if (code === CH_BACKSLASH && pos + 1 < stop) {
            // Escape pair: step over both characters so an escaped pipe is not a boundary.
            pos += 2;
            continue;
        }
        if (code === CH_PIPE) {
            cells.push(line.slice(cellStart, pos));
            cellStart = pos + 1;
        }
        pos++;
    }
    // Whatever remains after the last boundary is the final cell.
    cells.push(line.slice(cellStart, stop));
    return cells;
}
|
|
122
|
+
/**
 * Derive a column's alignment from its separator cell.
 *
 * Only the first and last characters of the trimmed cell are inspected:
 * - colon on both ends → `"center"`
 * - colon at the start only → `"left"`
 * - colon at the end only → `"right"`
 * - no colon, or a blank cell → `"none"`
 *
 * @param cell - Raw separator cell text
 * @returns `"left"`, `"center"`, `"right"` or `"none"`
 */
function parseAlignment(cell) {
    const marker = cell.trim();
    if (marker.length === 0) {
        return "none";
    }
    const colonAtStart = marker.charCodeAt(0) === CH_COLON;
    const colonAtEnd = marker.charCodeAt(marker.length - 1) === CH_COLON;
    if (colonAtStart) {
        return colonAtEnd ? "center" : "left";
    }
    return colonAtEnd ? "right" : "none";
}
|
|
149
|
+
/**
 * Test whether a cell is a valid separator-row cell.
 *
 * Accepted shape, scanned left to right without a regex:
 * spaces/tabs, an optional colon, one or more dashes, an optional colon,
 * spaces/tabs, and then the end of the string.
 *
 * @param cell - Raw cell text
 * @returns `true` when the whole cell matches that shape
 */
function isSeparatorCell(cell) {
    const size = cell.length;
    const isBlank = (code) => code === CH_SPACE || code === CH_TAB;
    let pos = 0;
    // Leading whitespace.
    while (pos < size && isBlank(cell.charCodeAt(pos))) {
        pos++;
    }
    // Optional left alignment colon.
    if (pos < size && cell.charCodeAt(pos) === CH_COLON) {
        pos++;
    }
    // The dash run is mandatory.
    const firstDash = pos;
    while (pos < size && cell.charCodeAt(pos) === CH_DASH) {
        pos++;
    }
    if (pos === firstDash) {
        return false;
    }
    // Optional right alignment colon.
    if (pos < size && cell.charCodeAt(pos) === CH_COLON) {
        pos++;
    }
    // Only whitespace may follow.
    while (pos < size && isBlank(cell.charCodeAt(pos))) {
        pos++;
    }
    return pos === size;
}
|
|
191
|
+
/**
 * Test whether a list of cells forms a separator row.
 *
 * The row qualifies when it has at least one cell and every cell passes
 * `isSeparatorCell`.
 *
 * @param cells - Cells of the candidate row
 * @returns `true` for a non-empty row made only of separator cells
 */
function isSeparatorRow(cells) {
    if (cells.length === 0) {
        return false;
    }
    for (const cell of cells) {
        if (!isSeparatorCell(cell)) {
            return false;
        }
    }
    return true;
}
|
|
206
|
+
/**
 * Turn a raw cell string into its final value.
 *
 * Steps run in this order, each only when its option is set:
 * 1. `opts.trim` — strip surrounding whitespace.
 * 2. `opts.unescape` — replace every `UNESCAPE_REGEX` match with its first capture group.
 * 3. `opts.convertBr` — replace every `BR_TAG_REGEX` match with a newline.
 *
 * @param value - Raw cell text
 * @param opts - Resolved parse options
 * @returns The processed cell text
 */
function processCell(value, opts) {
    let text = value;
    if (opts.trim) {
        text = text.trim();
    }
    if (opts.unescape) {
        text = text.replace(UNESCAPE_REGEX, "$1");
    }
    if (opts.convertBr) {
        text = text.replace(BR_TAG_REGEX, "\n");
    }
    return text;
}
|
|
219
|
+
/**
 * Fit a row to exactly `columnCount` cells.
 *
 * Cells that exist are run through `processCell`; missing trailing cells
 * become empty strings; surplus cells are dropped.
 *
 * @param cells - Raw cells of the row
 * @param columnCount - Number of columns the table has
 * @param opts - Resolved parse options forwarded to `processCell`
 * @returns A new array of length `columnCount`
 */
function normalizeRow(cells, columnCount, opts) {
    return Array.from({ length: columnCount }, (_, col) => {
        if (col >= cells.length) {
            return "";
        }
        return processCell(cells[col], opts);
    });
}
|
|
231
|
+
/**
 * Test whether every cell of a row is the empty string.
 *
 * A row with no cells at all counts as empty.
 *
 * @param row - Processed cell values
 * @returns `true` when no cell differs from `""`
 */
function isEmptyRow(row) {
    for (const cell of row) {
        if (cell !== "") {
            return false;
        }
    }
    return true;
}
|
|
242
|
+
/**
 * Cheap pre-filter: a line can only belong to a table if it contains a pipe.
 *
 * @param line - Line to test
 * @returns `true` when the line contains at least one `|`
 */
function isTableLine(line) {
    return line.includes("|");
}
|
|
248
|
+
/**
 * Test whether the first character after any leading spaces/tabs is a pipe.
 *
 * The table parser uses this to tell whether a row is written in the
 * leading-pipe style.
 *
 * @param line - Line to test
 * @returns `true` when the first non-blank character is `|`; `false` for
 *          blank or empty lines
 */
function startsWithPipe(line) {
    for (let pos = 0; pos < line.length; pos++) {
        const code = line.charCodeAt(pos);
        if (code === CH_SPACE || code === CH_TAB) {
            continue;
        }
        // First non-blank character decides the answer.
        return code === CH_PIPE;
    }
    return false;
}
|
|
264
|
+
/**
 * Cheap pre-filter: a line can only be a separator row if it contains a dash.
 *
 * @param line - Line to test
 * @returns `true` when the line contains at least one `-`
 */
function hasDash(line) {
    return line.includes("-");
}
|
|
270
|
+
// =============================================================================
|
|
271
|
+
// Core Table Parser (shared between parseMarkdown and parseMarkdownAll)
|
|
272
|
+
// =============================================================================
|
|
273
|
+
/**
 * Try to parse a table whose header row is `lines[startLine]`.
 *
 * A table is recognised when the trimmed header line contains a pipe and the
 * next trimmed line is a valid separator row. The column count is taken from
 * the header; separator cells beyond it are ignored and missing ones yield
 * `"none"` alignment.
 *
 * Data rows are consumed until a line is blank, has no pipe, or (when the
 * header began with a pipe) does not begin with a pipe. Once `opts.maxRows`
 * rows are collected, the remaining rows of the same table are skipped
 * without being parsed, so `endLine` still points past the whole table.
 *
 * @param lines - All lines of the document
 * @param startLine - Index of the candidate header line
 * @param lineCount - Number of entries in `lines`
 * @param opts - Resolved parse options
 * @returns `{ result: { headers, rows, alignments }, endLine }` where
 *          `endLine` is the index of the first line after the table, or
 *          `null` when no table starts at `startLine`
 */
function parseTableAt(lines, startLine, lineCount, opts) {
    // A header needs a following line to act as its separator.
    if (startLine >= lineCount - 1) {
        return null;
    }
    const headerLine = lines[startLine].trim();
    if (headerLine === "" || !isTableLine(headerLine)) {
        return null;
    }
    const headerCells = splitRow(headerLine);
    if (headerCells.length < 1) {
        return null;
    }
    const separatorLine = lines[startLine + 1].trim();
    if (separatorLine === "" || !hasDash(separatorLine)) {
        return null;
    }
    const separatorCells = splitRow(separatorLine);
    if (!isSeparatorRow(separatorCells)) {
        return null;
    }
    // From here on the table is confirmed.
    const columnCount = headerCells.length;
    const headers = headerCells.map((cell) => processCell(cell, opts));
    const alignments = headerCells.map((_, col) => col < separatorCells.length ? parseAlignment(separatorCells[col]) : "none");
    // With a leading-pipe header, rows must use a leading pipe too; this keeps
    // prose such as "This has a | pipe" from being read as a data row.
    const piped = startsWithPipe(headerLine);
    const belongsToTable = (text) => text !== "" && isTableLine(text) && (!piped || startsWithPipe(text));
    const rows = [];
    let cursor = startLine + 2;
    while (cursor < lineCount) {
        const dataLine = lines[cursor].trim();
        if (!belongsToTable(dataLine)) {
            break;
        }
        if (opts.maxRows !== undefined && rows.length >= opts.maxRows) {
            // Limit reached: step over the rest of this table so the caller
            // resumes scanning after it. The current line is already known
            // to belong to the table.
            do {
                cursor++;
            } while (cursor < lineCount && belongsToTable(lines[cursor].trim()));
            break;
        }
        const row = normalizeRow(splitRow(dataLine), columnCount, opts);
        if (!opts.skipEmpty || !isEmptyRow(row)) {
            rows.push(row);
        }
        cursor++;
    }
    return { result: { headers, rows, alignments }, endLine: cursor };
}
|
|
350
|
+
// =============================================================================
|
|
351
|
+
// Main Parser
|
|
352
|
+
// =============================================================================
|
|
353
|
+
/**
 * Parse the first Markdown table found in `input`.
 *
 * The input is split into lines and each line (except the last, which has no
 * room for a separator row after it) is tried as a table header. A table
 * consists of:
 * 1. a pipe-delimited header row,
 * 2. a separator row of dashes, with optional colons for alignment,
 * 3. zero or more data rows.
 *
 * Text before and after the table is ignored.
 *
 * @param input - Markdown string containing a table
 * @param options - Parse options
 * @returns The first table's `headers`, `rows` and `alignments`
 *
 * @throws {MarkdownParseError} When no line starts a valid table. The error
 *         is constructed with the total line count (at least 1) as its
 *         second argument.
 *
 * @example
 * ```ts
 * // Basic table
 * const result = parseMarkdown("| Name | Age |\n| --- | --- |\n| Alice | 30 |");
 *
 * // Alignment detection
 * const aligned = parseMarkdown("| Left | Center | Right |\n|:---|:---:|---:|\n|a|b|c|");
 * // aligned.alignments = ["left", "center", "right"]
 *
 * // Larger document: the first table wins
 * const first = parseMarkdown(markdownDoc);
 *
 * // With options
 * const preview = parseMarkdown(input, { trim: false, maxRows: 100 });
 * ```
 */
export function parseMarkdown(input, options = {}) {
    const opts = resolveParseOpts(options);
    const lines = input.split(LINEBREAK_REGEX);
    const lineCount = lines.length;
    let start = 0;
    while (start < lineCount - 1) {
        const found = parseTableAt(lines, start, lineCount, opts);
        if (found) {
            return found.result;
        }
        start++;
    }
    throw new MarkdownParseError("No valid Markdown table found in input", Math.max(lineCount, 1));
}
|
|
397
|
+
/**
 * Parse every Markdown table found in `input`, in document order.
 *
 * Lines are scanned top to bottom. When a table is found, scanning resumes at
 * the line index the table parser reports as its end; otherwise it moves on
 * by one line. The last line is never tried as a header.
 *
 * @param input - Markdown string containing zero or more tables
 * @param options - Parse options (`maxRows` applies to each table separately)
 * @returns Array of parsed tables; empty when none is found
 *
 * @example
 * ```ts
 * const tables = parseMarkdownAll(markdownDoc);
 * console.log(`Found ${tables.length} tables`);
 * tables.forEach((t, i) => console.log(`Table ${i}: ${t.headers.join(", ")}`));
 * ```
 */
export function parseMarkdownAll(input, options = {}) {
    const opts = resolveParseOpts(options);
    const lines = input.split(LINEBREAK_REGEX);
    const lineCount = lines.length;
    const tables = [];
    for (let cursor = 0; cursor < lineCount - 1;) {
        const found = parseTableAt(lines, cursor, lineCount, opts);
        if (!found) {
            cursor++;
            continue;
        }
        tables.push(found.result);
        // Jump past the table that was just consumed.
        cursor = found.endLine;
    }
    return tables;
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown Types
|
|
3
|
+
*
|
|
4
|
+
* Centralized type definitions for the Markdown module.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Horizontal alignment of a Markdown table column.
 *
 * Encoded by where colons sit in the column's separator cell:
 * - `:---`  → `"left"`
 * - `:---:` → `"center"`
 * - `---:`  → `"right"`
 * - `---`   → `"none"` (most renderers show this as left-aligned)
 */
export type MarkdownAlignment =
    | "left"
    | "center"
    | "right"
    | "none";
|
|
15
|
+
/**
 * Structured form of one parsed Markdown table.
 */
export interface MarkdownParseResult {
    /** Column names taken from the header row. */
    headers: string[];
    /** Data rows; each inner array holds one row's cell values. */
    rows: string[][];
    /** Per-column alignment read from the separator row. */
    alignments: MarkdownAlignment[];
}
|
|
26
|
+
/**
 * Options controlling how Markdown tables are parsed.
 */
export interface MarkdownParseOptions {
    /**
     * Strip surrounding whitespace from each cell value.
     * @default true
     */
    trim?: boolean;
    /**
     * Resolve Markdown escape sequences in cell values (for example `\|` becomes `|`).
     * @default true
     */
    unescape?: boolean;
    /**
     * Leave rows whose cells are all empty out of the result.
     * @default true
     */
    skipEmptyRows?: boolean;
    /**
     * Upper bound on the number of data rows returned (the header is not counted).
     * Handy for previewing large tables.
     */
    maxRows?: number;
    /**
     * Turn `<br>`, `<br/>` and `<br />` tags inside cells into newline characters.
     * Useful when multiline cell content has to survive a round trip through Markdown.
     * @default false
     */
    convertBr?: boolean;
}
|
|
57
|
+
/**
 * Per-column settings used when formatting a Markdown table.
 */
export interface MarkdownColumnConfig {
    /** Text shown in the column's header cell. */
    header: string;
    /**
     * Alignment of this column.
     * @default "left"
     */
    alignment?: MarkdownAlignment;
    /**
     * Smallest allowed column width, in characters and not counting padding.
     * The effective width is the larger of this value and the widest cell content.
     * @default 3 (minimum for separator `---`)
     */
    minWidth?: number;
}
|
|
75
|
+
/**
 * Options controlling how data is rendered as a Markdown table.
 */
export interface MarkdownFormatOptions {
    /**
     * Explicit column definitions. When given, they replace auto-detected
     * headers and alignment. Each entry is either a header name or a
     * `MarkdownColumnConfig` object.
     */
    columns?: (string | MarkdownColumnConfig)[];
    /**
     * Alignment applied to columns that do not specify their own.
     * @default "left"
     */
    alignment?: MarkdownAlignment;
    /**
     * Pad cell content with spaces so every column has a uniform width.
     * When false, width-alignment padding is turned off, but the single space
     * on each side of the content (`| value |`) that most Markdown renderers
     * require is still emitted.
     * @default true
     */
    padding?: boolean;
    /**
     * End the output with a newline.
     * @default true
     */
    trailingNewline?: boolean;
    /**
     * Escape pipes (`|`) and backslashes (`\`) found in cell content.
     * @default true
     */
    escapeContent?: boolean;
    /**
     * Custom converter from a cell value to its text.
     * Invoked for every cell value before formatting; the returned string
     * becomes the cell text.
     */
    stringify?: (value: unknown) => string;
}
|
|
113
|
+
/**
 * Combined Markdown options for the Workbook integration.
 * Brings together the parse and format options and adds worksheet-specific settings.
 */
export interface MarkdownOptions extends MarkdownParseOptions, MarkdownFormatOptions {
    /** Name of the worksheet to read from or write to. */
    sheetName?: string;
    /** ID of the worksheet to read from or write to. */
    sheetId?: number;
    /** Custom mapper applied while parsing (Markdown string to cell value). */
    map?: (value: string, column: number) => unknown;
    /** Format string used when formatting Date values. */
    dateFormat?: string;
    /** Format dates in UTC. */
    dateUTC?: boolean;
    /** Keep empty rows in the output. */
    includeEmptyRows?: boolean;
}
|
package/dist/cjs/index.js
CHANGED
|
@@ -18,7 +18,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
18
18
|
};
|
|
19
19
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
20
|
exports.PdfRenderError = exports.PdfError = exports.PageSizes = exports.excelToPdf = exports.pdf = exports.uint8ArrayToString = exports.stringToUint8Array = exports.toUint8Array = exports.concatUint8Arrays = exports.getRootCause = exports.getErrorChain = exports.errorToJSON = exports.toError = exports.BaseError = exports.getSupportedFormats = exports.DateFormatter = exports.DateParser = exports.xmlDecode = exports.xmlEncode = exports.uint8ArrayToBase64 = exports.base64ToUint8Array = exports.excelToDate = exports.dateToExcel = exports.encodeRange = exports.decodeRange = exports.encodeCell = exports.decodeCell = exports.encodeRow = exports.decodeRow = exports.encodeCol = exports.decodeCol = exports.DefinedNames = exports.createCsvFormatterStream = exports.createCsvParserStream = exports.CsvFormatterStream = exports.CsvParserStream = exports.WorksheetReader = exports.WorksheetWriter = exports.WorkbookReader = exports.WorkbookWriter = exports.FormCheckbox = exports.DataValidations = exports.Table = exports.Image = exports.Range = exports.Cell = exports.Column = exports.Row = exports.Worksheet = exports.Workbook = void 0;
|
|
21
|
-
exports.MaxItemsExceededError = exports.ImageError = exports.TableError = exports.PivotTableError = exports.WorksheetNameError = exports.XmlParseError = exports.InvalidValueTypeError = exports.MergeConflictError = exports.RowOutOfBoundsError = exports.ColumnOutOfBoundsError = exports.InvalidAddressError = exports.ExcelStreamStateError = exports.ExcelNotSupportedError = exports.ExcelDownloadError = exports.ExcelFileError = exports.isExcelError = exports.ExcelError = exports.isPdfError = exports.PdfStructureError = exports.PdfFontError = void 0;
|
|
21
|
+
exports.MarkdownParseError = exports.MarkdownError = exports.MaxItemsExceededError = exports.ImageError = exports.TableError = exports.PivotTableError = exports.WorksheetNameError = exports.XmlParseError = exports.InvalidValueTypeError = exports.MergeConflictError = exports.RowOutOfBoundsError = exports.ColumnOutOfBoundsError = exports.InvalidAddressError = exports.ExcelStreamStateError = exports.ExcelNotSupportedError = exports.ExcelDownloadError = exports.ExcelFileError = exports.isExcelError = exports.ExcelError = exports.isPdfError = exports.PdfStructureError = exports.PdfFontError = void 0;
|
|
22
22
|
var workbook_1 = require("./modules/excel/workbook.js");
|
|
23
23
|
Object.defineProperty(exports, "Workbook", { enumerable: true, get: function () { return workbook_1.Workbook; } });
|
|
24
24
|
var worksheet_1 = require("./modules/excel/worksheet.js");
|
|
@@ -147,3 +147,7 @@ Object.defineProperty(exports, "PivotTableError", { enumerable: true, get: funct
|
|
|
147
147
|
Object.defineProperty(exports, "TableError", { enumerable: true, get: function () { return errors_2.TableError; } });
|
|
148
148
|
Object.defineProperty(exports, "ImageError", { enumerable: true, get: function () { return errors_2.ImageError; } });
|
|
149
149
|
Object.defineProperty(exports, "MaxItemsExceededError", { enumerable: true, get: function () { return errors_2.MaxItemsExceededError; } });
|
|
150
|
+
// Markdown errors
|
|
151
|
+
var errors_3 = require("./modules/markdown/errors.js");
|
|
152
|
+
Object.defineProperty(exports, "MarkdownError", { enumerable: true, get: function () { return errors_3.MarkdownError; } });
|
|
153
|
+
Object.defineProperty(exports, "MarkdownParseError", { enumerable: true, get: function () { return errors_3.MarkdownParseError; } });
|
|
@@ -134,7 +134,7 @@ async function processWithStrategy(strategy, data, options) {
|
|
|
134
134
|
});
|
|
135
135
|
}
|
|
136
136
|
// Fallback to pure JS implementation.
|
|
137
|
-
return strategy.jsFallback(data);
|
|
137
|
+
return strategy.jsFallback(data, options.level);
|
|
138
138
|
}
|
|
139
139
|
// =============================================================================
|
|
140
140
|
// Public API
|
|
@@ -159,7 +159,7 @@ function compressSync(data, options = {}) {
|
|
|
159
159
|
if (level === 0) {
|
|
160
160
|
return data;
|
|
161
161
|
}
|
|
162
|
-
return (0, deflate_fallback_1.deflateRawCompressed)(data);
|
|
162
|
+
return (0, deflate_fallback_1.deflateRawCompressed)(data, level);
|
|
163
163
|
}
|
|
164
164
|
/**
|
|
165
165
|
* Decompress data using browser's native DecompressionStream or JS fallback
|
|
@@ -272,7 +272,7 @@ async function gzip(data, options = {}) {
|
|
|
272
272
|
*/
|
|
273
273
|
function gzipSync(data, options = {}) {
|
|
274
274
|
const level = options.level ?? defaults_1.DEFAULT_COMPRESS_LEVEL;
|
|
275
|
-
const deflated = level === 0 ? (0, deflate_fallback_1.deflateRawStore)(data) : (0, deflate_fallback_1.deflateRawCompressed)(data);
|
|
275
|
+
const deflated = level === 0 ? (0, deflate_fallback_1.deflateRawStore)(data) : (0, deflate_fallback_1.deflateRawCompressed)(data, level);
|
|
276
276
|
return wrapGzip(deflated, data);
|
|
277
277
|
}
|
|
278
278
|
/**
|
|
@@ -348,7 +348,7 @@ async function zlib(data, options = {}) {
|
|
|
348
348
|
*/
|
|
349
349
|
function zlibSync(data, options = {}) {
|
|
350
350
|
const level = options.level ?? defaults_1.DEFAULT_COMPRESS_LEVEL;
|
|
351
|
-
const deflated = level === 0 ? (0, deflate_fallback_1.deflateRawStore)(data) : (0, deflate_fallback_1.deflateRawCompressed)(data);
|
|
351
|
+
const deflated = level === 0 ? (0, deflate_fallback_1.deflateRawStore)(data) : (0, deflate_fallback_1.deflateRawCompressed)(data, level);
|
|
352
352
|
return wrapZlib(deflated, data, level);
|
|
353
353
|
}
|
|
354
354
|
/**
|