xlsx-to-markdown 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +300 -0
- package/dist/__tests__/helpers.d.ts +16 -0
- package/dist/__tests__/helpers.d.ts.map +1 -0
- package/dist/__tests__/helpers.js +63 -0
- package/dist/__tests__/helpers.js.map +1 -0
- package/dist/cell-formatter.d.ts +16 -0
- package/dist/cell-formatter.d.ts.map +1 -0
- package/dist/cell-formatter.js +257 -0
- package/dist/cell-formatter.js.map +1 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +129 -0
- package/dist/index.js.map +1 -0
- package/dist/options.d.ts +3 -0
- package/dist/options.d.ts.map +1 -0
- package/dist/options.js +20 -0
- package/dist/options.js.map +1 -0
- package/dist/paragraph-renderer.d.ts +14 -0
- package/dist/paragraph-renderer.d.ts.map +1 -0
- package/dist/paragraph-renderer.js +85 -0
- package/dist/paragraph-renderer.js.map +1 -0
- package/dist/region-detector.d.ts +19 -0
- package/dist/region-detector.d.ts.map +1 -0
- package/dist/region-detector.js +192 -0
- package/dist/region-detector.js.map +1 -0
- package/dist/sheet-converter.d.ts +7 -0
- package/dist/sheet-converter.d.ts.map +1 -0
- package/dist/sheet-converter.js +299 -0
- package/dist/sheet-converter.js.map +1 -0
- package/dist/table-renderer.d.ts +22 -0
- package/dist/table-renderer.d.ts.map +1 -0
- package/dist/table-renderer.js +236 -0
- package/dist/table-renderer.js.map +1 -0
- package/dist/types.d.ts +172 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +62 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 khatada
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
# xlsx-to-markdown
|
|
2
|
+
|
|
3
|
+
Node.js / TypeScript library that converts XLSX files to Markdown.
|
|
4
|
+
|
|
5
|
+
Mixed content is handled automatically — paragraphs of text and tables can coexist on the same sheet, in any order. Multiple tables per sheet (including side-by-side tables) are supported.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install xlsx-to-markdown
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```ts
|
|
16
|
+
import { convertXlsxToMarkdown } from 'xlsx-to-markdown';
|
|
17
|
+
|
|
18
|
+
const result = await convertXlsxToMarkdown('report.xlsx');
|
|
19
|
+
console.log(result.markdown);
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## API
|
|
23
|
+
|
|
24
|
+
### `convertXlsxToMarkdown(input, options?)`
|
|
25
|
+
|
|
26
|
+
Reads a file from disk (path string) or an in-memory buffer and returns a `ConvertResult`.
|
|
27
|
+
|
|
28
|
+
```ts
|
|
29
|
+
// From file path
|
|
30
|
+
const result = await convertXlsxToMarkdown('report.xlsx');
|
|
31
|
+
|
|
32
|
+
// From Buffer / Uint8Array
|
|
33
|
+
const buffer = await fs.promises.readFile('report.xlsx');
|
|
34
|
+
const result = await convertXlsxToMarkdown(buffer);
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### `convertWorkbook(workbook, options?)`
|
|
38
|
+
|
|
39
|
+
Converts an already-parsed SheetJS `WorkBook` object. Useful when you manage the SheetJS lifecycle yourself.
|
|
40
|
+
|
|
41
|
+
```ts
|
|
42
|
+
import * as XLSX from 'xlsx';
|
|
43
|
+
import { convertWorkbook } from 'xlsx-to-markdown';
|
|
44
|
+
|
|
45
|
+
const wb = XLSX.readFile('report.xlsx', { cellStyles: true });
|
|
46
|
+
const result = convertWorkbook(wb);
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Return value — `ConvertResult`
|
|
50
|
+
|
|
51
|
+
| Property | Type | Description |
|
|
52
|
+
| --- | --- | --- |
|
|
53
|
+
| `markdown` | `string` | Combined Markdown for all selected sheets |
|
|
54
|
+
| `sheets` | `SheetResult[]` | Per-sheet breakdown |
|
|
55
|
+
|
|
56
|
+
Each `SheetResult` contains:
|
|
57
|
+
|
|
58
|
+
| Property | Type | Description |
|
|
59
|
+
| --- | --- | --- |
|
|
60
|
+
| `name` | `string` | Sheet name |
|
|
61
|
+
| `index` | `number` | 0-based sheet index in the workbook |
|
|
62
|
+
| `markdown` | `string` | Markdown for this sheet only |
|
|
63
|
+
| `regions` | `Region[]` | Detected content regions with type and rendered Markdown |
|
|
64
|
+
|
|
65
|
+
Each `Region` contains `type` (`"table"` \| `"paragraph"`), row/column bounds, and `markdown`.
|
|
66
|
+
|
|
67
|
+
## Options
|
|
68
|
+
|
|
69
|
+
```ts
|
|
70
|
+
const result = await convertXlsxToMarkdown('report.xlsx', {
|
|
71
|
+
sheets: ['Summary', 'Detail'], // include only these sheets (name or 0-based index)
|
|
72
|
+
sheetHeadings: true, // prepend "## Sheet Name" before each sheet
|
|
73
|
+
headerRow: true, // treat first row of every table as a header
|
|
74
|
+
tableDetection: {
|
|
75
|
+
minColumns: 2, // minimum columns to classify a region as a table
|
|
76
|
+
minRows: 2, // minimum rows to classify a region as a table
|
|
77
|
+
useBorders: true, // use cell borders as additional table hints
|
|
78
|
+
},
|
|
79
|
+
richText: true, // convert bold/italic/hyperlinks to inline markup
|
|
80
|
+
emptyCell: '', // placeholder for empty table cells
|
|
81
|
+
dateFormat: 'YYYY-MM-DD', // date format tokens: YYYY MM DD HH mm ss
|
|
82
|
+
blankLinesBetweenRegions: 1, // blank lines inserted between regions
|
|
83
|
+
});
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Option defaults
|
|
87
|
+
|
|
88
|
+
| Option | Default | Notes |
|
|
89
|
+
| --- | --- | --- |
|
|
90
|
+
| `sheets` | all sheets | |
|
|
91
|
+
| `sheetHeadings` | `"auto"` | Headings added automatically when the workbook has >1 sheet |
|
|
92
|
+
| `headerRow` | `true` | |
|
|
93
|
+
| `tableDetection.minColumns` | `2` | |
|
|
94
|
+
| `tableDetection.minRows` | `2` | |
|
|
95
|
+
| `tableDetection.useBorders` | `true` | |
|
|
96
|
+
| `richText` | `true` | |
|
|
97
|
+
| `emptyCell` | `""` | |
|
|
98
|
+
| `dateFormat` | `"YYYY-MM-DD"` | |
|
|
99
|
+
| `blankLinesBetweenRegions` | `1` | |
|
|
100
|
+
|
|
101
|
+
## Content Detection
|
|
102
|
+
|
|
103
|
+
The library uses a recursive row→column→row scan to classify each block of cells as a **table** or **paragraph**.
|
|
104
|
+
|
|
105
|
+
### Detection algorithm
|
|
106
|
+
|
|
107
|
+
1. **Row scan** — split the sheet into bands of consecutive non-empty rows (empty rows act as separators)
|
|
108
|
+
2. **Column scan** — within each band, find columns that are entirely empty and use them as split points → column sub-ranges
|
|
109
|
+
3. **Recurse** — each column sub-range is processed independently by the same algorithm
|
|
110
|
+
4. **Classify** — a band that cannot be split further is classified:
|
|
111
|
+
- **Table** — every row has `≥ minColumns` filled cells, and the band spans `≥ minRows` rows
|
|
112
|
+
- **Paragraph** — everything else
|
|
113
|
+
|
|
114
|
+
When `useBorders: true` (default), an empty cell that has both a left and a right border is counted as "filled" — this keeps bordered-but-valueless table cells from breaking table detection. **Exception**: if every cell in a column within a band is blank *and* has no top or bottom border, the column is treated as empty regardless of left/right borders. This ensures that a separator column between two side-by-side tables is still recognised as a gap even when it carries border styles from the adjacent tables.
|
|
115
|
+
|
|
116
|
+
### Side-by-side tables
|
|
117
|
+
|
|
118
|
+
Tables placed horizontally on the same rows (separated by at least one empty column) are detected as independent regions:
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
A B C E F G
|
|
122
|
+
1 Name Score Grade Item Qty Price ← two separate headers
|
|
123
|
+
2 Alice 90 A Apple 5 100
|
|
124
|
+
3 Bob 75 B Banana 3 60
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Column D is empty → detected as two tables (A–C and E–G), each rendered as its own `<table>`.
|
|
128
|
+
|
|
129
|
+
The separator column may carry left/right border styles from the adjacent table formatting. As long as it has no top or bottom border and no values anywhere within the row-band, it is still treated as an empty gap.
|
|
130
|
+
|
|
131
|
+
### Mixed content example
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
Row 1: "Section 1: Introduction" ← paragraph
|
|
135
|
+
Row 2: (empty)
|
|
136
|
+
Row 3: Name | Department | Salary ← table header
|
|
137
|
+
Row 4: Alice | Engineering| 800,000
|
|
138
|
+
Row 5: Bob | Marketing | 650,000
|
|
139
|
+
Row 6: (empty)
|
|
140
|
+
Row 7: "* Figures are in JPY" ← paragraph
|
|
141
|
+
Row 8: (empty)
|
|
142
|
+
Row 9: Q1 | Q2 ← second table
|
|
143
|
+
Row 10: 1,200 | 1,450
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Output:
|
|
147
|
+
|
|
148
|
+
```markdown
|
|
149
|
+
Section 1: Introduction
|
|
150
|
+
|
|
151
|
+
<table>
|
|
152
|
+
<tr><th>Name</th><th>Department</th><th style="text-align: right">Salary</th></tr>
|
|
153
|
+
<tr><td>Alice</td><td>Engineering</td><td style="text-align: right">800,000</td></tr>
|
|
154
|
+
<tr><td>Bob</td><td>Marketing</td><td style="text-align: right">650,000</td></tr>
|
|
155
|
+
</table>
|
|
156
|
+
|
|
157
|
+
* Figures are in JPY
|
|
158
|
+
|
|
159
|
+
<table>
|
|
160
|
+
<tr><th style="text-align: right">Q1</th><th style="text-align: right">Q2</th></tr>
|
|
161
|
+
<tr><td style="text-align: right">1,200</td><td style="text-align: right">1,450</td></tr>
|
|
162
|
+
</table>
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Recognized table patterns
|
|
166
|
+
|
|
167
|
+
The table below summarises which layouts are detected as a table and which fall back to a paragraph.
|
|
168
|
+
|
|
169
|
+
| Excel layout | Detected as | Reason |
|
|
170
|
+
| --- | --- | --- |
|
|
171
|
+
| 2+ columns × 2+ rows of data | **table** | Meets `minColumns` and `minRows` thresholds |
|
|
172
|
+
| Single column of text | **paragraph** | Below `minColumns` (default 2) |
|
|
173
|
+
| Single row of data | **paragraph** | Below `minRows` (default 2) |
|
|
174
|
+
| Every row has colspan spanning all columns | **paragraph** | Each row renders as one cell — no tabular structure |
|
|
175
|
+
| Header row has colspan, data rows have multiple cells | **table** | At least one row has 2+ visible cells |
|
|
176
|
+
| Cells with `rowspan` spanning multiple rows | **table** | Rendered as `<td rowspan="N">` with no layout breakage |
|
|
177
|
+
| Two blocks separated by an empty column | **two tables** | Column gap triggers independent region detection |
|
|
178
|
+
|
|
179
|
+
#### Pattern: full-width colspan in all rows → paragraph
|
|
180
|
+
|
|
181
|
+
When every row in a region is merged across all columns (e.g. a block of title-style cells), the region has no relational structure and is rendered as a paragraph instead of an HTML table.
|
|
182
|
+
|
|
183
|
+
```
|
|
184
|
+
A B C
|
|
185
|
+
1 [ Title spanning A:C ] ← colspan=3
|
|
186
|
+
2 [ Subtitle spanning A:C ] ← colspan=3
|
|
187
|
+
3 [ Content spanning A:C ] ← colspan=3
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Output:
|
|
191
|
+
|
|
192
|
+
```markdown
|
|
193
|
+
Title
|
|
194
|
+
|
|
195
|
+
Subtitle
|
|
196
|
+
|
|
197
|
+
Content
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
#### Pattern: merged header row + normal data rows → table
|
|
201
|
+
|
|
202
|
+
A full-width merged header (colspan) in the first row is fine as long as at least one data row has multiple cells.
|
|
203
|
+
|
|
204
|
+
```
|
|
205
|
+
A B C
|
|
206
|
+
1 [ Report Title ] ← colspan=3
|
|
207
|
+
2 Name Score Grade ← 3 normal cells
|
|
208
|
+
3 Alice 90 A
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Output:
|
|
212
|
+
|
|
213
|
+
```html
|
|
214
|
+
<table>
|
|
215
|
+
<tr>
|
|
216
|
+
<th colspan="3">Report Title</th>
|
|
217
|
+
</tr>
|
|
218
|
+
<tr>
|
|
219
|
+
<td>Name</td>
|
|
220
|
+
<td style="text-align: right">Score</td>
|
|
221
|
+
<td>Grade</td>
|
|
222
|
+
</tr>
|
|
223
|
+
<tr>
|
|
224
|
+
<td>Alice</td>
|
|
225
|
+
<td style="text-align: right">90</td>
|
|
226
|
+
<td>A</td>
|
|
227
|
+
</tr>
|
|
228
|
+
</table>
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
#### Pattern: rowspan across rows → table
|
|
232
|
+
|
|
233
|
+
Cells with `rowspan` are rendered using the `rowspan` attribute. Because `<thead>`/`<tbody>` are not emitted, a `<th rowspan="N">` that visually spans into data rows does not cause layout breakage.
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
A B C
|
|
237
|
+
1 Name [ Period (B:C) ] ← B1:C1 colspan=2
|
|
238
|
+
2 Alice Q1 ← A2:A3 rowspan=2
|
|
239
|
+
3 Q2
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
Output:
|
|
243
|
+
|
|
244
|
+
```html
|
|
245
|
+
<table>
|
|
246
|
+
<tr>
|
|
247
|
+
<th>Name</th>
|
|
248
|
+
<th colspan="2">Period</th>
|
|
249
|
+
</tr>
|
|
250
|
+
<tr>
|
|
251
|
+
<td rowspan="2">Alice</td>
|
|
252
|
+
<td>Q1</td>
|
|
253
|
+
</tr>
|
|
254
|
+
<tr>
|
|
255
|
+
<td>Q2</td>
|
|
256
|
+
</tr>
|
|
257
|
+
</table>
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
## Rich Text
|
|
261
|
+
|
|
262
|
+
When `richText: true` (default), cell formatting is converted:
|
|
263
|
+
|
|
264
|
+
| Excel format | Table output (HTML) | Paragraph output (Markdown) |
|
|
265
|
+
| --- | --- | --- |
|
|
266
|
+
| Bold | `<strong>text</strong>` | `**text**` |
|
|
267
|
+
| Italic | `<em>text</em>` | `_text_` |
|
|
268
|
+
| Bold + Italic | `<strong><em>text</em></strong>` | `***text***` |
|
|
269
|
+
| Hyperlink | `<a href="url">text</a>` | `[text](url)` |
|
|
270
|
+
|
|
271
|
+
## Table Formatting Details
|
|
272
|
+
|
|
273
|
+
Tables are output as HTML (`<table>`) to support all Excel features:
|
|
274
|
+
|
|
275
|
+
- **Merged cells** — `colspan` and `rowspan` attributes are set on the master (top-left) cell; child cells are omitted entirely.
|
|
276
|
+
- **Header row** — rendered as `<th>` elements when `headerRow: true`.
|
|
277
|
+
- **Column alignment** — columns whose data cells are all numeric (`cell.t === "n"`) are automatically right-aligned (`style="text-align: right"`), including currency-formatted values such as `¥1,000` or `$100`. Explicit cell alignment takes precedence.
|
|
278
|
+
- **Newlines within cells** — converted to `<br>`.
|
|
279
|
+
- **HTML escaping** — `&`, `<`, `>`, `"` in cell values are escaped to HTML entities.
|
|
280
|
+
- **Formula cells** — the computed value is used; the formula string is never output.
|
|
281
|
+
|
|
282
|
+
## Architecture Decision Records
|
|
283
|
+
|
|
284
|
+
Design decisions are documented in [`docs/adr/`](docs/adr/).
|
|
285
|
+
|
|
286
|
+
## Development
|
|
287
|
+
|
|
288
|
+
```bash
|
|
289
|
+
npm install
|
|
290
|
+
npm test # run all tests (vitest)
|
|
291
|
+
npm run build # compile TypeScript → dist/
|
|
292
|
+
npm run lint # oxlint
|
|
293
|
+
npm run fmt # oxfmt
|
|
294
|
+
npm run fmt:check # check formatting (CI)
|
|
295
|
+
npm run secretlint # scan for secrets
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
## License
|
|
299
|
+
|
|
300
|
+
MIT
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import * as XLSX from "xlsx";
|
|
2
|
+
/**
|
|
3
|
+
* Build a simple WorkBook from a 2-D array of cell values.
|
|
4
|
+
* Row/column indices are 0-based.
|
|
5
|
+
* Pass `undefined` for empty cells.
|
|
6
|
+
*/
|
|
7
|
+
export declare function buildWorkbook(sheets: {
|
|
8
|
+
name: string;
|
|
9
|
+
data: (string | number | undefined)[][];
|
|
10
|
+
}[]): XLSX.WorkBook;
|
|
11
|
+
/**
|
|
12
|
+
* Normalise whitespace/newlines in markdown output so test assertions are
|
|
13
|
+
* not fragile against trailing spaces.
|
|
14
|
+
*/
|
|
15
|
+
export declare function normalise(md: string): string;
|
|
16
|
+
//# sourceMappingURL=helpers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../src/__tests__/helpers.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAE7B;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,MAAM,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,CAAC,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC,EAAE,EAAE,CAAA;CAAE,EAAE,GAClE,IAAI,CAAC,QAAQ,CAOf;AAED;;;GAGG;AACH,wBAAgB,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAM5C"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.buildWorkbook = buildWorkbook;
|
|
37
|
+
exports.normalise = normalise;
|
|
38
|
+
const XLSX = __importStar(require("xlsx"));
|
|
39
|
+
/**
 * Assemble a WorkBook from plain 2-D arrays, one entry per sheet.
 *
 * Each `data` array is a list of rows and each row is a list of cell values,
 * both addressed from index 0. Use `undefined` for an empty cell: nullish
 * entries are converted to `null` before the rows are handed to
 * `XLSX.utils.aoa_to_sheet`.
 *
 * @param {{ name: string, data: (string | number | undefined)[][] }[]} sheets
 *   Sheets to create, appended to the workbook in the given order.
 * @returns {object} The workbook returned by `XLSX.utils.book_new()` with
 *   every sheet appended.
 */
function buildWorkbook(sheets) {
    const workbook = XLSX.utils.book_new();
    const blankToNull = (cellValue) => cellValue ?? null;
    for (const sheet of sheets) {
        const rows = sheet.data.map((row) => row.map((cellValue) => blankToNull(cellValue)));
        const worksheet = XLSX.utils.aoa_to_sheet(rows);
        XLSX.utils.book_append_sheet(workbook, worksheet, sheet.name);
    }
    return workbook;
}
|
|
52
|
+
/**
 * Canonicalise Markdown output for comparison in tests.
 *
 * Trailing whitespace is removed from every line, then leading and trailing
 * whitespace is removed from the text as a whole, so assertions do not break
 * on incidental spaces or surrounding blank lines.
 *
 * @param {string} md - Markdown text to clean up.
 * @returns {string} The cleaned-up text.
 */
function normalise(md) {
    const cleanedLines = [];
    for (const line of md.split("\n")) {
        cleanedLines.push(line.trimEnd());
    }
    const rejoined = cleanedLines.join("\n");
    return rejoined.trim();
}
|
|
63
|
+
//# sourceMappingURL=helpers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.js","sourceRoot":"","sources":["../../src/__tests__/helpers.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAOA,sCASC;AAMD,8BAMC;AA5BD,2CAA6B;AAE7B;;;;GAIG;AACH,SAAgB,aAAa,CAC3B,MAAmE;IAEnE,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;IACjC,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QACpC,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;QACjF,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAgB,SAAS,CAAC,EAAU;IAClC,OAAO,EAAE;SACN,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACvB,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,EAAE,CAAC;AACZ,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import * as XLSX from "xlsx";
|
|
2
|
+
import type { CellData, ResolvedOptions } from "./types.js";
|
|
3
|
+
/**
|
|
4
|
+
* Escape pipe and backslash characters inside a GFM table cell value.
|
|
5
|
+
* @deprecated Not used in HTML table rendering; kept for potential external use.
|
|
6
|
+
*/
|
|
7
|
+
export declare function escapeTableCell(value: string): string;
|
|
8
|
+
/**
|
|
9
|
+
* Escape HTML special characters for safe embedding in HTML attributes and text.
|
|
10
|
+
*/
|
|
11
|
+
export declare function escapeHtml(value: string): string;
|
|
12
|
+
/**
|
|
13
|
+
* Extract a CellData object from an xlsx cell.
|
|
14
|
+
*/
|
|
15
|
+
export declare function extractCellData(cell: XLSX.CellObject | undefined, mergedChildCells: Set<string>, cellAddress: string, opts: ResolvedOptions): CellData;
|
|
16
|
+
//# sourceMappingURL=cell-formatter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cell-formatter.d.ts","sourceRoot":"","sources":["../src/cell-formatter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,EAAE,QAAQ,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAqE5D;;;GAGG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAErD;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMhD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,IAAI,EAAE,IAAI,CAAC,UAAU,GAAG,SAAS,EACjC,gBAAgB,EAAE,GAAG,CAAC,MAAM,CAAC,EAC7B,WAAW,EAAE,MAAM,EACnB,IAAI,EAAE,eAAe,GACpB,QAAQ,CAoIV"}
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.escapeTableCell = escapeTableCell;
|
|
37
|
+
exports.escapeHtml = escapeHtml;
|
|
38
|
+
exports.extractCellData = extractCellData;
|
|
39
|
+
const XLSX = __importStar(require("xlsx"));
|
|
40
|
+
/**
 * Format an Excel date serial number as text using a small token template.
 *
 * Supported tokens: YYYY (year), MM (month), DD (day), HH (hour),
 * mm (minute), ss (second). Every field except the year is zero-padded to
 * two digits.
 *
 * The template is expanded in a single global pass, so a token that appears
 * more than once (e.g. "YYYY-MM-DD (MM/DD)") is replaced everywhere. Chained
 * string-pattern `.replace` calls would expand only the first occurrence of
 * each token.
 *
 * @param {number} dateSerial - Excel date serial, decoded with
 *   `XLSX.SSF.parse_date_code`.
 * @param {string} fmt - Template containing the tokens above; any other text
 *   is copied through unchanged.
 * @returns {string} The formatted date, or `String(dateSerial)` when the
 *   serial cannot be decoded.
 */
function formatDate(dateSerial, fmt) {
    const date = XLSX.SSF.parse_date_code(dateSerial);
    if (!date) {
        return String(dateSerial);
    }
    const pad = (n) => String(n).padStart(2, "0");
    const fields = {
        YYYY: String(date.y),
        MM: pad(date.m),
        DD: pad(date.d),
        HH: pad(date.H),
        mm: pad(date.M),
        ss: pad(date.S),
    };
    // A function replacer also keeps "$" sequences in the output literal.
    return fmt.replace(/YYYY|MM|DD|HH|mm|ss/g, (token) => fields[token]);
}
|
|
58
|
+
/**
 * Wrap text in Markdown emphasis markers.
 *
 * Bold + italic uses `***`, bold alone `**`, italic alone `_`. Falsy text
 * (such as the empty string) and unstyled text are returned untouched.
 *
 * @param {string} text - Text to decorate.
 * @param {boolean} bold - Whether the text is bold.
 * @param {boolean} italic - Whether the text is italic.
 * @returns {string} The decorated text.
 */
function applyInlineFormatting(text, bold, italic) {
    const unstyled = !bold && !italic;
    if (!text || unstyled) {
        return text;
    }
    let marker = "_";
    if (bold) {
        marker = italic ? "***" : "**";
    }
    return `${marker}${text}${marker}`;
}
|
|
72
|
+
/**
 * Decode the predefined XML character entities in a string.
 *
 * Handles the five named entities (amp, lt, gt, quot, apos) plus the numeric
 * apostrophe form (#39). Decoding happens in one pass, so an ampersand
 * produced by decoding is never decoded a second time: the escaped form of
 * the literal text "<" written as an entity stays an entity reference rather
 * than collapsing to "<".
 *
 * @param {string} s - XML-escaped text.
 * @returns {string} The text with entity references replaced by their
 *   characters; unrecognised references are left as they are.
 */
function unescapeXml(s) {
    const decoded = {
        amp: "&",
        lt: "<",
        gt: ">",
        quot: '"',
        apos: "'",
        "#39": "'",
    };
    return s.replace(/&(amp|lt|gt|quot|apos|#39);/g, (_match, name) => decoded[name]);
}
|
|
83
|
+
/**
 * Parse XLSX rich-text XML (cell.r) into an array of styled text runs.
 *
 * Expected XML format (OOXML shared-string rich text):
 *   <r><rPr><b/></rPr><t xml:space="preserve">bold </t></r><r><t>normal</t></r>
 *
 * Behaviour:
 * - `<r>` and `<rPr>` elements are recognised with or without attributes, so
 *   the run matcher agrees with the `<r ` guard.
 * - A `<b>` / `<i>` element turns the flag on unless it carries an explicit
 *   `val` of "0" or "false" (e.g. `<b val="0"/>`), which turns it off.
 * - Only the first `<t>` element of each run is read; runs without a `<t>`
 *   are skipped.
 *
 * @param {string} xml - Raw rich-text XML.
 * @returns {{ text: string, bold: boolean, italic: boolean }[] | null}
 *   The runs in document order, or null when the input is not rich-text XML
 *   or contains no text runs.
 */
function parseRichTextRuns(xml) {
    if (!xml.includes("<r>") && !xml.includes("<r ")) {
        return null;
    }
    // True when the toggle element is present and not explicitly disabled.
    const isToggleOn = (runProps, elementPattern) => {
        const element = elementPattern.exec(runProps);
        if (!element) {
            return false;
        }
        const val = /\bval\s*=\s*(["'])(.*?)\1/.exec(element[1]);
        return !val || !/^(?:0|false)$/i.test(val[2]);
    };
    const runs = [];
    // `\b` keeps `<r` from matching `<rPr>` or `<rPh>`.
    const runPattern = /<r\b[^>]*>([\s\S]*?)<\/r>/g;
    let match;
    while ((match = runPattern.exec(xml)) !== null) {
        const inner = match[1];
        const runProps = (/<rPr\b[^>]*>([\s\S]*?)<\/rPr>/.exec(inner) ?? [])[1] ?? "";
        const textMatch = /<t\b[^>]*>([\s\S]*?)<\/t>/.exec(inner);
        if (textMatch) {
            runs.push({
                text: unescapeXml(textMatch[1]),
                bold: isToggleOn(runProps, /<b\b([^>]*)>/),
                italic: isToggleOn(runProps, /<i\b([^>]*)>/),
            });
        }
    }
    return runs.length > 0 ? runs : null;
}
|
|
108
|
+
/**
 * Backslash-escape the characters that would break a GFM table cell:
 * every backslash and every pipe gets a backslash placed in front of it.
 * @deprecated Not used in HTML table rendering; kept for potential external use.
 *
 * @param {string} value - Raw cell text.
 * @returns {string} The escaped text.
 */
function escapeTableCell(value) {
    return value.replace(/[\\|]/g, (ch) => `\\${ch}`);
}
|
|
115
|
+
/**
 * Escape HTML special characters for safe embedding in HTML text and in
 * double-quoted HTML attributes.
 *
 * The ampersand, less-than, greater-than and double-quote characters are
 * replaced by their named entity references (amp, lt, gt, quot). The
 * replacement runs in a single pass, so the ampersand that starts an emitted
 * entity is never escaped again.
 *
 * @param {string} value - Raw text.
 * @returns {string} The HTML-escaped text.
 */
function escapeHtml(value) {
    const entityNames = {
        "&": "amp",
        "<": "lt",
        ">": "gt",
        '"': "quot",
    };
    return value.replace(/[&<>"]/g, (ch) => `&${entityNames[ch]};`);
}
|
|
125
|
+
/**
 * Turn an xlsx cell object into a CellData record.
 *
 * The record carries the plain text (`rawValue`), the Markdown-decorated text
 * (`value`), cell-level bold/italic flags, an optional hyperlink target, an
 * optional horizontal alignment, whether the address is a merged child cell,
 * and whether any border side has a style. For cells with inline rich text
 * (and `opts.richText` enabled) an extra `richTextHtml` field holds the
 * HTML rendering of the runs.
 *
 * @param {object | undefined} cell - The xlsx cell, or undefined for a gap.
 * @param {Set<string>} mergedChildCells - Addresses covered by a merge but
 *   not its top-left cell.
 * @param {string} cellAddress - Address of this cell, looked up in the set.
 * @param {object} opts - Resolved options; `richText` and `dateFormat` are read.
 * @returns {object} The CellData record.
 */
function extractCellData(cell, mergedChildCells, cellAddress, opts) {
    const isMergedChild = mergedChildCells.has(cellAddress);

    // A missing cell, or one without a value, produces a blank record.
    // NOTE(review): a valueless cell that still has border styling reports
    // hasBorder: false here — confirm that callers inspect borders themselves.
    if (!cell || cell.v === undefined || cell.v === null) {
        return {
            rawValue: "",
            value: "",
            bold: false,
            italic: false,
            isMergedChild,
            hasBorder: false,
        };
    }

    // Collapse CRLF and lone CR into LF.
    const unifyNewlines = (s) => s.replace(/\r\n?/g, "\n");
    const style = cell.s;

    // --- Hyperlink: explicit link record first, then a HYPERLINK() formula ---
    let hyperlink;
    const linkTarget = cell.l?.Target;
    if (linkTarget) {
        hyperlink = linkTarget;
    }
    else if (cell.f) {
        // Formula-based links carry no link record; read the URL literal instead.
        const formulaUrl = cell.f.match(/^HYPERLINK\s*\(\s*"([^"]+)"/i);
        if (formulaUrl) {
            hyperlink = formulaUrl[1];
        }
    }

    // --- Border: true when any of the four sides has a style ---
    const border = style?.border;
    const hasBorder = border
        ? ["top", "bottom", "left", "right"].some((side) => Boolean(border[side]?.style))
        : false;

    // --- Alignment: anything other than center/right is reported as left ---
    let alignment;
    const horizontal = style?.alignment?.horizontal;
    if (horizontal) {
        alignment = horizontal === "center" || horizontal === "right" ? horizontal : "left";
    }

    // --- Inline rich text: parse cell.r XML (ADR-0019) ---
    if (opts.richText && cell.r) {
        const runs = parseRichTextRuns(String(cell.r));
        if (runs) {
            const rawText = unifyNewlines(runs.map((run) => run.text).join(""));

            // Keep surrounding whitespace outside the emphasis markers so the
            // delimiters stay flanking: "**bold** " rather than "**bold **".
            const toMarkdown = (run) => {
                const text = unifyNewlines(run.text);
                if (!run.bold && !run.italic) {
                    return text;
                }
                const core = text.trim();
                if (!core) {
                    return text;
                }
                const leadLength = text.length - text.trimStart().length;
                const lead = text.slice(0, leadLength);
                const trail = text.slice(leadLength + core.length);
                return lead + applyInlineFormatting(core, run.bold, run.italic) + trail;
            };
            const toHtml = (run) => {
                const body = escapeHtml(unifyNewlines(run.text)).replace(/\n/g, "<br>");
                if (run.bold && run.italic) {
                    return `<strong><em>${body}</em></strong>`;
                }
                if (run.bold) {
                    return `<strong>${body}</strong>`;
                }
                if (run.italic) {
                    return `<em>${body}</em>`;
                }
                return body;
            };

            const joinedMarkdown = runs.map(toMarkdown).join("");
            const markdownText = hyperlink ? `[${joinedMarkdown}](${hyperlink})` : joinedMarkdown;
            const richTextHtml = runs.map(toHtml).join("");
            return {
                rawValue: rawText,
                value: markdownText,
                bold: false,
                italic: false,
                hyperlink,
                alignment,
                isMergedChild,
                hasBorder,
                richTextHtml,
            };
        }
    }

    // --- Fallback: plain value extraction ---
    let value;
    switch (cell.t) {
        case "d":
            // NOTE(review): the value is coerced to a number and treated as a
            // date serial; if `v` is a Date object this yields epoch
            // milliseconds — confirm against the xlsx cell-object contract.
            value = formatDate(typeof cell.v === "number" ? cell.v : Number(cell.v), opts.dateFormat);
            break;
        case "b":
            value = cell.v ? "TRUE" : "FALSE";
            break;
        default:
            // Numbers and everything else: prefer the formatted text when present.
            value = cell.w ?? String(cell.v);
    }
    value = unifyNewlines(value);

    // Cell-level style (bold/italic applied to the whole cell).
    const font = opts.richText && style ? style.font : undefined;
    const bold = Boolean(font?.bold);
    const italic = Boolean(font?.italic);

    const formatted = opts.richText ? applyInlineFormatting(value, bold, italic) : value;
    const final = hyperlink ? `[${formatted}](${hyperlink})` : formatted;
    return {
        rawValue: value,
        value: final,
        bold,
        italic,
        hyperlink,
        alignment,
        isMergedChild,
        hasBorder,
    };
}
|
|
257
|
+
//# sourceMappingURL=cell-formatter.js.map
|