xlsx-to-markdown 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +300 -0
  3. package/dist/__tests__/helpers.d.ts +16 -0
  4. package/dist/__tests__/helpers.d.ts.map +1 -0
  5. package/dist/__tests__/helpers.js +63 -0
  6. package/dist/__tests__/helpers.js.map +1 -0
  7. package/dist/cell-formatter.d.ts +16 -0
  8. package/dist/cell-formatter.d.ts.map +1 -0
  9. package/dist/cell-formatter.js +257 -0
  10. package/dist/cell-formatter.js.map +1 -0
  11. package/dist/index.d.ts +21 -0
  12. package/dist/index.d.ts.map +1 -0
  13. package/dist/index.js +129 -0
  14. package/dist/index.js.map +1 -0
  15. package/dist/options.d.ts +3 -0
  16. package/dist/options.d.ts.map +1 -0
  17. package/dist/options.js +20 -0
  18. package/dist/options.js.map +1 -0
  19. package/dist/paragraph-renderer.d.ts +14 -0
  20. package/dist/paragraph-renderer.d.ts.map +1 -0
  21. package/dist/paragraph-renderer.js +85 -0
  22. package/dist/paragraph-renderer.js.map +1 -0
  23. package/dist/region-detector.d.ts +19 -0
  24. package/dist/region-detector.d.ts.map +1 -0
  25. package/dist/region-detector.js +192 -0
  26. package/dist/region-detector.js.map +1 -0
  27. package/dist/sheet-converter.d.ts +7 -0
  28. package/dist/sheet-converter.d.ts.map +1 -0
  29. package/dist/sheet-converter.js +299 -0
  30. package/dist/sheet-converter.js.map +1 -0
  31. package/dist/table-renderer.d.ts +22 -0
  32. package/dist/table-renderer.d.ts.map +1 -0
  33. package/dist/table-renderer.js +236 -0
  34. package/dist/table-renderer.js.map +1 -0
  35. package/dist/types.d.ts +172 -0
  36. package/dist/types.d.ts.map +1 -0
  37. package/dist/types.js +3 -0
  38. package/dist/types.js.map +1 -0
  39. package/package.json +62 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 khatada
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,300 @@
1
+ # xlsx-to-markdown
2
+
3
+ Node.js / TypeScript library that converts XLSX files to Markdown.
4
+
5
+ Mixed content is handled automatically — paragraphs of text and tables can coexist on the same sheet, in any order. Multiple tables per sheet (including side-by-side tables) are supported.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ npm install xlsx-to-markdown
11
+ ```
12
+
13
+ ## Quick Start
14
+
15
+ ```ts
16
+ import { convertXlsxToMarkdown } from 'xlsx-to-markdown';
17
+
18
+ const result = await convertXlsxToMarkdown('report.xlsx');
19
+ console.log(result.markdown);
20
+ ```
21
+
22
+ ## API
23
+
24
+ ### `convertXlsxToMarkdown(input, options?)`
25
+
26
+ Reads a file from disk (path string) or an in-memory buffer (`Buffer` / `Uint8Array`) and asynchronously returns a `ConvertResult` — `await` the call, as in the examples below.
27
+
28
+ ```ts
29
+ // From file path
30
+ const result = await convertXlsxToMarkdown('report.xlsx');
31
+
32
+ // From Buffer / Uint8Array
33
+ const buffer = await fs.promises.readFile('report.xlsx');
34
+ const resultFromBuffer = await convertXlsxToMarkdown(buffer);
35
+ ```
36
+
37
+ ### `convertWorkbook(workbook, options?)`
38
+
39
+ Converts an already-parsed SheetJS `WorkBook` object. Useful when you manage the SheetJS lifecycle yourself.
40
+
41
+ ```ts
42
+ import * as XLSX from 'xlsx';
43
+ import { convertWorkbook } from 'xlsx-to-markdown';
44
+
45
+ const wb = XLSX.readFile('report.xlsx', { cellStyles: true });
46
+ const result = convertWorkbook(wb);
47
+ ```
48
+
49
+ ### Return value — `ConvertResult`
50
+
51
+ | Property | Type | Description |
52
+ | --- | --- | --- |
53
+ | `markdown` | `string` | Combined Markdown for all selected sheets |
54
+ | `sheets` | `SheetResult[]` | Per-sheet breakdown |
55
+
56
+ Each `SheetResult` contains:
57
+
58
+ | Property | Type | Description |
59
+ | --- | --- | --- |
60
+ | `name` | `string` | Sheet name |
61
+ | `index` | `number` | 0-based sheet index in the workbook |
62
+ | `markdown` | `string` | Markdown for this sheet only |
63
+ | `regions` | `Region[]` | Detected content regions with type and rendered Markdown |
64
+
65
+ Each `Region` contains `type` (`"table"` \| `"paragraph"`), row/column bounds, and `markdown`.
66
+
67
+ ## Options
68
+
69
+ ```ts
70
+ const result = await convertXlsxToMarkdown('report.xlsx', {
71
+ sheets: ['Summary', 'Detail'], // include only these sheets (name or 0-based index)
72
+ sheetHeadings: true, // prepend "## Sheet Name" before each sheet
73
+ headerRow: true, // treat first row of every table as a header
74
+ tableDetection: {
75
+ minColumns: 2, // minimum columns to classify a region as a table
76
+ minRows: 2, // minimum rows to classify a region as a table
77
+ useBorders: true, // use cell borders as additional table hints
78
+ },
79
+ richText: true, // convert bold/italic/hyperlinks to inline markup
80
+ emptyCell: '', // placeholder for empty table cells
81
+ dateFormat: 'YYYY-MM-DD', // date format tokens: YYYY MM DD HH mm ss
82
+ blankLinesBetweenRegions: 1, // blank lines inserted between regions
83
+ });
84
+ ```
85
+
86
+ ### Option defaults
87
+
88
+ | Option | Default | Notes |
89
+ | --- | --- | --- |
90
+ | `sheets` | all sheets | |
91
+ | `sheetHeadings` | `"auto"` | Headings added automatically when the workbook has >1 sheet |
92
+ | `headerRow` | `true` | |
93
+ | `tableDetection.minColumns` | `2` | |
94
+ | `tableDetection.minRows` | `2` | |
95
+ | `tableDetection.useBorders` | `true` | |
96
+ | `richText` | `true` | |
97
+ | `emptyCell` | `""` | |
98
+ | `dateFormat` | `"YYYY-MM-DD"` | |
99
+ | `blankLinesBetweenRegions` | `1` | |
100
+
101
+ ## Content Detection
102
+
103
+ The library uses a recursive row→column→row scan to classify each block of cells as a **table** or **paragraph**.
104
+
105
+ ### Detection algorithm
106
+
107
+ 1. **Row scan** — split the sheet into bands of consecutive non-empty rows (empty rows act as separators)
108
+ 2. **Column scan** — within each band, find columns that are entirely empty and use them as split points → column sub-ranges
109
+ 3. **Recurse** — each column sub-range is processed independently by the same algorithm
110
+ 4. **Classify** — a band that cannot be split further is classified:
111
+ - **Table** — every row has `≥ minColumns` filled cells, and the band spans `≥ minRows` rows
112
+ - **Paragraph** — everything else
113
+
114
+ When `useBorders: true` (default), an empty cell that has both a left and a right border is counted as "filled" — this keeps bordered-but-valueless table cells from breaking table detection. **Exception**: if every cell in a column within a band is blank *and* has no top or bottom border, the column is treated as empty regardless of left/right borders. This ensures that a separator column between two side-by-side tables is still recognised as a gap even when it carries border styles from the adjacent tables.
115
+
116
+ ### Side-by-side tables
117
+
118
+ Tables placed horizontally on the same rows (separated by at least one empty column) are detected as independent regions:
119
+
120
+ ```
121
+ A B C E F G
122
+ 1 Name Score Grade Item Qty Price ← two separate headers
123
+ 2 Alice 90 A Apple 5 100
124
+ 3 Bob 75 B Banana 3 60
125
+ ```
126
+
127
+ Column D is empty → detected as two tables (A–C and E–G), each rendered as its own `<table>`.
128
+
129
+ The separator column may carry left/right border styles from the adjacent table formatting. As long as it has no top or bottom border and no values anywhere within the row-band, it is still treated as an empty gap.
130
+
131
+ ### Mixed content example
132
+
133
+ ```
134
+ Row 1: "Section 1: Introduction" ← paragraph
135
+ Row 2: (empty)
136
+ Row 3: Name | Department | Salary ← table header
137
+ Row 4: Alice | Engineering| 800,000
138
+ Row 5: Bob | Marketing | 650,000
139
+ Row 6: (empty)
140
+ Row 7: "* Figures are in JPY" ← paragraph
141
+ Row 8: (empty)
142
+ Row 9: Q1 | Q2 ← second table
143
+ Row 10: 1,200 | 1,450
144
+ ```
145
+
146
+ Output:
147
+
148
+ ```markdown
149
+ Section 1: Introduction
150
+
151
+ <table>
152
+ <tr><th>Name</th><th>Department</th><th style="text-align: right">Salary</th></tr>
153
+ <tr><td>Alice</td><td>Engineering</td><td style="text-align: right">800,000</td></tr>
154
+ <tr><td>Bob</td><td>Marketing</td><td style="text-align: right">650,000</td></tr>
155
+ </table>
156
+
157
+ * Figures are in JPY
158
+
159
+ <table>
160
+ <tr><th style="text-align: right">Q1</th><th style="text-align: right">Q2</th></tr>
161
+ <tr><td style="text-align: right">1,200</td><td style="text-align: right">1,450</td></tr>
162
+ </table>
163
+ ```
164
+
165
+ ### Recognized table patterns
166
+
167
+ The table below summarises which layouts are detected as a table and which fall back to a paragraph.
168
+
169
+ | Excel layout | Detected as | Reason |
170
+ | --- | --- | --- |
171
+ | 2+ columns × 2+ rows of data | **table** | Meets `minColumns` and `minRows` thresholds |
172
+ | Single column of text | **paragraph** | Below `minColumns` (default 2) |
173
+ | Single row of data | **paragraph** | Below `minRows` (default 2) |
174
+ | Every row has colspan spanning all columns | **paragraph** | Each row renders as one cell — no tabular structure |
175
+ | Header row has colspan, data rows have multiple cells | **table** | At least one row has 2+ visible cells |
176
+ | Cells with `rowspan` spanning multiple rows | **table** | Rendered as `<td rowspan="N">` with no layout breakage |
177
+ | Two blocks separated by an empty column | **two tables** | Column gap triggers independent region detection |
178
+
179
+ #### Pattern: full-width colspan in all rows → paragraph
180
+
181
+ When every row in a region is merged across all columns (e.g. a block of title-style cells), the region has no relational structure and is rendered as a paragraph instead of an HTML table.
182
+
183
+ ```
184
+ A B C
185
+ 1 [ Title spanning A:C ] ← colspan=3
186
+ 2 [ Subtitle spanning A:C ] ← colspan=3
187
+ 3 [ Content spanning A:C ] ← colspan=3
188
+ ```
189
+
190
+ Output:
191
+
192
+ ```markdown
193
+ Title
194
+
195
+ Subtitle
196
+
197
+ Content
198
+ ```
199
+
200
+ #### Pattern: merged header row + normal data rows → table
201
+
202
+ A full-width merged header (colspan) in the first row is fine as long as at least one data row has multiple cells.
203
+
204
+ ```
205
+ A B C
206
+ 1 [ Report Title ] ← colspan=3
207
+ 2 Name Score Grade ← 3 normal cells
208
+ 3 Alice 90 A
209
+ ```
210
+
211
+ Output:
212
+
213
+ ```html
214
+ <table>
215
+ <tr>
216
+ <th colspan="3">Report Title</th>
217
+ </tr>
218
+ <tr>
219
+ <td>Name</td>
220
+ <td style="text-align: right">Score</td>
221
+ <td>Grade</td>
222
+ </tr>
223
+ <tr>
224
+ <td>Alice</td>
225
+ <td style="text-align: right">90</td>
226
+ <td>A</td>
227
+ </tr>
228
+ </table>
229
+ ```
230
+
231
+ #### Pattern: rowspan across rows → table
232
+
233
+ Cells with `rowspan` are rendered using the `rowspan` attribute. Because `<thead>`/`<tbody>` are not emitted, a `<th rowspan="N">` that visually spans into data rows does not cause layout breakage.
234
+
235
+ ```
236
+ A B C
237
+ 1 Name [ Period (B:C) ] ← B1:C1 colspan=2
238
+ 2 Alice Q1 ← A2:A3 rowspan=2
239
+ 3 Q2
240
+ ```
241
+
242
+ Output:
243
+
244
+ ```html
245
+ <table>
246
+ <tr>
247
+ <th>Name</th>
248
+ <th colspan="2">Period</th>
249
+ </tr>
250
+ <tr>
251
+ <td rowspan="2">Alice</td>
252
+ <td>Q1</td>
253
+ </tr>
254
+ <tr>
255
+ <td>Q2</td>
256
+ </tr>
257
+ </table>
258
+ ```
259
+
260
+ ## Rich Text
261
+
262
+ When `richText: true` (default), cell formatting is converted:
263
+
264
+ | Excel format | Table output (HTML) | Paragraph output (Markdown) |
265
+ | --- | --- | --- |
266
+ | Bold | `<strong>text</strong>` | `**text**` |
267
+ | Italic | `<em>text</em>` | `_text_` |
268
+ | Bold + Italic | `<strong><em>text</em></strong>` | `***text***` |
269
+ | Hyperlink | `<a href="url">text</a>` | `[text](url)` |
270
+
271
+ ## Table Formatting Details
272
+
273
+ Tables are output as HTML (`<table>`) to support all Excel features:
274
+
275
+ - **Merged cells** — `colspan` and `rowspan` attributes are set on the master (top-left) cell; child cells are omitted entirely.
276
+ - **Header row** — rendered as `<th>` elements when `headerRow: true`.
277
+ - **Column alignment** — columns whose data cells are all numeric (`cell.t === "n"`) are automatically right-aligned (`style="text-align: right"`), including currency-formatted values such as `¥1,000` or `$100`. Explicit cell alignment takes precedence.
278
+ - **Newlines within cells** — converted to `<br>`.
279
+ - **HTML escaping** — `&`, `<`, `>`, `"` in cell values are escaped to HTML entities.
280
+ - **Formula cells** — the computed value is used; the formula string is never output.
281
+
282
+ ## Architecture Decision Records
283
+
284
+ Design decisions are documented in [`docs/adr/`](docs/adr/).
285
+
286
+ ## Development
287
+
288
+ ```bash
289
+ npm install
290
+ npm test # run all tests (vitest)
291
+ npm run build # compile TypeScript → dist/
292
+ npm run lint # oxlint
293
+ npm run fmt # oxfmt
294
+ npm run fmt:check # check formatting (CI)
295
+ npm run secretlint # scan for secrets
296
+ ```
297
+
298
+ ## License
299
+
300
+ MIT
@@ -0,0 +1,16 @@
1
+ import * as XLSX from "xlsx";
2
+ /**
3
+ * Build a simple WorkBook from a 2-D array of cell values.
4
+ * Row/column indices are 0-based.
5
+ * Pass `undefined` for empty cells.
6
+ */
7
+ export declare function buildWorkbook(sheets: {
8
+ name: string;
9
+ data: (string | number | undefined)[][];
10
+ }[]): XLSX.WorkBook;
11
+ /**
12
+ * Normalise whitespace/newlines in markdown output so test assertions are
13
+ * not fragile against trailing spaces.
14
+ */
15
+ export declare function normalise(md: string): string;
16
+ //# sourceMappingURL=helpers.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../src/__tests__/helpers.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAE7B;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,MAAM,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,CAAC,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC,EAAE,EAAE,CAAA;CAAE,EAAE,GAClE,IAAI,CAAC,QAAQ,CAOf;AAED;;;GAGG;AACH,wBAAgB,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAM5C"}
@@ -0,0 +1,63 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.buildWorkbook = buildWorkbook;
37
+ exports.normalise = normalise;
38
+ const XLSX = __importStar(require("xlsx"));
39
/**
 * Assemble a SheetJS WorkBook from plain 2-D arrays, one worksheet per entry.
 *
 * Each entry supplies the sheet `name` and its `data` rows. Cells that are
 * `undefined` (or `null`) are handed to SheetJS as `null`, i.e. as empty cells.
 *
 * @param {{ name: string, data: (string | number | undefined)[][] }[]} sheets
 * @returns the workbook with the sheets appended in the given order
 */
function buildWorkbook(sheets) {
    const workbook = XLSX.utils.book_new();
    for (const sheet of sheets) {
        const rows = sheet.data.map((row) => row.map((cellValue) => (cellValue == null ? null : cellValue)));
        const worksheet = XLSX.utils.aoa_to_sheet(rows);
        XLSX.utils.book_append_sheet(workbook, worksheet, sheet.name);
    }
    return workbook;
}
52
/**
 * Canonicalise markdown text for test comparisons: trailing whitespace is
 * removed from every line, then leading/trailing whitespace (including blank
 * lines) is removed from the text as a whole.
 *
 * @param {string} md
 * @returns {string}
 */
function normalise(md) {
    const cleanedLines = [];
    for (const line of md.split("\n")) {
        cleanedLines.push(line.trimEnd());
    }
    const rejoined = cleanedLines.join("\n");
    return rejoined.trim();
}
63
+ //# sourceMappingURL=helpers.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"helpers.js","sourceRoot":"","sources":["../../src/__tests__/helpers.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAOA,sCASC;AAMD,8BAMC;AA5BD,2CAA6B;AAE7B;;;;GAIG;AACH,SAAgB,aAAa,CAC3B,MAAmE;IAEnE,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;IACjC,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QACpC,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;QACjF,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAgB,SAAS,CAAC,EAAU;IAClC,OAAO,EAAE;SACN,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACvB,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,EAAE,CAAC;AACZ,CAAC"}
@@ -0,0 +1,16 @@
1
+ import * as XLSX from "xlsx";
2
+ import type { CellData, ResolvedOptions } from "./types.js";
3
+ /**
4
+ * Escape pipe and backslash characters inside a GFM table cell value.
5
+ * @deprecated Not used in HTML table rendering; kept for potential external use.
6
+ */
7
+ export declare function escapeTableCell(value: string): string;
8
+ /**
9
+ * Escape HTML special characters for safe embedding in HTML attributes and text.
10
+ */
11
+ export declare function escapeHtml(value: string): string;
12
+ /**
13
+ * Extract a CellData object from an xlsx cell.
14
+ */
15
+ export declare function extractCellData(cell: XLSX.CellObject | undefined, mergedChildCells: Set<string>, cellAddress: string, opts: ResolvedOptions): CellData;
16
+ //# sourceMappingURL=cell-formatter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cell-formatter.d.ts","sourceRoot":"","sources":["../src/cell-formatter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,EAAE,QAAQ,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAqE5D;;;GAGG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAErD;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMhD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,IAAI,EAAE,IAAI,CAAC,UAAU,GAAG,SAAS,EACjC,gBAAgB,EAAE,GAAG,CAAC,MAAM,CAAC,EAC7B,WAAW,EAAE,MAAM,EACnB,IAAI,EAAE,eAAe,GACpB,QAAQ,CAoIV"}
@@ -0,0 +1,257 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.escapeTableCell = escapeTableCell;
37
+ exports.escapeHtml = escapeHtml;
38
+ exports.extractCellData = extractCellData;
39
+ const XLSX = __importStar(require("xlsx"));
40
/**
 * Format an Excel date serial number with a small token-based pattern.
 *
 * Supported tokens: YYYY, MM, DD, HH, mm, ss. Every occurrence of a token in
 * `fmt` is substituted (previously only the first occurrence of each token
 * was replaced, so a repeated token leaked into the output as a literal).
 * All other characters of `fmt` are copied through unchanged.
 *
 * @param {number} dateSerial Excel date serial, decoded by `XLSX.SSF.parse_date_code`.
 * @param {string} fmt Pattern such as "YYYY-MM-DD" or "YYYY-MM-DD HH:mm:ss".
 * @returns {string} The formatted date, or `String(dateSerial)` when SheetJS
 *   cannot decode the serial (parse_date_code returned a falsy value).
 */
function formatDate(dateSerial, fmt) {
    const date = XLSX.SSF.parse_date_code(dateSerial);
    if (!date)
        return String(dateSerial);
    const pad = (n) => String(n).padStart(2, "0");
    const fields = {
        YYYY: String(date.y),
        MM: pad(date.m),
        DD: pad(date.d),
        HH: pad(date.H),
        mm: pad(date.M),
        ss: pad(date.S),
    };
    // Single left-to-right pass: substituted digits are never re-scanned, and
    // the callback form avoids "$" replacement-pattern interpretation.
    return fmt.replace(/YYYY|MM|DD|HH|mm|ss/g, (token) => fields[token]);
}
58
/**
 * Wrap `text` in Markdown emphasis markers.
 *
 * bold + italic -> `***text***`, bold -> `**text**`, italic -> `_text_`.
 * Falsy text (e.g. the empty string) and un-styled text are returned as-is.
 *
 * @param {string} text
 * @param {boolean} bold
 * @param {boolean} italic
 * @returns {string}
 */
function applyInlineFormatting(text, bold, italic) {
    if (!text) {
        return text;
    }
    let marker = "";
    if (bold) {
        marker = italic ? "***" : "**";
    }
    else if (italic) {
        marker = "_";
    }
    if (marker === "") {
        return text;
    }
    return `${marker}${text}${marker}`;
}
72
/**
 * Decode XML character and entity references in a string.
 *
 * Handles the five predefined XML entities (`&amp;`, `&lt;`, `&gt;`,
 * `&quot;`, `&apos;`) and numeric character references (`&#65;`, `&#x41;`).
 * Decoding happens in a single pass, so already-decoded output is never
 * decoded again: `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 * Unknown entities and out-of-range numeric references are left untouched.
 *
 * @param {string} s XML-escaped text.
 * @returns {string} The decoded text.
 */
function unescapeXml(s) {
    const named = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
    return s.replace(/&(amp|lt|gt|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);/g, (entity, ref) => {
        if (ref[0] !== "#")
            return named[ref];
        const codePoint = ref[1] === "x"
            ? Number.parseInt(ref.slice(2), 16)
            : Number.parseInt(ref.slice(1), 10);
        // String.fromCodePoint throws above U+10FFFF; keep such references verbatim.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    });
}
83
/**
 * Parse XLSX rich-text XML (`cell.r`) into an array of styled text runs.
 *
 * Expected XML format (OOXML shared-string rich text):
 *   <r><rPr><b/></rPr><t xml:space="preserve">bold </t></r><r><t>normal</t></r>
 *
 * Fixes over the previous implementation:
 *  - `<r ...>` elements carrying attributes are parsed (the guard already
 *    accepted them, but the run pattern only matched a bare `<r>`);
 *  - `<b val="0"/>` / `<i val="false"/>` are treated as switched off rather
 *    than as bold/italic;
 *  - a self-closing `<t/>` is no longer mistaken for an opening tag.
 *
 * @param {string} xml Raw rich-text XML.
 * @returns {{ text: string, bold: boolean, italic: boolean }[] | null}
 *   The runs in document order (text XML-unescaped via `unescapeXml`), or
 *   null when the input has no `<r>` element or no run contains a `<t>` element.
 */
function parseRichTextRuns(xml) {
    if (!xml.includes("<r>") && !xml.includes("<r "))
        return null;
    // A toggle element is "on" when present, unless its val attribute disables it.
    const toggleOn = (pattern, props) => {
        const hit = pattern.exec(props);
        return hit !== null && !/\bval\s*=\s*["'](?:0|false|off)["']/.test(hit[1] ?? "");
    };
    const runs = [];
    // `<r` must be followed by whitespace or `>`, so `<rPr>` / `<rPh>` never start a run.
    const runPattern = /<r(?:\s[^>]*)?>([\s\S]*?)<\/r>/g;
    let m;
    while ((m = runPattern.exec(xml)) !== null) {
        const inner = m[1];
        const props = (/<rPr(?:\s[^>]*)?>([\s\S]*?)<\/rPr>/.exec(inner) ?? [])[1] ?? "";
        const bold = toggleOn(/<b(\s[^>]*)?\/?>/, props);
        const italic = toggleOn(/<i(\s[^>]*)?\/?>/, props);
        const tMatch = /<t(?:\s[^>]*)?>([\s\S]*?)<\/t>/.exec(inner);
        if (tMatch) {
            runs.push({ text: unescapeXml(tMatch[1]), bold, italic });
        }
    }
    return runs.length > 0 ? runs : null;
}
108
/**
 * Backslash-escape the characters that are special inside a GFM table cell:
 * every `\` becomes `\\` and every `|` becomes `\|`.
 * @deprecated Not used in HTML table rendering; kept for potential external use.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeTableCell(value) {
    const specials = /[\\|]/g;
    return value.replace(specials, (ch) => `\\${ch}`);
}
115
/**
 * Replace the HTML-significant characters `&`, `<`, `>` and `"` with their
 * entities so the value can be placed in HTML text or a double-quoted
 * attribute. Single quotes are left unchanged.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeHtml(value) {
    const entityFor = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
    };
    return value.replace(/[&<>"]/g, (ch) => entityFor[ch]);
}
125
/**
 * Build the CellData record for a single worksheet cell.
 *
 * @param cell SheetJS cell object, or undefined when the address holds no cell.
 * @param {Set<string>} mergedChildCells Addresses that are non-master cells of a merge.
 * @param {string} cellAddress Address of this cell, looked up in `mergedChildCells`.
 * @param opts Resolved options; this function reads `richText` and `dateFormat`.
 * @returns The cell record:
 *   - `rawValue`: text without inline Markdown markers;
 *   - `value`: text with Markdown emphasis / link markup applied;
 *   - `bold` / `italic`: whole-cell font flags (always false on the rich-text path,
 *     where styling is per run and already embedded in `value` / `richTextHtml`);
 *   - `hyperlink`, `alignment`: set when present on the cell;
 *   - `isMergedChild`, `hasBorder`;
 *   - `richTextHtml`: only on the rich-text path.
 *   A missing cell, or one whose `v` is undefined/null, yields an empty record
 *   with `hasBorder: false`; its style is not inspected.
 */
function extractCellData(cell, mergedChildCells, cellAddress, opts) {
    const isMergedChild = mergedChildCells.has(cellAddress);
    if (!cell || cell.v === undefined || cell.v === null) {
        return {
            rawValue: "",
            value: "",
            bold: false,
            italic: false,
            isMergedChild,
            hasBorder: false,
        };
    }
    // --- Fields shared by the rich-text and plain-value paths ---
    const style = cell.s;
    // Hyperlink attached to the cell (cell.l.Target)
    const links = cell.l;
    let hyperlink;
    if (links?.Target) {
        hyperlink = links.Target;
    }
    // =HYPERLINK("url", ...) formula: cell.l is absent for formula-based links,
    // so take the first double-quoted argument of the formula instead.
    if (!hyperlink && cell.f) {
        const match = cell.f.match(/^HYPERLINK\s*\(\s*"([^"]+)"/i);
        if (match)
            hyperlink = match[1];
    }
    // Border detection: any of the four sides carrying a style counts.
    let hasBorder = false;
    if (style?.border) {
        const b = style.border;
        hasBorder = !!(b.top?.style || b.bottom?.style || b.left?.style || b.right?.style);
    }
    // Alignment: "center" and "right" are kept; any other explicit value maps to "left".
    let alignment;
    if (style?.alignment?.horizontal) {
        const h = style.alignment.horizontal;
        if (h === "center" || h === "right")
            alignment = h;
        else
            alignment = "left";
    }
    // --- Inline rich text: parse cell.r XML (ADR-0019) ---
    if (opts.richText && cell.r) {
        const runs = parseRichTextRuns(String(cell.r));
        if (runs) {
            const nl = (s) => s.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
            const rawText = nl(runs.map((r) => r.text).join(""));
            // Move leading/trailing whitespace outside bold/italic markers so that
            // CommonMark right-flanking delimiter rules are satisfied:
            // "**bold** " instead of "**bold **" (trailing space inside breaks rendering).
            let markdownText = runs
                .map((r) => {
                const text = nl(r.text);
                if (!r.bold && !r.italic)
                    return text;
                const lead = /^\s*/.exec(text)[0];
                const trail = /\s*$/.exec(text)[0];
                const core = text.slice(lead.length, text.length - trail.length);
                // A whitespace-only run gets no markers at all.
                return core ? lead + applyInlineFormatting(core, r.bold, r.italic) + trail : text;
            })
                .join("");
            if (hyperlink)
                markdownText = `[${markdownText}](${hyperlink})`;
            // HTML rendering of the same runs; the hyperlink is not applied here.
            const richTextHtml = runs
                .map((r) => {
                let t = escapeHtml(nl(r.text)).replace(/\n/g, "<br>");
                if (r.bold && r.italic)
                    t = `<strong><em>${t}</em></strong>`;
                else if (r.bold)
                    t = `<strong>${t}</strong>`;
                else if (r.italic)
                    t = `<em>${t}</em>`;
                return t;
            })
                .join("");
            return {
                rawValue: rawText,
                value: markdownText,
                bold: false,
                italic: false,
                hyperlink,
                alignment,
                isMergedChild,
                hasBorder,
                richTextHtml,
            };
        }
    }
    // --- Fallback: plain value extraction ---
    let value = "";
    if (cell.t === "d") {
        // Date cell. A numeric (or numeric-string) value is treated as an Excel
        // serial and formatted with opts.dateFormat, exactly as before.
        const serial = cell.v instanceof Date ? NaN : Number(cell.v);
        if (Number.isFinite(serial)) {
            value = formatDate(serial, opts.dateFormat);
        }
        else if (cell.w !== undefined && cell.w !== null) {
            // Date objects / ISO strings are not serials: Number(Date) is epoch
            // milliseconds and Number("2024-01-02T...") is NaN, both of which used
            // to produce garbage. Use SheetJS's own formatted text instead.
            // NOTE(review): opts.dateFormat is not applied on this path because the
            // timezone convention of these Date values is not visible from here — confirm.
            value = String(cell.w);
        }
        else if (cell.v instanceof Date) {
            // toISOString() throws on an invalid Date, so guard it.
            value = Number.isNaN(cell.v.getTime()) ? String(cell.v) : cell.v.toISOString();
        }
        else {
            value = String(cell.v);
        }
    }
    else if (cell.t === "n") {
        // Number: use formatted value if available, otherwise toString
        value = cell.w ?? String(cell.v);
    }
    else if (cell.t === "b") {
        value = cell.v ? "TRUE" : "FALSE";
    }
    else {
        value = cell.w ?? String(cell.v);
    }
    // Normalise line endings to "\n"; no <br> conversion happens on this path.
    value = value.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
    // Cell-level style (bold/italic applied to whole cell)
    let bold = false;
    let italic = false;
    if (opts.richText && style) {
        bold = !!style.font?.bold;
        italic = !!style.font?.italic;
    }
    const formatted = opts.richText ? applyInlineFormatting(value, bold, italic) : value;
    const final = hyperlink ? `[${formatted}](${hyperlink})` : formatted;
    return {
        rawValue: value,
        value: final,
        bold,
        italic,
        hyperlink,
        alignment,
        isMergedChild,
        hasBorder,
    };
}
257
+ //# sourceMappingURL=cell-formatter.js.map