@origints/xlsx 0.1.1 → 0.3.2
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +235 -105
- package/dist/index.cjs +40 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +12 -3
- package/dist/index.es.js +23997 -832
- package/dist/index.es.js.map +1 -1
- package/dist/parse.d.ts +11 -2
- package/dist/xlsx-cell.d.ts +85 -18
- package/dist/xlsx-cursor.d.ts +2 -0
- package/dist/xlsx-predicate-compiler.d.ts +24 -0
- package/dist/xlsx-predicate-spec.d.ts +211 -0
- package/dist/xlsx-range.d.ts +10 -4
- package/dist/xlsx-sheet.d.ts +18 -15
- package/dist/xlsx-spec-builder.d.ts +220 -0
- package/dist/xlsx-spec-executor.d.ts +25 -0
- package/dist/xlsx-spec.d.ts +121 -0
- package/dist/xlsx-workbook.d.ts +8 -11
- package/package.json +23 -3
package/README.md
CHANGED
|
@@ -4,14 +4,6 @@
|
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
## Why
|
|
8
|
-
|
|
9
|
-
Extracting data from Excel files often means losing track of where values came from. When a number is wrong, you can't easily trace it back to cell B47 on the "Q3 Sales" sheet.
|
|
10
|
-
|
|
11
|
-
This package provides rich navigation APIs for Excel workbooks while maintaining complete provenance. Every value knows its exact location: workbook, sheet, row, column, and cell address.
|
|
12
|
-
|
|
13
|
-
---
|
|
14
|
-
|
|
15
7
|
## Features
|
|
16
8
|
|
|
17
9
|
- Parse XLSX files from streams, buffers, or file paths
|
|
@@ -24,158 +16,296 @@ This package provides rich navigation APIs for Excel workbooks while maintaining
|
|
|
24
16
|
|
|
25
17
|
---
|
|
26
18
|
|
|
27
|
-
##
|
|
19
|
+
## Installation
|
|
28
20
|
|
|
29
21
|
```bash
|
|
30
22
|
npm install @origints/xlsx @origints/core
|
|
31
23
|
```
|
|
32
24
|
|
|
33
|
-
|
|
34
|
-
import { parseXlsxFile } from "@origints/xlsx";
|
|
25
|
+
---
|
|
35
26
|
|
|
36
|
-
|
|
27
|
+
## Usage with Planner
|
|
37
28
|
|
|
38
|
-
|
|
39
|
-
const workbook = result.value;
|
|
40
|
-
const sheet = workbook.getSheet("Sheet1");
|
|
41
|
-
const value = sheet.cell("A1").asString();
|
|
42
|
-
console.log(value);
|
|
43
|
-
}
|
|
44
|
-
```
|
|
29
|
+
### Extract cell values from a spreadsheet
|
|
45
30
|
|
|
46
|
-
|
|
31
|
+
```ts
|
|
32
|
+
import { Planner, loadFile, run } from '@origints/core'
|
|
33
|
+
import { parseXlsx } from '@origints/xlsx'
|
|
47
34
|
|
|
35
|
+
const plan = new Planner()
|
|
36
|
+
.in(loadFile('data.xlsx'))
|
|
37
|
+
.mapIn(parseXlsx())
|
|
38
|
+
.emit((out, $) =>
|
|
39
|
+
out
|
|
40
|
+
.add('title', $.firstSheet().cell('A1').string())
|
|
41
|
+
.add('revenue', $.firstSheet().cell('B2').number())
|
|
42
|
+
)
|
|
43
|
+
.compile()
|
|
44
|
+
|
|
45
|
+
const result = await run(plan, { readFile, registry })
|
|
46
|
+
// result.value: { title: 'Q4 Report', revenue: 150000 }
|
|
48
47
|
```
|
|
49
|
-
Hello
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
---
|
|
53
|
-
|
|
54
|
-
## Installation
|
|
55
48
|
|
|
56
|
-
|
|
57
|
-
- macOS / Linux / Windows
|
|
58
|
-
- Runtime requirements:
|
|
59
|
-
- Node.js >= 18
|
|
60
|
-
- Package managers:
|
|
61
|
-
- npm, pnpm, yarn
|
|
62
|
-
- Peer dependencies:
|
|
63
|
-
- @origints/core ^0.1.0
|
|
49
|
+
### Extract from specific sheets
|
|
64
50
|
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
51
|
+
```ts
|
|
52
|
+
const plan = new Planner()
|
|
53
|
+
.in(loadFile('report.xlsx'))
|
|
54
|
+
.mapIn(parseXlsx())
|
|
55
|
+
.emit((out, $) =>
|
|
56
|
+
out
|
|
57
|
+
.add('totalSales', $.sheet('Sales').cell('B10').number())
|
|
58
|
+
.add('totalExpenses', $.sheet('Expenses').cell('B10').number())
|
|
59
|
+
)
|
|
60
|
+
.compile()
|
|
69
61
|
```
|
|
70
62
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
## Usage
|
|
63
|
+
### Extract rows from a range
|
|
74
64
|
|
|
75
|
-
|
|
65
|
+
Use `range().rows()` to iterate over rows in a range and extract structured data:
|
|
76
66
|
|
|
77
67
|
```ts
|
|
78
|
-
|
|
68
|
+
// Spreadsheet has headers in row 1: Name | Age | Department
|
|
69
|
+
// Data rows in A2:C4
|
|
79
70
|
|
|
80
|
-
const
|
|
71
|
+
const plan = new Planner()
|
|
72
|
+
.in(loadFile('employees.xlsx'))
|
|
73
|
+
.mapIn(parseXlsx())
|
|
74
|
+
.emit((out, $) =>
|
|
75
|
+
out.add(
|
|
76
|
+
'employees',
|
|
77
|
+
$.firstSheet()
|
|
78
|
+
.range('A2:C4')
|
|
79
|
+
.rows(row => ({
|
|
80
|
+
kind: 'object',
|
|
81
|
+
properties: {
|
|
82
|
+
name: row.col(1).string(),
|
|
83
|
+
age: row.col(2).number(),
|
|
84
|
+
dept: row.col(3).string(),
|
|
85
|
+
},
|
|
86
|
+
}))
|
|
87
|
+
)
|
|
88
|
+
)
|
|
89
|
+
.compile()
|
|
90
|
+
|
|
91
|
+
const result = await run(plan, { readFile, registry })
|
|
92
|
+
// result.value: {
|
|
93
|
+
// employees: [
|
|
94
|
+
// { name: 'Alice', age: 30, dept: 'Engineering' },
|
|
95
|
+
// { name: 'Bob', age: 25, dept: 'Marketing' },
|
|
96
|
+
// ...
|
|
97
|
+
// ]
|
|
98
|
+
// }
|
|
99
|
+
```
|
|
81
100
|
|
|
82
|
-
|
|
83
|
-
const sheet = result.value.getSheet("Data");
|
|
101
|
+
### Range from two corners
|
|
84
102
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
103
|
+
`range()` also accepts two corner addresses or two dynamic cell builders:
|
|
104
|
+
|
|
105
|
+
```ts
|
|
106
|
+
// Two string addresses — equivalent to range("A2:C10")
|
|
107
|
+
$.firstSheet().range("A2", "C10").rows(...)
|
|
108
|
+
|
|
109
|
+
// Dynamic corners — cells resolved at runtime
|
|
110
|
+
const topLeft = $.firstSheet().find(cell.equals("Name"));
|
|
111
|
+
const bottomRight = $.firstSheet().find(cell.equals("Total")).left();
|
|
112
|
+
$.firstSheet().range(topLeft, bottomRight).rows((row) => ({
|
|
113
|
+
kind: "object",
|
|
114
|
+
properties: {
|
|
115
|
+
name: row.col(1).string(),
|
|
116
|
+
value: row.col(2).number(),
|
|
117
|
+
},
|
|
118
|
+
}));
|
|
89
119
|
```
|
|
90
120
|
|
|
91
|
-
###
|
|
121
|
+
### Collect rows with predicates
|
|
92
122
|
|
|
93
|
-
|
|
94
|
-
const sheet = workbook.getSheet("Sales");
|
|
123
|
+
Use `eachSlice()` to iterate rows from a starting cell while a predicate holds, with header-relative column access:
|
|
95
124
|
|
|
96
|
-
|
|
97
|
-
|
|
125
|
+
```ts
|
|
126
|
+
import { cell, rowCol } from '@origints/xlsx'
|
|
98
127
|
|
|
99
|
-
|
|
100
|
-
|
|
128
|
+
const hasData = rowCol(0, cell.isNotEmpty()).and(
|
|
129
|
+
rowCol(0, cell.startsWith('Total').not())
|
|
130
|
+
)
|
|
101
131
|
|
|
102
|
-
|
|
103
|
-
|
|
132
|
+
const plan = new Planner()
|
|
133
|
+
.in(loadFile('report.xlsx'))
|
|
134
|
+
.mapIn(parseXlsx())
|
|
135
|
+
.emit((out, $) => {
|
|
136
|
+
const header = $.firstSheet().find(cell.equals('Name'))
|
|
137
|
+
|
|
138
|
+
return out.add(
|
|
139
|
+
'people',
|
|
140
|
+
header.down().eachSlice('down', hasData, row => ({
|
|
141
|
+
kind: 'object',
|
|
142
|
+
properties: {
|
|
143
|
+
name: row.colWhere(header, cell.equals('Name')).string(),
|
|
144
|
+
role: row.colWhere(header, cell.equals('Role')).string(),
|
|
145
|
+
},
|
|
146
|
+
}))
|
|
147
|
+
)
|
|
148
|
+
})
|
|
149
|
+
.compile()
|
|
104
150
|
```
|
|
105
151
|
|
|
106
|
-
###
|
|
107
|
-
|
|
108
|
-
```ts
|
|
109
|
-
import { XlsxCursor } from "@origints/xlsx";
|
|
152
|
+
### Collect cell values in a direction
|
|
110
153
|
|
|
111
|
-
|
|
154
|
+
Use `eachCell()` to gather cells in a direction while a predicate matches:
|
|
112
155
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
const value = cursor.grab();
|
|
156
|
+
```ts
|
|
157
|
+
import { cell } from '@origints/xlsx'
|
|
116
158
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
159
|
+
const plan = new Planner()
|
|
160
|
+
.in(loadFile('data.xlsx'))
|
|
161
|
+
.mapIn(parseXlsx())
|
|
162
|
+
.emit((out, $) =>
|
|
163
|
+
out.add(
|
|
164
|
+
'values',
|
|
165
|
+
$.firstSheet()
|
|
166
|
+
.cell('B2')
|
|
167
|
+
.eachCell('down', cell.isNotEmpty(), c => c.number())
|
|
168
|
+
)
|
|
169
|
+
)
|
|
170
|
+
.compile()
|
|
171
|
+
|
|
172
|
+
const result = await run(plan, { readFile, registry })
|
|
173
|
+
// result.value: { values: [100, 200, 300, 400] }
|
|
122
174
|
```
|
|
123
175
|
|
|
124
|
-
###
|
|
176
|
+
### Optional and fallback extraction
|
|
125
177
|
|
|
126
|
-
|
|
127
|
-
import { Planner, loadFile, globalRegistry } from "@origints/core";
|
|
128
|
-
import { parseXlsx, registerXlsxTransforms } from "@origints/xlsx";
|
|
178
|
+
Handle missing or invalid cell values using `optional()`, `tryExtract()`, `mapSpec()`, and `guard()`:
|
|
129
179
|
|
|
130
|
-
|
|
180
|
+
```ts
|
|
181
|
+
import { literal, optional, tryExtract, mapSpec, guard } from '@origints/core'
|
|
182
|
+
import { cell, rowCol } from '@origints/xlsx'
|
|
131
183
|
|
|
132
|
-
const plan = Planner
|
|
184
|
+
const plan = new Planner()
|
|
185
|
+
.in(loadFile('portfolio.xlsx'))
|
|
133
186
|
.mapIn(parseXlsx())
|
|
134
187
|
.emit((out, $) => {
|
|
135
|
-
const
|
|
136
|
-
|
|
188
|
+
const header = $.firstSheet().find(cell.equals('Company'))
|
|
189
|
+
const hasData = rowCol(0, cell.isNotEmpty()).and(
|
|
190
|
+
rowCol(0, cell.startsWith('Total').not())
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
return out.add(
|
|
194
|
+
'companies',
|
|
195
|
+
header.down().eachSlice('down', hasData, row => ({
|
|
196
|
+
kind: 'object',
|
|
197
|
+
properties: {
|
|
198
|
+
// Required field
|
|
199
|
+
name: row.colWhere(header, cell.equals('Company')).string(),
|
|
200
|
+
// Optional: returns null when the cell is empty or has the wrong type
|
|
201
|
+
ownership: optional(
|
|
202
|
+
row.colWhere(header, cell.equals('Ownership %')).number(),
|
|
203
|
+
null
|
|
204
|
+
),
|
|
205
|
+
// Fallback: try number first, then parse string, then null
|
|
206
|
+
revenue: tryExtract(
|
|
207
|
+
row.colWhere(header, cell.equals('Revenue')).number(),
|
|
208
|
+
mapSpec(
|
|
209
|
+
row.colWhere(header, cell.equals('Revenue')).string(),
|
|
210
|
+
v => parseFloat((v as string).replace(/[,$]/g, '')),
|
|
211
|
+
'parseFloat'
|
|
212
|
+
),
|
|
213
|
+
literal(null)
|
|
214
|
+
),
|
|
215
|
+
// Guard: ensure investment amount is positive
|
|
216
|
+
investment: guard(
|
|
217
|
+
row.colWhere(header, cell.equals('Investment')).number(),
|
|
218
|
+
v => (v as number) > 0,
|
|
219
|
+
'Investment must be positive'
|
|
220
|
+
),
|
|
221
|
+
},
|
|
222
|
+
}))
|
|
223
|
+
)
|
|
137
224
|
})
|
|
138
|
-
.compile()
|
|
225
|
+
.compile()
|
|
139
226
|
```
|
|
140
227
|
|
|
141
|
-
###
|
|
228
|
+
### Combine with other data sources
|
|
142
229
|
|
|
143
230
|
```ts
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
231
|
+
const plan = new Planner()
|
|
232
|
+
.in(loadFile('budget.xlsx'))
|
|
233
|
+
.mapIn(parseXlsx())
|
|
234
|
+
.emit((out, $) => out.add('budget', $.firstSheet().cell('B2').number()))
|
|
235
|
+
.in(loadFile('config.json'))
|
|
236
|
+
.mapIn(parseJson())
|
|
237
|
+
.emit((out, $) => out.add('department', $.get('department').string()))
|
|
238
|
+
.compile()
|
|
149
239
|
```
|
|
150
240
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
## Project Status
|
|
241
|
+
### Standalone usage (without Planner)
|
|
154
242
|
|
|
155
|
-
|
|
243
|
+
For direct workbook navigation:
|
|
156
244
|
|
|
157
|
-
|
|
245
|
+
```ts
|
|
246
|
+
import { parseXlsxAsyncImpl, XlsxWorkbook } from '@origints/xlsx'
|
|
247
|
+
|
|
248
|
+
const workbook = (await parseXlsxAsyncImpl.execute(buffer)) as XlsxWorkbook
|
|
249
|
+
|
|
250
|
+
// Navigate sheets
|
|
251
|
+
const sheetResult = workbook.sheet('Sheet1')
|
|
252
|
+
if (sheetResult.ok) {
|
|
253
|
+
const sheet = sheetResult.value
|
|
254
|
+
|
|
255
|
+
// Read a cell
|
|
256
|
+
const cellResult = sheet.cell('A1')
|
|
257
|
+
if (cellResult.ok) {
|
|
258
|
+
console.log(cellResult.value.string().value)
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
// Work with ranges
|
|
262
|
+
const rangeResult = sheet.range('A1:D10')
|
|
263
|
+
if (rangeResult.ok) {
|
|
264
|
+
const range = rangeResult.value
|
|
265
|
+
// Convert to array of arrays
|
|
266
|
+
const rows = range.toArray()
|
|
267
|
+
// Convert to objects using first row as headers
|
|
268
|
+
const records = range.toObjects()
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
```
|
|
158
272
|
|
|
159
|
-
|
|
273
|
+
### Cursor-based iteration
|
|
160
274
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
- Not a chart/image extractor
|
|
275
|
+
```ts
|
|
276
|
+
import { XlsxCursor } from '@origints/xlsx'
|
|
164
277
|
|
|
165
|
-
|
|
278
|
+
const sheetResult = workbook.sheet('Data')
|
|
279
|
+
if (sheetResult.ok) {
|
|
280
|
+
const cursor = new XlsxCursor(sheetResult.value, 'A1')
|
|
166
281
|
|
|
167
|
-
|
|
282
|
+
// Move and grab values
|
|
283
|
+
cursor.move('right', 2)
|
|
284
|
+
const value = cursor.grab()
|
|
168
285
|
|
|
169
|
-
|
|
170
|
-
|
|
286
|
+
// Iterate rows
|
|
287
|
+
while (cursor.hasMore('down')) {
|
|
288
|
+
const row = cursor.grabRow(4)
|
|
289
|
+
cursor.move('down')
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
```
|
|
171
293
|
|
|
172
294
|
---
|
|
173
295
|
|
|
174
|
-
##
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
296
|
+
## API
|
|
297
|
+
|
|
298
|
+
| Export | Description |
|
|
299
|
+
| ---------------------------------- | ----------------------------------------------------- |
|
|
300
|
+
| `parseXlsx(options?)` | Create a transform AST for use with `Planner.mapIn()` |
|
|
301
|
+
| `parseXlsxImpl` | Sync transform implementation |
|
|
302
|
+
| `parseXlsxAsyncImpl` | Async transform implementation (stream/buffer) |
|
|
303
|
+
| `registerXlsxTransforms(registry)` | Register all XLSX transforms with a registry |
|
|
304
|
+
| `XlsxWorkbook` | Workbook navigation |
|
|
305
|
+
| `XlsxSheet` | Sheet navigation with cell/range access |
|
|
306
|
+
| `XlsxRange` | Range operations and conversion |
|
|
307
|
+
| `XlsxCell` | Cell value extraction |
|
|
308
|
+
| `XlsxCursor` | Sequential cursor-based iteration |
|
|
179
309
|
|
|
180
310
|
---
|
|
181
311
|
|