@origints/xlsx 0.1.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,14 +4,6 @@
4
4
 
5
5
  ---
6
6
 
7
- ## Why
8
-
9
- Extracting data from Excel files often means losing track of where values came from. When a number is wrong, you can't easily trace it back to cell B47 on the "Q3 Sales" sheet.
10
-
11
- This package provides rich navigation APIs for Excel workbooks while maintaining complete provenance. Every value knows its exact location: workbook, sheet, row, column, and cell address.
12
-
13
- ---
14
-
15
7
  ## Features
16
8
 
17
9
  - Parse XLSX files from streams, buffers, or file paths
@@ -24,158 +16,296 @@ This package provides rich navigation APIs for Excel workbooks while maintaining
24
16
 
25
17
  ---
26
18
 
27
- ## Quick Start
19
+ ## Installation
28
20
 
29
21
  ```bash
30
22
  npm install @origints/xlsx @origints/core
31
23
  ```
32
24
 
33
- ```ts
34
- import { parseXlsxFile } from "@origints/xlsx";
25
+ ---
35
26
 
36
- const result = await parseXlsxFile("data.xlsx");
27
+ ## Usage with Planner
37
28
 
38
- if (result.ok) {
39
- const workbook = result.value;
40
- const sheet = workbook.getSheet("Sheet1");
41
- const value = sheet.cell("A1").asString();
42
- console.log(value);
43
- }
44
- ```
29
+ ### Extract cell values from a spreadsheet
45
30
 
46
- Expected output:
31
+ ```ts
32
+ import { Planner, loadFile, run } from '@origints/core'
33
+ import { parseXlsx } from '@origints/xlsx'
47
34
 
35
+ const plan = new Planner()
36
+ .in(loadFile('data.xlsx'))
37
+ .mapIn(parseXlsx())
38
+ .emit((out, $) =>
39
+ out
40
+ .add('title', $.firstSheet().cell('A1').string())
41
+ .add('revenue', $.firstSheet().cell('B2').number())
42
+ )
43
+ .compile()
44
+
45
+ const result = await run(plan, { readFile, registry })
46
+ // result.value: { title: 'Q4 Report', revenue: 150000 }
48
47
  ```
49
- Hello
50
- ```
51
-
52
- ---
53
-
54
- ## Installation
55
48
 
56
- - Supported platforms:
57
- - macOS / Linux / Windows
58
- - Runtime requirements:
59
- - Node.js >= 18
60
- - Package managers:
61
- - npm, pnpm, yarn
62
- - Peer dependencies:
63
- - @origints/core ^0.1.0
49
+ ### Extract from specific sheets
64
50
 
65
- ```bash
66
- npm install @origints/xlsx @origints/core
67
- # or
68
- pnpm add @origints/xlsx @origints/core
51
+ ```ts
52
+ const plan = new Planner()
53
+ .in(loadFile('report.xlsx'))
54
+ .mapIn(parseXlsx())
55
+ .emit((out, $) =>
56
+ out
57
+ .add('totalSales', $.sheet('Sales').cell('B10').number())
58
+ .add('totalExpenses', $.sheet('Expenses').cell('B10').number())
59
+ )
60
+ .compile()
69
61
  ```
70
62
 
71
- ---
72
-
73
- ## Usage
63
+ ### Extract rows from a range
74
64
 
75
- ### Reading cells
65
+ Use `range().rows()` to iterate over rows in a range and extract structured data:
76
66
 
77
67
  ```ts
78
- import { parseXlsxFile } from "@origints/xlsx";
68
+ // Spreadsheet has headers in row 1: Name | Age | Department
69
+ // Data rows in A2:C10 (this example reads only the first three, A2:C4)
79
70
 
80
- const result = await parseXlsxFile("report.xlsx");
71
+ const plan = new Planner()
72
+ .in(loadFile('employees.xlsx'))
73
+ .mapIn(parseXlsx())
74
+ .emit((out, $) =>
75
+ out.add(
76
+ 'employees',
77
+ $.firstSheet()
78
+ .range('A2:C4')
79
+ .rows(row => ({
80
+ kind: 'object',
81
+ properties: {
82
+ name: row.col(1).string(),
83
+ age: row.col(2).number(),
84
+ dept: row.col(3).string(),
85
+ },
86
+ }))
87
+ )
88
+ )
89
+ .compile()
90
+
91
+ const result = await run(plan, { readFile, registry })
92
+ // result.value: {
93
+ // employees: [
94
+ // { name: 'Alice', age: 30, dept: 'Engineering' },
95
+ // { name: 'Bob', age: 25, dept: 'Marketing' },
96
+ // ...
97
+ // ]
98
+ // }
99
+ ```
81
100
 
82
- if (result.ok) {
83
- const sheet = result.value.getSheet("Data");
101
+ ### Range from two corners
84
102
 
85
- const name = sheet.cell("A1").asString();
86
- const amount = sheet.cell("B1").asNumber();
87
- const date = sheet.cell("C1").asDate();
88
- }
103
+ Besides a single `"A2:C10"`-style string, `range()` accepts two corner addresses or two dynamic cell builders:
104
+
105
+ ```ts
106
+ // Two string addresses — equivalent to range("A2:C10")
107
+ $.firstSheet().range("A2", "C10").rows(...)
108
+
109
+ // Dynamic corners — cells resolved at runtime
110
+ const topLeft = $.firstSheet().find(cell.equals("Name"));
111
+ const bottomRight = $.firstSheet().find(cell.equals("Total")).left();
112
+ $.firstSheet().range(topLeft, bottomRight).rows((row) => ({
113
+ kind: "object",
114
+ properties: {
115
+ name: row.col(1).string(),
116
+ value: row.col(2).number(),
117
+ },
118
+ }));
89
119
  ```
90
120
 
91
- ### Working with ranges
121
+ ### Collect rows with predicates
92
122
 
93
- ```ts
94
- const sheet = workbook.getSheet("Sales");
123
+ Use `eachSlice()` to iterate rows from a starting cell while a predicate holds, with header-relative column access:
95
124
 
96
- // Get a range
97
- const range = sheet.range("A1:D10");
125
+ ```ts
126
+ import { cell, rowCol } from '@origints/xlsx'
98
127
 
99
- // Convert to array of arrays
100
- const rows = rangeToArray(range);
128
+ const hasData = rowCol(0, cell.isNotEmpty()).and(
129
+ rowCol(0, cell.startsWith('Total').not())
130
+ )
101
131
 
102
- // Convert to objects using first row as headers
103
- const records = rangeToObjects(range);
132
+ const plan = new Planner()
133
+ .in(loadFile('report.xlsx'))
134
+ .mapIn(parseXlsx())
135
+ .emit((out, $) => {
136
+ const header = $.firstSheet().find(cell.equals('Name'))
137
+
138
+ return out.add(
139
+ 'people',
140
+ header.down().eachSlice('down', hasData, row => ({
141
+ kind: 'object',
142
+ properties: {
143
+ name: row.colWhere(header, cell.equals('Name')).string(),
144
+ role: row.colWhere(header, cell.equals('Role')).string(),
145
+ },
146
+ }))
147
+ )
148
+ })
149
+ .compile()
104
150
  ```
105
151
 
106
- ### Cursor-based iteration
107
-
108
- ```ts
109
- import { XlsxCursor } from "@origints/xlsx";
152
+ ### Collect cell values in a direction
110
153
 
111
- const cursor = new XlsxCursor(sheet, "A1");
154
+ Use `eachCell()` to gather cells in a direction while a predicate matches:
112
155
 
113
- // Move and grab values
114
- cursor.move("right", 2);
115
- const value = cursor.grab();
156
+ ```ts
157
+ import { cell } from '@origints/xlsx'
116
158
 
117
- // Iterate rows
118
- while (cursor.hasMore("down")) {
119
- const row = cursor.grabRow(4);
120
- cursor.move("down");
121
- }
159
+ const plan = new Planner()
160
+ .in(loadFile('data.xlsx'))
161
+ .mapIn(parseXlsx())
162
+ .emit((out, $) =>
163
+ out.add(
164
+ 'values',
165
+ $.firstSheet()
166
+ .cell('B2')
167
+ .eachCell('down', cell.isNotEmpty(), c => c.number())
168
+ )
169
+ )
170
+ .compile()
171
+
172
+ const result = await run(plan, { readFile, registry })
173
+ // result.value: { values: [100, 200, 300, 400] }
122
174
  ```
123
175
 
124
- ### Using with Origins plans
176
+ ### Optional and fallback extraction
125
177
 
126
- ```ts
127
- import { Planner, loadFile, globalRegistry } from "@origints/core";
128
- import { parseXlsx, registerXlsxTransforms } from "@origints/xlsx";
178
+ Handle missing or invalid cell values using `optional()`, `tryExtract()`, `mapSpec()`, `guard()`, and `literal()` from `@origints/core`:
129
179
 
130
- registerXlsxTransforms(globalRegistry);
180
+ ```ts
181
+ import { literal, optional, tryExtract, mapSpec, guard } from '@origints/core'
182
+ import { cell, rowCol } from '@origints/xlsx'
131
183
 
132
- const plan = Planner.in(loadFile("data.xlsx"))
184
+ const plan = new Planner()
185
+ .in(loadFile('portfolio.xlsx'))
133
186
  .mapIn(parseXlsx())
134
187
  .emit((out, $) => {
135
- const sheet = $.getSheet("Summary");
136
- out.add("total", sheet.cell("B10").asNumber());
188
+ const header = $.firstSheet().find(cell.equals('Company'))
189
+ const hasData = rowCol(0, cell.isNotEmpty()).and(
190
+ rowCol(0, cell.startsWith('Total').not())
191
+ )
192
+
193
+ return out.add(
194
+ 'companies',
195
+ header.down().eachSlice('down', hasData, row => ({
196
+ kind: 'object',
197
+ properties: {
198
+ // Required field
199
+ name: row.colWhere(header, cell.equals('Company')).string(),
200
+ // Optional: returns null when cell is empty or has wrong type
201
+ ownership: optional(
202
+ row.colWhere(header, cell.equals('Ownership %')).number(),
203
+ null
204
+ ),
205
+ // Fallback: try number first, then parse string, then null
206
+ revenue: tryExtract(
207
+ row.colWhere(header, cell.equals('Revenue')).number(),
208
+ mapSpec(
209
+ row.colWhere(header, cell.equals('Revenue')).string(),
210
+ v => parseFloat((v as string).replace(/[,$]/g, '')),
211
+ 'parseFloat'
212
+ ),
213
+ literal(null)
214
+ ),
215
+ // Guard: ensure investment amount is positive
216
+ investment: guard(
217
+ row.colWhere(header, cell.equals('Investment')).number(),
218
+ v => (v as number) > 0,
219
+ 'Investment must be positive'
220
+ ),
221
+ },
222
+ }))
223
+ )
137
224
  })
138
- .compile();
225
+ .compile()
139
226
  ```
140
227
 
141
- ### Export to CSV
228
+ ### Combine with other data sources
142
229
 
143
230
  ```ts
144
- import { sheetToCsv, rangeToCsv } from "@origints/xlsx";
145
-
146
- const csv = sheetToCsv(sheet);
147
- // or for a specific range
148
- const rangeCsv = rangeToCsv(sheet.range("A1:C10"));
231
+ const plan = new Planner()
232
+ .in(loadFile('budget.xlsx'))
233
+ .mapIn(parseXlsx())
234
+ .emit((out, $) => out.add('budget', $.firstSheet().cell('B2').number()))
235
+ .in(loadFile('config.json'))
236
+ .mapIn(parseJson())
237
+ .emit((out, $) => out.add('department', $.get('department').string()))
238
+ .compile()
149
239
  ```
150
240
 
151
- ---
152
-
153
- ## Project Status
241
+ ### Standalone usage (without Planner)
154
242
 
155
- - **Experimental** - APIs may change
243
+ For direct workbook navigation, run the parser implementation yourself and work with the returned `XlsxWorkbook`:
156
244
 
157
- ---
245
+ ```ts
246
+ import { parseXlsxAsyncImpl, XlsxWorkbook } from '@origints/xlsx'
247
+
248
+ const workbook = (await parseXlsxAsyncImpl.execute(buffer)) as XlsxWorkbook
249
+
250
+ // Navigate sheets
251
+ const sheetResult = workbook.sheet('Sheet1')
252
+ if (sheetResult.ok) {
253
+ const sheet = sheetResult.value
254
+
255
+ // Read a cell
256
+ const cellResult = sheet.cell('A1')
257
+ if (cellResult.ok) {
258
+ console.log(cellResult.value.string().value)
259
+ }
260
+
261
+ // Work with ranges
262
+ const rangeResult = sheet.range('A1:D10')
263
+ if (rangeResult.ok) {
264
+ const range = rangeResult.value
265
+ // Convert to array of arrays
266
+ const rows = range.toArray()
267
+ // Convert to objects using first row as headers
268
+ const records = range.toObjects()
269
+ }
270
+ }
271
+ ```
158
272
 
159
- ## Non-Goals
273
+ ### Cursor-based iteration
160
274
 
161
- - Not an XLSX writer/generator
162
- - Not a formula evaluator
163
- - Not a chart/image extractor
275
+ ```ts
276
+ import { XlsxCursor } from '@origints/xlsx'
164
277
 
165
- ---
278
+ const sheetResult = workbook.sheet('Data')
279
+ if (sheetResult.ok) {
280
+ const cursor = new XlsxCursor(sheetResult.value, 'A1')
166
281
 
167
- ## Documentation
282
+ // Move and grab values
283
+ cursor.move('right', 2)
284
+ const value = cursor.grab()
168
285
 
169
- - See `@origints/core` for Origins concepts
170
- - See [exceljs](https://www.npmjs.com/package/exceljs) for underlying parser
286
+ // Iterate rows
287
+ while (cursor.hasMore('down')) {
288
+ const row = cursor.grabRow(4)
289
+ cursor.move('down')
290
+ }
291
+ }
292
+ ```
171
293
 
172
294
  ---
173
295
 
174
- ## Contributing
175
-
176
- - Open an issue before large changes
177
- - Keep PRs focused
178
- - Tests required for new features
296
+ ## API
297
+
298
+ | Export | Description |
299
+ | ---------------------------------- | ----------------------------------------------------- |
300
+ | `parseXlsx(options?)` | Create a transform AST for use with `Planner.mapIn()` |
301
+ | `parseXlsxImpl` | Sync transform implementation |
302
+ | `parseXlsxAsyncImpl` | Async transform implementation (stream/buffer) |
303
+ | `registerXlsxTransforms(registry)` | Register all XLSX transforms with a registry |
304
+ | `XlsxWorkbook` | Workbook navigation |
305
+ | `XlsxSheet` | Sheet navigation with cell/range access |
306
+ | `XlsxRange` | Range operations and conversion |
307
+ | `XlsxCell` | Cell value extraction |
308
+ | `XlsxCursor` | Sequential cursor-based iteration |
179
309
 
180
310
  ---
181
311