@eredzik/calaminejs 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +454 -0
- package/dist/node/calamine_js.d.ts +9 -2
- package/dist/node/calamine_js.js +17 -4
- package/dist/node/calamine_js_bg.wasm +0 -0
- package/dist/node/calamine_js_bg.wasm.d.ts +3 -2
- package/dist/web/calamine_js.d.ts +12 -4
- package/dist/web/calamine_js.js +17 -4
- package/dist/web/calamine_js_bg.wasm +0 -0
- package/dist/web/calamine_js_bg.wasm.d.ts +3 -2
- package/package.json +7 -3
package/README.md
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
# @eredzik/calaminejs
|
|
2
|
+
|
|
3
|
+
A high-performance JavaScript/TypeScript library for reading Excel files (XLS/XLSX) and converting them to Parquet format. Built on the Rust [calamine](https://github.com/tafia/calamine) library using WebAssembly for optimal performance.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 📊 Read Excel files (XLS and XLSX formats)
|
|
8
|
+
- 🚀 High performance through WebAssembly
|
|
9
|
+
- 🔄 Convert sheets to Parquet format
|
|
10
|
+
- 🎯 Smart header row detection
|
|
11
|
+
- 📈 Progress tracking for large files
|
|
12
|
+
- 🌐 Works in Node.js and browsers
|
|
13
|
+
- 📝 Full TypeScript support
|
|
14
|
+
- 🔢 Preserves data types (strings, numbers, booleans, dates, etc.)
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install @eredzik/calaminejs
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
### Node.js
|
|
25
|
+
|
|
26
|
+
```javascript
|
|
27
|
+
import { Workbook } from '@eredzik/calaminejs';
|
|
28
|
+
import { readFileSync } from 'fs';
|
|
29
|
+
|
|
30
|
+
// Read an Excel file
|
|
31
|
+
const buffer = readFileSync('data.xlsx');
|
|
32
|
+
const workbook = Workbook.from_bytes(new Uint8Array(buffer));
|
|
33
|
+
|
|
34
|
+
// Get sheet names
|
|
35
|
+
const sheetNames = workbook.sheet_names();
|
|
36
|
+
console.log('Sheets:', sheetNames);
|
|
37
|
+
|
|
38
|
+
// Access a sheet
|
|
39
|
+
const sheet = workbook.get_sheet(sheetNames[0]);
|
|
40
|
+
console.log(`Rows: ${sheet.row_count()}, Columns: ${sheet.col_count()}`);
|
|
41
|
+
|
|
42
|
+
// Access cell data
|
|
43
|
+
const rows = sheet.rows;
|
|
44
|
+
console.log('First row:', rows[0]);
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Browser
|
|
48
|
+
|
|
49
|
+
```javascript
|
|
50
|
+
import { Workbook } from '@eredzik/calaminejs/web';
|
|
51
|
+
|
|
52
|
+
// From file input
|
|
53
|
+
const file = document.querySelector('input[type="file"]').files[0];
|
|
54
|
+
const arrayBuffer = await file.arrayBuffer();
|
|
55
|
+
const workbook = Workbook.from_bytes(new Uint8Array(arrayBuffer));
|
|
56
|
+
|
|
57
|
+
// Process the workbook
|
|
58
|
+
const sheet = workbook.get_sheet_by_index(0);
|
|
59
|
+
console.log('Sheet name:', sheet.name);
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## API Reference
|
|
63
|
+
|
|
64
|
+
### Workbook
|
|
65
|
+
|
|
66
|
+
The main class for working with Excel files.
|
|
67
|
+
|
|
68
|
+
#### Static Methods
|
|
69
|
+
|
|
70
|
+
##### `Workbook.from_bytes(data: Uint8Array): Workbook`
|
|
71
|
+
|
|
72
|
+
Load an Excel file from bytes. Automatically detects XLS or XLSX format.
|
|
73
|
+
|
|
74
|
+
```javascript
|
|
75
|
+
const workbook = Workbook.from_bytes(new Uint8Array(buffer));
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
##### `Workbook.from_bytes_with_progress(data: Uint8Array, callback?: Function, interval?: number): Workbook`
|
|
79
|
+
|
|
80
|
+
Load an Excel file with progress tracking.
|
|
81
|
+
|
|
82
|
+
```javascript
|
|
83
|
+
const workbook = Workbook.from_bytes_with_progress(
|
|
84
|
+
new Uint8Array(buffer),
|
|
85
|
+
(progress) => {
|
|
86
|
+
console.log(`Processing sheet ${progress.sheetIndex + 1}/${progress.totalSheets}`);
|
|
87
|
+
console.log(`Sheet: ${progress.sheetName}, Row: ${progress.currentRow}`);
|
|
88
|
+
},
|
|
89
|
+
100 // Report progress every 100 rows
|
|
90
|
+
);
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Progress object properties:**
|
|
94
|
+
- `sheetIndex: number` - Current sheet index (0-based)
|
|
95
|
+
- `totalSheets: number` - Total number of sheets
|
|
96
|
+
- `sheetName: string` - Name of current sheet
|
|
97
|
+
- `currentRow: number` - Current row being processed
|
|
98
|
+
- `totalRows: number | null` - Total rows (available when sheet is complete)
|
|
99
|
+
|
|
100
|
+
#### Instance Methods
|
|
101
|
+
|
|
102
|
+
##### `sheet_names(): string[]`
|
|
103
|
+
|
|
104
|
+
Get an array of all sheet names in the workbook.
|
|
105
|
+
|
|
106
|
+
```javascript
|
|
107
|
+
const names = workbook.sheet_names();
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
##### `get_sheet(name: string): Sheet | undefined`
|
|
111
|
+
|
|
112
|
+
Get a sheet by name.
|
|
113
|
+
|
|
114
|
+
```javascript
|
|
115
|
+
const sheet = workbook.get_sheet('Sheet1');
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
##### `get_sheet_by_index(index: number): Sheet | undefined`
|
|
119
|
+
|
|
120
|
+
Get a sheet by index (0-based).
|
|
121
|
+
|
|
122
|
+
```javascript
|
|
123
|
+
const firstSheet = workbook.get_sheet_by_index(0);
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
##### `sheet_count(): number`
|
|
127
|
+
|
|
128
|
+
Get the total number of sheets.
|
|
129
|
+
|
|
130
|
+
```javascript
|
|
131
|
+
const count = workbook.sheet_count();
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Sheet
|
|
135
|
+
|
|
136
|
+
Represents a single worksheet in an Excel file.
|
|
137
|
+
|
|
138
|
+
#### Properties
|
|
139
|
+
|
|
140
|
+
##### `name: string`
|
|
141
|
+
|
|
142
|
+
The name of the sheet.
|
|
143
|
+
|
|
144
|
+
```javascript
|
|
145
|
+
console.log(sheet.name); // "Sheet1"
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
##### `rows: Array<Array<any>>`
|
|
149
|
+
|
|
150
|
+
A 2D array of cell values. Values are converted to native JavaScript types:
|
|
151
|
+
- Strings → `string`
|
|
152
|
+
- Numbers → `number`
|
|
153
|
+
- Booleans → `boolean`
|
|
154
|
+
- Empty cells → `null`
|
|
155
|
+
- Dates → `number` (Excel date format)
|
|
156
|
+
- Errors → `string`
|
|
157
|
+
|
|
158
|
+
```javascript
|
|
159
|
+
const rows = sheet.rows;
|
|
160
|
+
rows.forEach((row, rowIndex) => {
|
|
161
|
+
row.forEach((cell, colIndex) => {
|
|
162
|
+
console.log(`Cell [${rowIndex}, ${colIndex}]:`, cell);
|
|
163
|
+
});
|
|
164
|
+
});
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
#### Methods
|
|
168
|
+
|
|
169
|
+
##### `get_cell(row: number, col: number): CellValue | undefined`
|
|
170
|
+
|
|
171
|
+
Get a specific cell with type information.
|
|
172
|
+
|
|
173
|
+
```javascript
|
|
174
|
+
const cell = sheet.get_cell(0, 0);
|
|
175
|
+
if (cell.is_string) {
|
|
176
|
+
console.log('String value:', cell.to_string_value());
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
##### `row_count(): number`
|
|
181
|
+
|
|
182
|
+
Get the number of rows in the sheet.
|
|
183
|
+
|
|
184
|
+
```javascript
|
|
185
|
+
const rowCount = sheet.row_count();
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
##### `col_count(): number`
|
|
189
|
+
|
|
190
|
+
Get the maximum number of columns in the sheet.
|
|
191
|
+
|
|
192
|
+
```javascript
|
|
193
|
+
const colCount = sheet.col_count();
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
##### `infer_header_row(): HeaderInfo | undefined`
|
|
197
|
+
|
|
198
|
+
Automatically detect which row contains the table header using heuristics:
|
|
199
|
+
- Headers typically contain string values in most columns
|
|
200
|
+
- Headers are followed by rows with data
|
|
201
|
+
- Headers have multiple non-empty cells
|
|
202
|
+
- Gives priority to candidates within the first 20 rows
|
|
203
|
+
|
|
204
|
+
```javascript
|
|
205
|
+
const headerInfo = sheet.infer_header_row();
|
|
206
|
+
if (headerInfo) {
|
|
207
|
+
console.log('Header found at row:', headerInfo.row_index);
|
|
208
|
+
console.log('Column names:', headerInfo.column_names);
|
|
209
|
+
}
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
##### `to_parquet(unmerge_cells: boolean): Uint8Array`
|
|
213
|
+
|
|
214
|
+
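Convert the sheet to Parquet format. When the `unmerge_cells` argument is `true`, merged cell values are spread horizontally to fill the empty cells. Column types are automatically inferred: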
|
|
215
|
+
- All booleans → Boolean column
|
|
216
|
+
- All integers → Int64 column
|
|
217
|
+
- All floats/numbers → Float64 column
|
|
218
|
+
- All dates → Datetime column (millisecond precision)
|
|
219
|
+
- Mixed or strings → String column
|
|
220
|
+
|
|
221
|
+
```javascript
|
|
222
|
+
const parquetBytes = sheet.to_parquet();
|
|
223
|
+
// Save to file or process further
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
##### `to_parquet_with_names(columnNames: string[], unmerge_cells: boolean): Uint8Array`
|
|
227
|
+
|
|
228
|
+
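Convert the sheet to Parquet format with custom column names. The number of names must match `col_count()`. When the `unmerge_cells` argument is `true`, merged cell values are spread horizontally to fill the empty cells.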
|
|
229
|
+
|
|
230
|
+
```javascript
|
|
231
|
+
const columnNames = ['ID', 'Name', 'Age', 'Email'];
|
|
232
|
+
const parquetBytes = sheet.to_parquet_with_names(columnNames);
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### CellValue
|
|
236
|
+
|
|
237
|
+
Detailed cell information with type checking and conversion methods.
|
|
238
|
+
|
|
239
|
+
#### Type Checking Properties
|
|
240
|
+
|
|
241
|
+
- `is_empty: boolean` - Check if cell is empty
|
|
242
|
+
- `is_string: boolean` - Check if cell contains a string
|
|
243
|
+
- `is_float: boolean` - Check if cell contains a float
|
|
244
|
+
- `is_int: boolean` - Check if cell contains an integer
|
|
245
|
+
- `is_bool: boolean` - Check if cell contains a boolean
|
|
246
|
+
- `is_error: boolean` - Check if cell contains an error
|
|
247
|
+
- `is_datetime: boolean` - Check if cell contains a date/time
|
|
248
|
+
- `is_duration: boolean` - Check if cell contains a duration
|
|
249
|
+
|
|
250
|
+
#### Conversion Methods
|
|
251
|
+
|
|
252
|
+
##### `to_string_value(): string | undefined`
|
|
253
|
+
|
|
254
|
+
Convert cell to string representation.
|
|
255
|
+
|
|
256
|
+
```javascript
|
|
257
|
+
const cell = sheet.get_cell(0, 0);
|
|
258
|
+
const str = cell.to_string_value();
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
##### `to_float_value(): number | undefined`
|
|
262
|
+
|
|
263
|
+
Convert cell to float (works for numbers, booleans, dates).
|
|
264
|
+
|
|
265
|
+
```javascript
|
|
266
|
+
const num = cell.to_float_value();
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
##### `to_int_value(): number | undefined`
|
|
270
|
+
|
|
271
|
+
Convert cell to integer (works for integers, floats, booleans).
|
|
272
|
+
|
|
273
|
+
```javascript
|
|
274
|
+
const int = cell.to_int_value();
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
##### `to_bool_value(): boolean | undefined`
|
|
278
|
+
|
|
279
|
+
Get boolean value (only works for boolean cells).
|
|
280
|
+
|
|
281
|
+
```javascript
|
|
282
|
+
const bool = cell.to_bool_value();
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
### HeaderInfo
|
|
286
|
+
|
|
287
|
+
Information about detected header row.
|
|
288
|
+
|
|
289
|
+
#### Properties
|
|
290
|
+
|
|
291
|
+
- `row_index: number` - The index of the header row (0-based)
|
|
292
|
+
- `column_names: string[]` - Array of column names extracted from the header
|
|
293
|
+
|
|
294
|
+
## Examples
|
|
295
|
+
|
|
296
|
+
### Reading and Processing Data
|
|
297
|
+
|
|
298
|
+
```javascript
|
|
299
|
+
import { Workbook } from '@eredzik/calaminejs';
|
|
300
|
+
import { readFileSync } from 'fs';
|
|
301
|
+
|
|
302
|
+
const buffer = readFileSync('sales.xlsx');
|
|
303
|
+
const workbook = Workbook.from_bytes(new Uint8Array(buffer));
|
|
304
|
+
const sheet = workbook.get_sheet('Sales Data');
|
|
305
|
+
|
|
306
|
+
// Detect header
|
|
307
|
+
const headerInfo = sheet.infer_header_row();
|
|
308
|
+
if (headerInfo) {
|
|
309
|
+
console.log('Columns:', headerInfo.column_names);
|
|
310
|
+
|
|
311
|
+
// Process data rows (skip header)
|
|
312
|
+
const dataRows = sheet.rows.slice(headerInfo.row_index + 1);
|
|
313
|
+
dataRows.forEach(row => {
|
|
314
|
+
console.log('Row data:', row);
|
|
315
|
+
});
|
|
316
|
+
}
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
### Converting to Parquet
|
|
320
|
+
|
|
321
|
+
```javascript
|
|
322
|
+
import { Workbook } from '@eredzik/calaminejs';
|
|
323
|
+
import { writeFileSync, readFileSync } from 'fs';
|
|
324
|
+
|
|
325
|
+
const buffer = readFileSync('data.xlsx');
|
|
326
|
+
const workbook = Workbook.from_bytes(new Uint8Array(buffer));
|
|
327
|
+
const sheet = workbook.get_sheet_by_index(0);
|
|
328
|
+
|
|
329
|
+
// Option 1: Auto-generated column names
|
|
330
|
+
const parquet1 = sheet.to_parquet();
|
|
331
|
+
writeFileSync('output1.parquet', parquet1);
|
|
332
|
+
|
|
333
|
+
// Option 2: Custom column names
|
|
334
|
+
const headerInfo = sheet.infer_header_row();
|
|
335
|
+
if (headerInfo) {
|
|
336
|
+
// Skip header row and convert data
|
|
337
|
+
const dataSheet = {
|
|
338
|
+
...sheet,
|
|
339
|
+
rows: sheet.rows.slice(headerInfo.row_index + 1)
|
|
340
|
+
};
|
|
341
|
+
const parquet2 = dataSheet.to_parquet_with_names(headerInfo.column_names);
|
|
342
|
+
writeFileSync('output2.parquet', parquet2);
|
|
343
|
+
}
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
### Working with Cell Types
|
|
347
|
+
|
|
348
|
+
```javascript
|
|
349
|
+
const sheet = workbook.get_sheet_by_index(0);
|
|
350
|
+
|
|
351
|
+
for (let row = 0; row < sheet.row_count(); row++) {
|
|
352
|
+
for (let col = 0; col < sheet.col_count(); col++) {
|
|
353
|
+
const cell = sheet.get_cell(row, col);
|
|
354
|
+
|
|
355
|
+
if (cell.is_string) {
|
|
356
|
+
console.log(`String: ${cell.to_string_value()}`);
|
|
357
|
+
} else if (cell.is_int) {
|
|
358
|
+
console.log(`Integer: ${cell.to_int_value()}`);
|
|
359
|
+
} else if (cell.is_float) {
|
|
360
|
+
console.log(`Float: ${cell.to_float_value()}`);
|
|
361
|
+
} else if (cell.is_bool) {
|
|
362
|
+
console.log(`Boolean: ${cell.to_bool_value()}`);
|
|
363
|
+
} else if (cell.is_datetime) {
|
|
364
|
+
const excelDate = cell.to_float_value();
|
|
365
|
+
// Convert Excel date to JavaScript Date
|
|
366
|
+
const jsDate = new Date((excelDate - 25569) * 86400 * 1000);
|
|
367
|
+
console.log(`Date: ${jsDate.toISOString()}`);
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
### Progress Tracking for Large Files
|
|
374
|
+
|
|
375
|
+
```javascript
|
|
376
|
+
import { Workbook } from '@eredzik/calaminejs';
|
|
377
|
+
import { readFileSync } from 'fs';
|
|
378
|
+
|
|
379
|
+
const buffer = readFileSync('large-file.xlsx');
|
|
380
|
+
|
|
381
|
+
console.log('Loading workbook...');
|
|
382
|
+
const workbook = Workbook.from_bytes_with_progress(
|
|
383
|
+
new Uint8Array(buffer),
|
|
384
|
+
(progress) => {
|
|
385
|
+
const percent = ((progress.currentRow / (progress.totalRows || progress.currentRow)) * 100).toFixed(1);
|
|
386
|
+
console.log(`[${progress.sheetName}] Processing: ${percent}%`);
|
|
387
|
+
},
|
|
388
|
+
500 // Report every 500 rows
|
|
389
|
+
);
|
|
390
|
+
|
|
391
|
+
console.log('Workbook loaded successfully!');
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
## Browser vs Node.js
|
|
395
|
+
|
|
396
|
+
The package provides separate builds optimized for each environment:
|
|
397
|
+
|
|
398
|
+
```javascript
|
|
399
|
+
// Node.js (default)
|
|
400
|
+
import { Workbook } from '@eredzik/calaminejs';
|
|
401
|
+
// or
|
|
402
|
+
import { Workbook } from '@eredzik/calaminejs/node';
|
|
403
|
+
|
|
404
|
+
// Browser
|
|
405
|
+
import { Workbook } from '@eredzik/calaminejs/web';
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
## Performance Tips
|
|
409
|
+
|
|
410
|
+
1. **Use progress callbacks for large files** to provide user feedback and avoid blocking
|
|
411
|
+
2. **Process sheets on-demand** instead of loading all sheets at once
|
|
412
|
+
3. **Use `get_cell()` for sparse data** instead of accessing the full `rows` array
|
|
413
|
+
4. **Infer header once** and reuse the result instead of calling it multiple times
|
|
414
|
+
5. **Convert to Parquet** for efficient storage and further processing with data tools
|
|
415
|
+
|
|
416
|
+
## Requirements
|
|
417
|
+
|
|
418
|
+
- Node.js >= 16.0.0 (for Node.js usage)
|
|
419
|
+
- Modern browser with WebAssembly support (for browser usage)
|
|
420
|
+
|
|
421
|
+
## Supported File Formats
|
|
422
|
+
|
|
423
|
+
- **XLSX** - Excel 2007+ (.xlsx)
|
|
424
|
+
- **XLS** - Excel 97-2003 (.xls)
|
|
425
|
+
|
|
426
|
+
## Supported Data Types
|
|
427
|
+
|
|
428
|
+
The library preserves Excel data types:
|
|
429
|
+
|
|
430
|
+
- **Empty** - Empty cells
|
|
431
|
+
- **String** - Text values
|
|
432
|
+
- **Float** - Floating-point numbers
|
|
433
|
+
- **Int** - Integer numbers
|
|
434
|
+
- **Bool** - Boolean values (TRUE/FALSE)
|
|
435
|
+
- **Error** - Excel error values (#N/A, #REF!, etc.)
|
|
436
|
+
- **DateTime** - Date and time values (stored as Excel serial numbers)
|
|
437
|
+
- **Duration** - Duration values
|
|
438
|
+
|
|
439
|
+
## License
|
|
440
|
+
|
|
441
|
+
MIT
|
|
442
|
+
|
|
443
|
+
## Repository
|
|
444
|
+
|
|
445
|
+
GitHub: [https://github.com/eredzik/calaminejs](https://github.com/eredzik/calaminejs)
|
|
446
|
+
|
|
447
|
+
## Issues
|
|
448
|
+
|
|
449
|
+
Report issues: [https://github.com/eredzik/calaminejs/issues](https://github.com/eredzik/calaminejs/issues)
|
|
450
|
+
|
|
451
|
+
## Credits
|
|
452
|
+
|
|
453
|
+
Built on top of the excellent [calamine](https://github.com/tafia/calamine) Rust library and [Polars](https://www.pola.rs/) for Parquet conversion.
|
|
454
|
+
|
|
@@ -31,8 +31,14 @@ export class Sheet {
|
|
|
31
31
|
/**
|
|
32
32
|
* Convert sheet to Parquet format
|
|
33
33
|
* Returns the Parquet file as bytes
|
|
34
|
+
* unmerge_cells: if true, spread merged cell values to fill empty cells horizontally
|
|
34
35
|
*/
|
|
35
|
-
to_parquet(): Uint8Array;
|
|
36
|
+
to_parquet(unmerge_cells: boolean): Uint8Array;
|
|
37
|
+
/**
|
|
38
|
+
* Get the merged regions in this sheet
|
|
39
|
+
* Returns an array of arrays, where each inner array is [start_row, start_col, end_row, end_col]
|
|
40
|
+
*/
|
|
41
|
+
merged_regions(): Array<any>;
|
|
36
42
|
/**
|
|
37
43
|
* Infer which row is the table header
|
|
38
44
|
* Returns HeaderInfo with the row index and column names, or None if no header is found
|
|
@@ -46,8 +52,9 @@ export class Sheet {
|
|
|
46
52
|
/**
|
|
47
53
|
* Convert sheet to Parquet format with custom column names
|
|
48
54
|
* column_names: array of column names (must match col_count)
|
|
55
|
+
* unmerge_cells: if true, spread merged cell values to fill empty cells horizontally
|
|
49
56
|
*/
|
|
50
|
-
to_parquet_with_names(column_names: string[]): Uint8Array;
|
|
57
|
+
to_parquet_with_names(column_names: string[], unmerge_cells: boolean): Uint8Array;
|
|
51
58
|
get_cell(row: number, col: number): CellValue | undefined;
|
|
52
59
|
col_count(): number;
|
|
53
60
|
row_count(): number;
|
package/dist/node/calamine_js.js
CHANGED
|
@@ -338,10 +338,12 @@ class Sheet {
|
|
|
338
338
|
/**
|
|
339
339
|
* Convert sheet to Parquet format
|
|
340
340
|
* Returns the Parquet file as bytes
|
|
341
|
+
* unmerge_cells: if true, spread merged cell values to fill empty cells horizontally
|
|
342
|
+
* @param {boolean} unmerge_cells
|
|
341
343
|
* @returns {Uint8Array}
|
|
342
344
|
*/
|
|
343
|
-
to_parquet() {
|
|
344
|
-
const ret = wasm.sheet_to_parquet(this.__wbg_ptr);
|
|
345
|
+
to_parquet(unmerge_cells) {
|
|
346
|
+
const ret = wasm.sheet_to_parquet(this.__wbg_ptr, unmerge_cells);
|
|
345
347
|
if (ret[3]) {
|
|
346
348
|
throw takeFromExternrefTable0(ret[2]);
|
|
347
349
|
}
|
|
@@ -349,6 +351,15 @@ class Sheet {
|
|
|
349
351
|
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
350
352
|
return v1;
|
|
351
353
|
}
|
|
354
|
+
/**
|
|
355
|
+
* Get the merged regions in this sheet
|
|
356
|
+
* Returns an array of arrays, where each inner array is [start_row, start_col, end_row, end_col]
|
|
357
|
+
* @returns {Array<any>}
|
|
358
|
+
*/
|
|
359
|
+
merged_regions() {
|
|
360
|
+
const ret = wasm.sheet_merged_regions(this.__wbg_ptr);
|
|
361
|
+
return ret;
|
|
362
|
+
}
|
|
352
363
|
/**
|
|
353
364
|
* Infer which row is the table header
|
|
354
365
|
* Returns HeaderInfo with the row index and column names, or None if no header is found
|
|
@@ -366,13 +377,15 @@ class Sheet {
|
|
|
366
377
|
/**
|
|
367
378
|
* Convert sheet to Parquet format with custom column names
|
|
368
379
|
* column_names: array of column names (must match col_count)
|
|
380
|
+
* unmerge_cells: if true, spread merged cell values to fill empty cells horizontally
|
|
369
381
|
* @param {string[]} column_names
|
|
382
|
+
* @param {boolean} unmerge_cells
|
|
370
383
|
* @returns {Uint8Array}
|
|
371
384
|
*/
|
|
372
|
-
to_parquet_with_names(column_names) {
|
|
385
|
+
to_parquet_with_names(column_names, unmerge_cells) {
|
|
373
386
|
const ptr0 = passArrayJsValueToWasm0(column_names, wasm.__wbindgen_malloc);
|
|
374
387
|
const len0 = WASM_VECTOR_LEN;
|
|
375
|
-
const ret = wasm.sheet_to_parquet_with_names(this.__wbg_ptr, ptr0, len0);
|
|
388
|
+
const ret = wasm.sheet_to_parquet_with_names(this.__wbg_ptr, ptr0, len0, unmerge_cells);
|
|
376
389
|
if (ret[3]) {
|
|
377
390
|
throw takeFromExternrefTable0(ret[2]);
|
|
378
391
|
}
|
|
Binary file
|
|
@@ -22,11 +22,12 @@ export const headerinfo_row_index: (a: number) => number;
|
|
|
22
22
|
export const sheet_col_count: (a: number) => number;
|
|
23
23
|
export const sheet_get_cell: (a: number, b: number, c: number) => number;
|
|
24
24
|
export const sheet_infer_header_row: (a: number) => number;
|
|
25
|
+
export const sheet_merged_regions: (a: number) => any;
|
|
25
26
|
export const sheet_name: (a: number) => [number, number];
|
|
26
27
|
export const sheet_row_count: (a: number) => number;
|
|
27
28
|
export const sheet_rows: (a: number) => any;
|
|
28
|
-
export const sheet_to_parquet: (a: number) => [number, number, number, number];
|
|
29
|
-
export const sheet_to_parquet_with_names: (a: number, b: number, c: number) => [number, number, number, number];
|
|
29
|
+
export const sheet_to_parquet: (a: number, b: number) => [number, number, number, number];
|
|
30
|
+
export const sheet_to_parquet_with_names: (a: number, b: number, c: number, d: number) => [number, number, number, number];
|
|
30
31
|
export const workbook_from_bytes: (a: number, b: number) => [number, number, number];
|
|
31
32
|
export const workbook_from_bytes_with_progress: (a: number, b: number, c: number, d: number) => [number, number, number];
|
|
32
33
|
export const workbook_get_sheet: (a: number, b: number, c: number) => number;
|
|
@@ -31,8 +31,14 @@ export class Sheet {
|
|
|
31
31
|
/**
|
|
32
32
|
* Convert sheet to Parquet format
|
|
33
33
|
* Returns the Parquet file as bytes
|
|
34
|
+
* unmerge_cells: if true, spread merged cell values to fill empty cells horizontally
|
|
34
35
|
*/
|
|
35
|
-
to_parquet(): Uint8Array;
|
|
36
|
+
to_parquet(unmerge_cells: boolean): Uint8Array;
|
|
37
|
+
/**
|
|
38
|
+
* Get the merged regions in this sheet
|
|
39
|
+
* Returns an array of arrays, where each inner array is [start_row, start_col, end_row, end_col]
|
|
40
|
+
*/
|
|
41
|
+
merged_regions(): Array<any>;
|
|
36
42
|
/**
|
|
37
43
|
* Infer which row is the table header
|
|
38
44
|
* Returns HeaderInfo with the row index and column names, or None if no header is found
|
|
@@ -46,8 +52,9 @@ export class Sheet {
|
|
|
46
52
|
/**
|
|
47
53
|
* Convert sheet to Parquet format with custom column names
|
|
48
54
|
* column_names: array of column names (must match col_count)
|
|
55
|
+
* unmerge_cells: if true, spread merged cell values to fill empty cells horizontally
|
|
49
56
|
*/
|
|
50
|
-
to_parquet_with_names(column_names: string[]): Uint8Array;
|
|
57
|
+
to_parquet_with_names(column_names: string[], unmerge_cells: boolean): Uint8Array;
|
|
51
58
|
get_cell(row: number, col: number): CellValue | undefined;
|
|
52
59
|
col_count(): number;
|
|
53
60
|
row_count(): number;
|
|
@@ -91,11 +98,12 @@ export interface InitOutput {
|
|
|
91
98
|
readonly sheet_col_count: (a: number) => number;
|
|
92
99
|
readonly sheet_get_cell: (a: number, b: number, c: number) => number;
|
|
93
100
|
readonly sheet_infer_header_row: (a: number) => number;
|
|
101
|
+
readonly sheet_merged_regions: (a: number) => any;
|
|
94
102
|
readonly sheet_name: (a: number) => [number, number];
|
|
95
103
|
readonly sheet_row_count: (a: number) => number;
|
|
96
104
|
readonly sheet_rows: (a: number) => any;
|
|
97
|
-
readonly sheet_to_parquet: (a: number) => [number, number, number, number];
|
|
98
|
-
readonly sheet_to_parquet_with_names: (a: number, b: number, c: number) => [number, number, number, number];
|
|
105
|
+
readonly sheet_to_parquet: (a: number, b: number) => [number, number, number, number];
|
|
106
|
+
readonly sheet_to_parquet_with_names: (a: number, b: number, c: number, d: number) => [number, number, number, number];
|
|
99
107
|
readonly workbook_from_bytes: (a: number, b: number) => [number, number, number];
|
|
100
108
|
readonly workbook_from_bytes_with_progress: (a: number, b: number, c: number, d: number) => [number, number, number];
|
|
101
109
|
readonly workbook_get_sheet: (a: number, b: number, c: number) => number;
|
package/dist/web/calamine_js.js
CHANGED
|
@@ -340,10 +340,12 @@ export class Sheet {
|
|
|
340
340
|
/**
|
|
341
341
|
* Convert sheet to Parquet format
|
|
342
342
|
* Returns the Parquet file as bytes
|
|
343
|
+
* unmerge_cells: if true, spread merged cell values to fill empty cells horizontally
|
|
344
|
+
* @param {boolean} unmerge_cells
|
|
343
345
|
* @returns {Uint8Array}
|
|
344
346
|
*/
|
|
345
|
-
to_parquet() {
|
|
346
|
-
const ret = wasm.sheet_to_parquet(this.__wbg_ptr);
|
|
347
|
+
to_parquet(unmerge_cells) {
|
|
348
|
+
const ret = wasm.sheet_to_parquet(this.__wbg_ptr, unmerge_cells);
|
|
347
349
|
if (ret[3]) {
|
|
348
350
|
throw takeFromExternrefTable0(ret[2]);
|
|
349
351
|
}
|
|
@@ -351,6 +353,15 @@ export class Sheet {
|
|
|
351
353
|
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
352
354
|
return v1;
|
|
353
355
|
}
|
|
356
|
+
/**
|
|
357
|
+
* Get the merged regions in this sheet
|
|
358
|
+
* Returns an array of arrays, where each inner array is [start_row, start_col, end_row, end_col]
|
|
359
|
+
* @returns {Array<any>}
|
|
360
|
+
*/
|
|
361
|
+
merged_regions() {
|
|
362
|
+
const ret = wasm.sheet_merged_regions(this.__wbg_ptr);
|
|
363
|
+
return ret;
|
|
364
|
+
}
|
|
354
365
|
/**
|
|
355
366
|
* Infer which row is the table header
|
|
356
367
|
* Returns HeaderInfo with the row index and column names, or None if no header is found
|
|
@@ -368,13 +379,15 @@ export class Sheet {
|
|
|
368
379
|
/**
|
|
369
380
|
* Convert sheet to Parquet format with custom column names
|
|
370
381
|
* column_names: array of column names (must match col_count)
|
|
382
|
+
* unmerge_cells: if true, spread merged cell values to fill empty cells horizontally
|
|
371
383
|
* @param {string[]} column_names
|
|
384
|
+
* @param {boolean} unmerge_cells
|
|
372
385
|
* @returns {Uint8Array}
|
|
373
386
|
*/
|
|
374
|
-
to_parquet_with_names(column_names) {
|
|
387
|
+
to_parquet_with_names(column_names, unmerge_cells) {
|
|
375
388
|
const ptr0 = passArrayJsValueToWasm0(column_names, wasm.__wbindgen_malloc);
|
|
376
389
|
const len0 = WASM_VECTOR_LEN;
|
|
377
|
-
const ret = wasm.sheet_to_parquet_with_names(this.__wbg_ptr, ptr0, len0);
|
|
390
|
+
const ret = wasm.sheet_to_parquet_with_names(this.__wbg_ptr, ptr0, len0, unmerge_cells);
|
|
378
391
|
if (ret[3]) {
|
|
379
392
|
throw takeFromExternrefTable0(ret[2]);
|
|
380
393
|
}
|
|
Binary file
|
|
@@ -22,11 +22,12 @@ export const headerinfo_row_index: (a: number) => number;
|
|
|
22
22
|
export const sheet_col_count: (a: number) => number;
|
|
23
23
|
export const sheet_get_cell: (a: number, b: number, c: number) => number;
|
|
24
24
|
export const sheet_infer_header_row: (a: number) => number;
|
|
25
|
+
export const sheet_merged_regions: (a: number) => any;
|
|
25
26
|
export const sheet_name: (a: number) => [number, number];
|
|
26
27
|
export const sheet_row_count: (a: number) => number;
|
|
27
28
|
export const sheet_rows: (a: number) => any;
|
|
28
|
-
export const sheet_to_parquet: (a: number) => [number, number, number, number];
|
|
29
|
-
export const sheet_to_parquet_with_names: (a: number, b: number, c: number) => [number, number, number, number];
|
|
29
|
+
export const sheet_to_parquet: (a: number, b: number) => [number, number, number, number];
|
|
30
|
+
export const sheet_to_parquet_with_names: (a: number, b: number, c: number, d: number) => [number, number, number, number];
|
|
30
31
|
export const workbook_from_bytes: (a: number, b: number) => [number, number, number];
|
|
31
32
|
export const workbook_from_bytes_with_progress: (a: number, b: number, c: number, d: number) => [number, number, number];
|
|
32
33
|
export const workbook_get_sheet: (a: number, b: number, c: number) => number;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@eredzik/calaminejs",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Rust calamine library bindings for JavaScript/TypeScript - Excel file reading and Parquet conversion",
|
|
5
5
|
"main": "./dist/node/calamine_js.js",
|
|
6
6
|
"module": "./dist/web/calamine_js.js",
|
|
@@ -45,10 +45,14 @@
|
|
|
45
45
|
"build:node:size": "wasm-pack build --target nodejs --out-dir dist/node --profile release-size",
|
|
46
46
|
"build:web:size": "wasm-pack build --target web --out-dir dist/web --profile release-size",
|
|
47
47
|
"postbuild": "rm -f dist/node/.gitignore dist/web/.gitignore dist/node/package.json dist/web/package.json dist/node/README.md dist/web/README.md dist/node/LICENSE dist/web/LICENSE",
|
|
48
|
-
"clean": "rm -rf dist target"
|
|
48
|
+
"clean": "rm -rf dist target",
|
|
49
|
+
"test": "vitest run",
|
|
50
|
+
"test:watch": "vitest"
|
|
49
51
|
},
|
|
50
52
|
"devDependencies": {
|
|
51
|
-
"npm-run-all": "^4.1.5"
|
|
53
|
+
"npm-run-all": "^4.1.5",
|
|
54
|
+
"vitest": "^2.1.8",
|
|
55
|
+
"@types/node": "^22.10.5"
|
|
52
56
|
},
|
|
53
57
|
"keywords": [
|
|
54
58
|
"excel",
|