@eredzik/calaminejs 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +454 -0
- package/dist/node/calamine_js.d.ts +17 -0
- package/dist/node/calamine_js.js +71 -0
- package/dist/node/calamine_js_bg.wasm +0 -0
- package/dist/node/calamine_js_bg.wasm.d.ts +6 -1
- package/dist/web/calamine_js.d.ts +23 -1
- package/dist/web/calamine_js.js +69 -0
- package/dist/web/calamine_js_bg.wasm +0 -0
- package/dist/web/calamine_js_bg.wasm.d.ts +6 -1
- package/package.json +8 -6
package/README.md
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
# @eredzik/calaminejs
|
|
2
|
+
|
|
3
|
+
A high-performance JavaScript/TypeScript library for reading Excel files (XLS/XLSX) and converting them to Parquet format. Built on the Rust [calamine](https://github.com/tafia/calamine) library using WebAssembly for optimal performance.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 📊 Read Excel files (XLS and XLSX formats)
|
|
8
|
+
- 🚀 High performance through WebAssembly
|
|
9
|
+
- 🔄 Convert sheets to Parquet format
|
|
10
|
+
- 🎯 Smart header row detection
|
|
11
|
+
- 📈 Progress tracking for large files
|
|
12
|
+
- 🌐 Works in Node.js and browsers
|
|
13
|
+
- 📝 Full TypeScript support
|
|
14
|
+
- 🔢 Preserves data types (strings, numbers, booleans, dates, etc.)
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install @eredzik/calaminejs
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
### Node.js
|
|
25
|
+
|
|
26
|
+
```javascript
|
|
27
|
+
import { Workbook } from '@eredzik/calaminejs';
|
|
28
|
+
import { readFileSync } from 'fs';
|
|
29
|
+
|
|
30
|
+
// Read an Excel file
|
|
31
|
+
const buffer = readFileSync('data.xlsx');
|
|
32
|
+
const workbook = Workbook.from_bytes(new Uint8Array(buffer));
|
|
33
|
+
|
|
34
|
+
// Get sheet names
|
|
35
|
+
const sheetNames = workbook.sheet_names();
|
|
36
|
+
console.log('Sheets:', sheetNames);
|
|
37
|
+
|
|
38
|
+
// Access a sheet
|
|
39
|
+
const sheet = workbook.get_sheet(sheetNames[0]);
|
|
40
|
+
console.log(`Rows: ${sheet.row_count()}, Columns: ${sheet.col_count()}`);
|
|
41
|
+
|
|
42
|
+
// Access cell data
|
|
43
|
+
const rows = sheet.rows;
|
|
44
|
+
console.log('First row:', rows[0]);
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Browser
|
|
48
|
+
|
|
49
|
+
```javascript
|
|
50
|
+
import { Workbook } from '@eredzik/calaminejs/web';
|
|
51
|
+
|
|
52
|
+
// From file input
|
|
53
|
+
const file = document.querySelector('input[type="file"]').files[0];
|
|
54
|
+
const arrayBuffer = await file.arrayBuffer();
|
|
55
|
+
const workbook = Workbook.from_bytes(new Uint8Array(arrayBuffer));
|
|
56
|
+
|
|
57
|
+
// Process the workbook
|
|
58
|
+
const sheet = workbook.get_sheet_by_index(0);
|
|
59
|
+
console.log('Sheet name:', sheet.name);
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## API Reference
|
|
63
|
+
|
|
64
|
+
### Workbook
|
|
65
|
+
|
|
66
|
+
The main class for working with Excel files.
|
|
67
|
+
|
|
68
|
+
#### Static Methods
|
|
69
|
+
|
|
70
|
+
##### `Workbook.from_bytes(data: Uint8Array): Workbook`
|
|
71
|
+
|
|
72
|
+
Load an Excel file from bytes. Automatically detects XLS or XLSX format.
|
|
73
|
+
|
|
74
|
+
```javascript
|
|
75
|
+
const workbook = Workbook.from_bytes(new Uint8Array(buffer));
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
##### `Workbook.from_bytes_with_progress(data: Uint8Array, callback?: Function, interval?: number): Workbook`
|
|
79
|
+
|
|
80
|
+
Load an Excel file with progress tracking.
|
|
81
|
+
|
|
82
|
+
```javascript
|
|
83
|
+
const workbook = Workbook.from_bytes_with_progress(
|
|
84
|
+
new Uint8Array(buffer),
|
|
85
|
+
(progress) => {
|
|
86
|
+
console.log(`Processing sheet ${progress.sheetIndex + 1}/${progress.totalSheets}`);
|
|
87
|
+
console.log(`Sheet: ${progress.sheetName}, Row: ${progress.currentRow}`);
|
|
88
|
+
},
|
|
89
|
+
100 // Report progress every 100 rows
|
|
90
|
+
);
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Progress object properties:**
|
|
94
|
+
- `sheetIndex: number` - Current sheet index (0-based)
|
|
95
|
+
- `totalSheets: number` - Total number of sheets
|
|
96
|
+
- `sheetName: string` - Name of current sheet
|
|
97
|
+
- `currentRow: number` - Current row being processed
|
|
98
|
+
- `totalRows: number | null` - Total number of rows in the current sheet (`null` until the sheet is complete)
|
|
99
|
+
|
|
100
|
+
#### Instance Methods
|
|
101
|
+
|
|
102
|
+
##### `sheet_names(): string[]`
|
|
103
|
+
|
|
104
|
+
Get an array of all sheet names in the workbook.
|
|
105
|
+
|
|
106
|
+
```javascript
|
|
107
|
+
const names = workbook.sheet_names();
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
##### `get_sheet(name: string): Sheet | undefined`
|
|
111
|
+
|
|
112
|
+
Get a sheet by name.
|
|
113
|
+
|
|
114
|
+
```javascript
|
|
115
|
+
const sheet = workbook.get_sheet('Sheet1');
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
##### `get_sheet_by_index(index: number): Sheet | undefined`
|
|
119
|
+
|
|
120
|
+
Get a sheet by index (0-based).
|
|
121
|
+
|
|
122
|
+
```javascript
|
|
123
|
+
const firstSheet = workbook.get_sheet_by_index(0);
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
##### `sheet_count(): number`
|
|
127
|
+
|
|
128
|
+
Get the total number of sheets.
|
|
129
|
+
|
|
130
|
+
```javascript
|
|
131
|
+
const count = workbook.sheet_count();
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Sheet
|
|
135
|
+
|
|
136
|
+
Represents a single worksheet in an Excel file.
|
|
137
|
+
|
|
138
|
+
#### Properties
|
|
139
|
+
|
|
140
|
+
##### `name: string`
|
|
141
|
+
|
|
142
|
+
The name of the sheet.
|
|
143
|
+
|
|
144
|
+
```javascript
|
|
145
|
+
console.log(sheet.name); // "Sheet1"
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
##### `rows: Array<Array<any>>`
|
|
149
|
+
|
|
150
|
+
A 2D array of cell values. Values are converted to native JavaScript types:
|
|
151
|
+
- Strings → `string`
|
|
152
|
+
- Numbers → `number`
|
|
153
|
+
- Booleans → `boolean`
|
|
154
|
+
- Empty cells → `null`
|
|
155
|
+
- Dates → `number` (Excel serial date number)
|
|
156
|
+
- Errors → `string`
|
|
157
|
+
|
|
158
|
+
```javascript
|
|
159
|
+
const rows = sheet.rows;
|
|
160
|
+
rows.forEach((row, rowIndex) => {
|
|
161
|
+
row.forEach((cell, colIndex) => {
|
|
162
|
+
console.log(`Cell [${rowIndex}, ${colIndex}]:`, cell);
|
|
163
|
+
});
|
|
164
|
+
});
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
#### Methods
|
|
168
|
+
|
|
169
|
+
##### `get_cell(row: number, col: number): CellValue | undefined`
|
|
170
|
+
|
|
171
|
+
Get a specific cell with type information.
|
|
172
|
+
|
|
173
|
+
```javascript
|
|
174
|
+
const cell = sheet.get_cell(0, 0);
|
|
175
|
+
if (cell.is_string) {
|
|
176
|
+
console.log('String value:', cell.to_string_value());
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
##### `row_count(): number`
|
|
181
|
+
|
|
182
|
+
Get the number of rows in the sheet.
|
|
183
|
+
|
|
184
|
+
```javascript
|
|
185
|
+
const rowCount = sheet.row_count();
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
##### `col_count(): number`
|
|
189
|
+
|
|
190
|
+
Get the maximum number of columns in the sheet.
|
|
191
|
+
|
|
192
|
+
```javascript
|
|
193
|
+
const colCount = sheet.col_count();
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
##### `infer_header_row(): HeaderInfo | undefined`
|
|
197
|
+
|
|
198
|
+
Automatically detect which row contains the table header using heuristics:
|
|
199
|
+
- Headers typically contain string values in most columns
|
|
200
|
+
- Headers are followed by rows with data
|
|
201
|
+
- Headers have multiple non-empty cells
|
|
202
|
+
- Prioritizes rows in the first 20 rows
|
|
203
|
+
|
|
204
|
+
```javascript
|
|
205
|
+
const headerInfo = sheet.infer_header_row();
|
|
206
|
+
if (headerInfo) {
|
|
207
|
+
console.log('Header found at row:', headerInfo.row_index);
|
|
208
|
+
console.log('Column names:', headerInfo.column_names);
|
|
209
|
+
}
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
##### `to_parquet(): Uint8Array`
|
|
213
|
+
|
|
214
|
+
Convert the sheet to Parquet format. Column types are automatically inferred:
|
|
215
|
+
- All booleans → Boolean column
|
|
216
|
+
- All integers → Int64 column
|
|
217
|
+
- All floats/numbers → Float64 column
|
|
218
|
+
- All dates → Datetime column (millisecond precision)
|
|
219
|
+
- Mixed or strings → String column
|
|
220
|
+
|
|
221
|
+
```javascript
|
|
222
|
+
const parquetBytes = sheet.to_parquet();
|
|
223
|
+
// Save to file or process further
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
##### `to_parquet_with_names(columnNames: string[]): Uint8Array`
|
|
227
|
+
|
|
228
|
+
Convert the sheet to Parquet format with custom column names.
|
|
229
|
+
|
|
230
|
+
```javascript
|
|
231
|
+
const columnNames = ['ID', 'Name', 'Age', 'Email'];
|
|
232
|
+
const parquetBytes = sheet.to_parquet_with_names(columnNames);
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### CellValue
|
|
236
|
+
|
|
237
|
+
Detailed cell information with type checking and conversion methods.
|
|
238
|
+
|
|
239
|
+
#### Type Checking Properties
|
|
240
|
+
|
|
241
|
+
- `is_empty: boolean` - Check if cell is empty
|
|
242
|
+
- `is_string: boolean` - Check if cell contains a string
|
|
243
|
+
- `is_float: boolean` - Check if cell contains a float
|
|
244
|
+
- `is_int: boolean` - Check if cell contains an integer
|
|
245
|
+
- `is_bool: boolean` - Check if cell contains a boolean
|
|
246
|
+
- `is_error: boolean` - Check if cell contains an error
|
|
247
|
+
- `is_datetime: boolean` - Check if cell contains a date/time
|
|
248
|
+
- `is_duration: boolean` - Check if cell contains a duration
|
|
249
|
+
|
|
250
|
+
#### Conversion Methods
|
|
251
|
+
|
|
252
|
+
##### `to_string_value(): string | undefined`
|
|
253
|
+
|
|
254
|
+
Convert cell to string representation.
|
|
255
|
+
|
|
256
|
+
```javascript
|
|
257
|
+
const cell = sheet.get_cell(0, 0);
|
|
258
|
+
const str = cell.to_string_value();
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
##### `to_float_value(): number | undefined`
|
|
262
|
+
|
|
263
|
+
Convert cell to float (works for numbers, booleans, dates).
|
|
264
|
+
|
|
265
|
+
```javascript
|
|
266
|
+
const num = cell.to_float_value();
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
##### `to_int_value(): number | undefined`
|
|
270
|
+
|
|
271
|
+
Convert cell to integer (works for integers, floats, booleans).
|
|
272
|
+
|
|
273
|
+
```javascript
|
|
274
|
+
const int = cell.to_int_value();
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
##### `to_bool_value(): boolean | undefined`
|
|
278
|
+
|
|
279
|
+
Get boolean value (only works for boolean cells).
|
|
280
|
+
|
|
281
|
+
```javascript
|
|
282
|
+
const bool = cell.to_bool_value();
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
### HeaderInfo
|
|
286
|
+
|
|
287
|
+
Information about detected header row.
|
|
288
|
+
|
|
289
|
+
#### Properties
|
|
290
|
+
|
|
291
|
+
- `row_index: number` - The index of the header row (0-based)
|
|
292
|
+
- `column_names: string[]` - Array of column names extracted from the header
|
|
293
|
+
|
|
294
|
+
## Examples
|
|
295
|
+
|
|
296
|
+
### Reading and Processing Data
|
|
297
|
+
|
|
298
|
+
```javascript
|
|
299
|
+
import { Workbook } from '@eredzik/calaminejs';
|
|
300
|
+
import { readFileSync } from 'fs';
|
|
301
|
+
|
|
302
|
+
const buffer = readFileSync('sales.xlsx');
|
|
303
|
+
const workbook = Workbook.from_bytes(new Uint8Array(buffer));
|
|
304
|
+
const sheet = workbook.get_sheet('Sales Data');
|
|
305
|
+
|
|
306
|
+
// Detect header
|
|
307
|
+
const headerInfo = sheet.infer_header_row();
|
|
308
|
+
if (headerInfo) {
|
|
309
|
+
console.log('Columns:', headerInfo.column_names);
|
|
310
|
+
|
|
311
|
+
// Process data rows (skip header)
|
|
312
|
+
const dataRows = sheet.rows.slice(headerInfo.row_index + 1);
|
|
313
|
+
dataRows.forEach(row => {
|
|
314
|
+
console.log('Row data:', row);
|
|
315
|
+
});
|
|
316
|
+
}
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
### Converting to Parquet
|
|
320
|
+
|
|
321
|
+
```javascript
|
|
322
|
+
import { Workbook } from '@eredzik/calaminejs';
|
|
323
|
+
import { writeFileSync, readFileSync } from 'fs';
|
|
324
|
+
|
|
325
|
+
const buffer = readFileSync('data.xlsx');
|
|
326
|
+
const workbook = Workbook.from_bytes(new Uint8Array(buffer));
|
|
327
|
+
const sheet = workbook.get_sheet_by_index(0);
|
|
328
|
+
|
|
329
|
+
// Option 1: Auto-generated column names
|
|
330
|
+
const parquet1 = sheet.to_parquet();
|
|
331
|
+
writeFileSync('output1.parquet', parquet1);
|
|
332
|
+
|
|
333
|
+
// Option 2: Custom column names
|
|
334
|
+
const headerInfo = sheet.infer_header_row();
|
|
335
|
+
if (headerInfo) {
|
|
336
|
+
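// Call the method on the Sheet instance itself. An object spread
|
|
337
|
+
// (`{ ...sheet, rows: ... }`) copies only own properties, not the class
|
|
338
|
+
// methods, so the copy would have no to_parquet_with_names() to call.
|
|
339
|
+
// NOTE: the sheet is converted as-is; the header row is presumably still
|
|
340
|
+
// present as a data row (at headerInfo.row_index) - verify the output.
|
|
341
|
+
const parquet2 = sheet.to_parquet_with_names(headerInfo.column_names);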
|
|
342
|
+
writeFileSync('output2.parquet', parquet2);
|
|
343
|
+
}
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
### Working with Cell Types
|
|
347
|
+
|
|
348
|
+
```javascript
|
|
349
|
+
const sheet = workbook.get_sheet_by_index(0);
|
|
350
|
+
|
|
351
|
+
for (let row = 0; row < sheet.row_count(); row++) {
|
|
352
|
+
for (let col = 0; col < sheet.col_count(); col++) {
|
|
353
|
+
const cell = sheet.get_cell(row, col);
|
|
354
|
+
|
|
355
|
+
if (cell.is_string) {
|
|
356
|
+
console.log(`String: ${cell.to_string_value()}`);
|
|
357
|
+
} else if (cell.is_int) {
|
|
358
|
+
console.log(`Integer: ${cell.to_int_value()}`);
|
|
359
|
+
} else if (cell.is_float) {
|
|
360
|
+
console.log(`Float: ${cell.to_float_value()}`);
|
|
361
|
+
} else if (cell.is_bool) {
|
|
362
|
+
console.log(`Boolean: ${cell.to_bool_value()}`);
|
|
363
|
+
} else if (cell.is_datetime) {
|
|
364
|
+
const excelDate = cell.to_float_value();
|
|
365
|
+
// Convert Excel date to JavaScript Date
|
|
366
|
+
const jsDate = new Date((excelDate - 25569) * 86400 * 1000);
|
|
367
|
+
console.log(`Date: ${jsDate.toISOString()}`);
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
### Progress Tracking for Large Files
|
|
374
|
+
|
|
375
|
+
```javascript
|
|
376
|
+
import { Workbook } from '@eredzik/calaminejs';
|
|
377
|
+
import { readFileSync } from 'fs';
|
|
378
|
+
|
|
379
|
+
const buffer = readFileSync('large-file.xlsx');
|
|
380
|
+
|
|
381
|
+
console.log('Loading workbook...');
|
|
382
|
+
const workbook = Workbook.from_bytes_with_progress(
|
|
383
|
+
new Uint8Array(buffer),
|
|
384
|
+
(progress) => {
|
|
385
|
+
const percent = ((progress.currentRow / (progress.totalRows || progress.currentRow)) * 100).toFixed(1);
|
|
386
|
+
console.log(`[${progress.sheetName}] Processing: ${percent}%`);
|
|
387
|
+
},
|
|
388
|
+
500 // Report every 500 rows
|
|
389
|
+
);
|
|
390
|
+
|
|
391
|
+
console.log('Workbook loaded successfully!');
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
## Browser vs Node.js
|
|
395
|
+
|
|
396
|
+
The package provides separate builds optimized for each environment:
|
|
397
|
+
|
|
398
|
+
```javascript
|
|
399
|
+
// Node.js (default)
|
|
400
|
+
import { Workbook } from '@eredzik/calaminejs';
|
|
401
|
+
// or
|
|
402
|
+
import { Workbook } from '@eredzik/calaminejs/node';
|
|
403
|
+
|
|
404
|
+
// Browser
|
|
405
|
+
import { Workbook } from '@eredzik/calaminejs/web';
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
## Performance Tips
|
|
409
|
+
|
|
410
|
+
1. **Use progress callbacks for large files** to provide user feedback while the workbook loads (the load call itself is still synchronous)
|
|
411
|
+
2. **Process sheets on-demand** instead of loading all sheets at once
|
|
412
|
+
3. **Use `get_cell()` for sparse data** instead of accessing the full `rows` array
|
|
413
|
+
4. **Infer header once** and reuse the result instead of calling it multiple times
|
|
414
|
+
5. **Convert to Parquet** for efficient storage and further processing with data tools
|
|
415
|
+
|
|
416
|
+
## Requirements
|
|
417
|
+
|
|
418
|
+
- Node.js >= 16.0.0 (for Node.js usage)
|
|
419
|
+
- Modern browser with WebAssembly support (for browser usage)
|
|
420
|
+
|
|
421
|
+
## Supported File Formats
|
|
422
|
+
|
|
423
|
+
- **XLSX** - Excel 2007+ (.xlsx)
|
|
424
|
+
- **XLS** - Excel 97-2003 (.xls)
|
|
425
|
+
|
|
426
|
+
## Supported Data Types
|
|
427
|
+
|
|
428
|
+
The library preserves Excel data types:
|
|
429
|
+
|
|
430
|
+
- **Empty** - Empty cells
|
|
431
|
+
- **String** - Text values
|
|
432
|
+
- **Float** - Floating-point numbers
|
|
433
|
+
- **Int** - Integer numbers
|
|
434
|
+
- **Bool** - Boolean values (TRUE/FALSE)
|
|
435
|
+
- **Error** - Excel error values (#N/A, #REF!, etc.)
|
|
436
|
+
- **DateTime** - Date and time values (stored as Excel serial numbers)
|
|
437
|
+
- **Duration** - Duration values
|
|
438
|
+
|
|
439
|
+
## License
|
|
440
|
+
|
|
441
|
+
MIT
|
|
442
|
+
|
|
443
|
+
## Repository
|
|
444
|
+
|
|
445
|
+
GitHub: [https://github.com/eredzik/calaminejs](https://github.com/eredzik/calaminejs)
|
|
446
|
+
|
|
447
|
+
## Issues
|
|
448
|
+
|
|
449
|
+
Report issues: [https://github.com/eredzik/calaminejs/issues](https://github.com/eredzik/calaminejs/issues)
|
|
450
|
+
|
|
451
|
+
## Credits
|
|
452
|
+
|
|
453
|
+
Built on top of the excellent [calamine](https://github.com/tafia/calamine) Rust library and [Polars](https://www.pola.rs/) for Parquet conversion.
|
|
454
|
+
|
|
@@ -17,6 +17,13 @@ export class CellValue {
|
|
|
17
17
|
readonly is_float: boolean;
|
|
18
18
|
readonly is_string: boolean;
|
|
19
19
|
}
|
|
20
|
+
export class HeaderInfo {
|
|
21
|
+
private constructor();
|
|
22
|
+
free(): void;
|
|
23
|
+
[Symbol.dispose](): void;
|
|
24
|
+
readonly column_names: string[];
|
|
25
|
+
readonly row_index: number;
|
|
26
|
+
}
|
|
20
27
|
export class Sheet {
|
|
21
28
|
private constructor();
|
|
22
29
|
free(): void;
|
|
@@ -26,6 +33,16 @@ export class Sheet {
|
|
|
26
33
|
* Returns the Parquet file as bytes
|
|
27
34
|
*/
|
|
28
35
|
to_parquet(): Uint8Array;
|
|
36
|
+
/**
|
|
37
|
+
* Infer which row is the table header
|
|
38
|
+
* Returns HeaderInfo with the row index and column names, or undefined if no header is found
|
|
39
|
+
*
|
|
40
|
+
* This function uses heuristics to detect the header row:
|
|
41
|
+
* - Headers typically contain string values in most columns
|
|
42
|
+
* - Headers are followed by rows with data
|
|
43
|
+
* - Headers have multiple non-empty cells
|
|
44
|
+
*/
|
|
45
|
+
infer_header_row(): HeaderInfo | undefined;
|
|
29
46
|
/**
|
|
30
47
|
* Convert sheet to Parquet format with custom column names
|
|
31
48
|
* column_names: array of column names (must match col_count)
|
package/dist/node/calamine_js.js
CHANGED
|
@@ -111,6 +111,17 @@ function getArrayU8FromWasm0(ptr, len) {
|
|
|
111
111
|
return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
|
|
112
112
|
}
|
|
113
113
|
|
|
114
|
+
function getArrayJsValueFromWasm0(ptr, len) {
|
|
115
|
+
ptr = ptr >>> 0;
|
|
116
|
+
const mem = getDataViewMemory0();
|
|
117
|
+
const result = [];
|
|
118
|
+
for (let i = ptr; i < ptr + 4 * len; i += 4) {
|
|
119
|
+
result.push(wasm.__wbindgen_externrefs.get(mem.getUint32(i, true)));
|
|
120
|
+
}
|
|
121
|
+
wasm.__externref_drop_slice(ptr, len);
|
|
122
|
+
return result;
|
|
123
|
+
}
|
|
124
|
+
|
|
114
125
|
function takeFromExternrefTable0(idx) {
|
|
115
126
|
const value = wasm.__wbindgen_externrefs.get(idx);
|
|
116
127
|
wasm.__externref_table_dealloc(idx);
|
|
@@ -253,6 +264,52 @@ if (Symbol.dispose) CellValue.prototype[Symbol.dispose] = CellValue.prototype.fr
|
|
|
253
264
|
|
|
254
265
|
exports.CellValue = CellValue;
|
|
255
266
|
|
|
267
|
+
const HeaderInfoFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
268
|
+
? { register: () => {}, unregister: () => {} }
|
|
269
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_headerinfo_free(ptr >>> 0, 1));
|
|
270
|
+
|
|
271
|
+
class HeaderInfo {
|
|
272
|
+
|
|
273
|
+
static __wrap(ptr) {
|
|
274
|
+
ptr = ptr >>> 0;
|
|
275
|
+
const obj = Object.create(HeaderInfo.prototype);
|
|
276
|
+
obj.__wbg_ptr = ptr;
|
|
277
|
+
HeaderInfoFinalization.register(obj, obj.__wbg_ptr, obj);
|
|
278
|
+
return obj;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
__destroy_into_raw() {
|
|
282
|
+
const ptr = this.__wbg_ptr;
|
|
283
|
+
this.__wbg_ptr = 0;
|
|
284
|
+
HeaderInfoFinalization.unregister(this);
|
|
285
|
+
return ptr;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
free() {
|
|
289
|
+
const ptr = this.__destroy_into_raw();
|
|
290
|
+
wasm.__wbg_headerinfo_free(ptr, 0);
|
|
291
|
+
}
|
|
292
|
+
/**
|
|
293
|
+
* @returns {string[]}
|
|
294
|
+
*/
|
|
295
|
+
get column_names() {
|
|
296
|
+
const ret = wasm.headerinfo_column_names(this.__wbg_ptr);
|
|
297
|
+
var v1 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
|
|
298
|
+
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
|
299
|
+
return v1;
|
|
300
|
+
}
|
|
301
|
+
/**
|
|
302
|
+
* @returns {number}
|
|
303
|
+
*/
|
|
304
|
+
get row_index() {
|
|
305
|
+
const ret = wasm.headerinfo_row_index(this.__wbg_ptr);
|
|
306
|
+
return ret >>> 0;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
if (Symbol.dispose) HeaderInfo.prototype[Symbol.dispose] = HeaderInfo.prototype.free;
|
|
310
|
+
|
|
311
|
+
exports.HeaderInfo = HeaderInfo;
|
|
312
|
+
|
|
256
313
|
const SheetFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
257
314
|
? { register: () => {}, unregister: () => {} }
|
|
258
315
|
: new FinalizationRegistry(ptr => wasm.__wbg_sheet_free(ptr >>> 0, 1));
|
|
@@ -292,6 +349,20 @@ class Sheet {
|
|
|
292
349
|
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
293
350
|
return v1;
|
|
294
351
|
}
|
|
352
|
+
/**
|
|
353
|
+
* Infer which row is the table header
|
|
354
|
+
* Returns HeaderInfo with the row index and column names, or undefined if no header is found
|
|
355
|
+
*
|
|
356
|
+
* This function uses heuristics to detect the header row:
|
|
357
|
+
* - Headers typically contain string values in most columns
|
|
358
|
+
* - Headers are followed by rows with data
|
|
359
|
+
* - Headers have multiple non-empty cells
|
|
360
|
+
* @returns {HeaderInfo | undefined}
|
|
361
|
+
*/
|
|
362
|
+
infer_header_row() {
|
|
363
|
+
const ret = wasm.sheet_infer_header_row(this.__wbg_ptr);
|
|
364
|
+
return ret === 0 ? undefined : HeaderInfo.__wrap(ret);
|
|
365
|
+
}
|
|
295
366
|
/**
|
|
296
367
|
* Convert sheet to Parquet format with custom column names
|
|
297
368
|
* column_names: array of column names (must match col_count)
|
|
Binary file
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
/* eslint-disable */
|
|
3
3
|
export const memory: WebAssembly.Memory;
|
|
4
4
|
export const __wbg_cellvalue_free: (a: number, b: number) => void;
|
|
5
|
+
export const __wbg_headerinfo_free: (a: number, b: number) => void;
|
|
5
6
|
export const __wbg_sheet_free: (a: number, b: number) => void;
|
|
6
7
|
export const __wbg_workbook_free: (a: number, b: number) => void;
|
|
7
8
|
export const cellvalue_is_bool: (a: number) => number;
|
|
@@ -16,8 +17,11 @@ export const cellvalue_to_bool_value: (a: number) => number;
|
|
|
16
17
|
export const cellvalue_to_float_value: (a: number) => [number, number];
|
|
17
18
|
export const cellvalue_to_int_value: (a: number) => [number, bigint];
|
|
18
19
|
export const cellvalue_to_string_value: (a: number) => [number, number];
|
|
20
|
+
export const headerinfo_column_names: (a: number) => [number, number];
|
|
21
|
+
export const headerinfo_row_index: (a: number) => number;
|
|
19
22
|
export const sheet_col_count: (a: number) => number;
|
|
20
23
|
export const sheet_get_cell: (a: number, b: number, c: number) => number;
|
|
24
|
+
export const sheet_infer_header_row: (a: number) => number;
|
|
21
25
|
export const sheet_name: (a: number) => [number, number];
|
|
22
26
|
export const sheet_row_count: (a: number) => number;
|
|
23
27
|
export const sheet_rows: (a: number) => any;
|
|
@@ -49,6 +53,7 @@ export const __wbindgen_realloc: (a: number, b: number, c: number, d: number) =>
|
|
|
49
53
|
export const __wbindgen_exn_store: (a: number) => void;
|
|
50
54
|
export const __externref_table_alloc: () => number;
|
|
51
55
|
export const __wbindgen_externrefs: WebAssembly.Table;
|
|
52
|
-
export const __externref_table_dealloc: (a: number) => void;
|
|
56
|
+
export const __externref_drop_slice: (a: number, b: number) => void;
|
|
53
57
|
export const __wbindgen_free: (a: number, b: number, c: number) => void;
|
|
58
|
+
export const __externref_table_dealloc: (a: number) => void;
|
|
54
59
|
export const __wbindgen_start: () => void;
|
|
@@ -17,6 +17,13 @@ export class CellValue {
|
|
|
17
17
|
readonly is_float: boolean;
|
|
18
18
|
readonly is_string: boolean;
|
|
19
19
|
}
|
|
20
|
+
export class HeaderInfo {
|
|
21
|
+
private constructor();
|
|
22
|
+
free(): void;
|
|
23
|
+
[Symbol.dispose](): void;
|
|
24
|
+
readonly column_names: string[];
|
|
25
|
+
readonly row_index: number;
|
|
26
|
+
}
|
|
20
27
|
export class Sheet {
|
|
21
28
|
private constructor();
|
|
22
29
|
free(): void;
|
|
@@ -26,6 +33,16 @@ export class Sheet {
|
|
|
26
33
|
* Returns the Parquet file as bytes
|
|
27
34
|
*/
|
|
28
35
|
to_parquet(): Uint8Array;
|
|
36
|
+
/**
|
|
37
|
+
* Infer which row is the table header
|
|
38
|
+
* Returns HeaderInfo with the row index and column names, or undefined if no header is found
|
|
39
|
+
*
|
|
40
|
+
* This function uses heuristics to detect the header row:
|
|
41
|
+
* - Headers typically contain string values in most columns
|
|
42
|
+
* - Headers are followed by rows with data
|
|
43
|
+
* - Headers have multiple non-empty cells
|
|
44
|
+
*/
|
|
45
|
+
infer_header_row(): HeaderInfo | undefined;
|
|
29
46
|
/**
|
|
30
47
|
* Convert sheet to Parquet format with custom column names
|
|
31
48
|
* column_names: array of column names (must match col_count)
|
|
@@ -54,6 +71,7 @@ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembl
|
|
|
54
71
|
export interface InitOutput {
|
|
55
72
|
readonly memory: WebAssembly.Memory;
|
|
56
73
|
readonly __wbg_cellvalue_free: (a: number, b: number) => void;
|
|
74
|
+
readonly __wbg_headerinfo_free: (a: number, b: number) => void;
|
|
57
75
|
readonly __wbg_sheet_free: (a: number, b: number) => void;
|
|
58
76
|
readonly __wbg_workbook_free: (a: number, b: number) => void;
|
|
59
77
|
readonly cellvalue_is_bool: (a: number) => number;
|
|
@@ -68,8 +86,11 @@ export interface InitOutput {
|
|
|
68
86
|
readonly cellvalue_to_float_value: (a: number) => [number, number];
|
|
69
87
|
readonly cellvalue_to_int_value: (a: number) => [number, bigint];
|
|
70
88
|
readonly cellvalue_to_string_value: (a: number) => [number, number];
|
|
89
|
+
readonly headerinfo_column_names: (a: number) => [number, number];
|
|
90
|
+
readonly headerinfo_row_index: (a: number) => number;
|
|
71
91
|
readonly sheet_col_count: (a: number) => number;
|
|
72
92
|
readonly sheet_get_cell: (a: number, b: number, c: number) => number;
|
|
93
|
+
readonly sheet_infer_header_row: (a: number) => number;
|
|
73
94
|
readonly sheet_name: (a: number) => [number, number];
|
|
74
95
|
readonly sheet_row_count: (a: number) => number;
|
|
75
96
|
readonly sheet_rows: (a: number) => any;
|
|
@@ -101,8 +122,9 @@ export interface InitOutput {
|
|
|
101
122
|
readonly __wbindgen_exn_store: (a: number) => void;
|
|
102
123
|
readonly __externref_table_alloc: () => number;
|
|
103
124
|
readonly __wbindgen_externrefs: WebAssembly.Table;
|
|
104
|
-
readonly __externref_table_dealloc: (a: number) => void;
|
|
125
|
+
readonly __externref_drop_slice: (a: number, b: number) => void;
|
|
105
126
|
readonly __wbindgen_free: (a: number, b: number, c: number) => void;
|
|
127
|
+
readonly __externref_table_dealloc: (a: number) => void;
|
|
106
128
|
readonly __wbindgen_start: () => void;
|
|
107
129
|
}
|
|
108
130
|
|
package/dist/web/calamine_js.js
CHANGED
|
@@ -117,6 +117,17 @@ function getArrayU8FromWasm0(ptr, len) {
|
|
|
117
117
|
return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
+
function getArrayJsValueFromWasm0(ptr, len) {
|
|
121
|
+
ptr = ptr >>> 0;
|
|
122
|
+
const mem = getDataViewMemory0();
|
|
123
|
+
const result = [];
|
|
124
|
+
for (let i = ptr; i < ptr + 4 * len; i += 4) {
|
|
125
|
+
result.push(wasm.__wbindgen_externrefs.get(mem.getUint32(i, true)));
|
|
126
|
+
}
|
|
127
|
+
wasm.__externref_drop_slice(ptr, len);
|
|
128
|
+
return result;
|
|
129
|
+
}
|
|
130
|
+
|
|
120
131
|
function takeFromExternrefTable0(idx) {
|
|
121
132
|
const value = wasm.__wbindgen_externrefs.get(idx);
|
|
122
133
|
wasm.__externref_table_dealloc(idx);
|
|
@@ -257,6 +268,50 @@ export class CellValue {
|
|
|
257
268
|
}
|
|
258
269
|
if (Symbol.dispose) CellValue.prototype[Symbol.dispose] = CellValue.prototype.free;
|
|
259
270
|
|
|
271
|
+
const HeaderInfoFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
272
|
+
? { register: () => {}, unregister: () => {} }
|
|
273
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_headerinfo_free(ptr >>> 0, 1));
|
|
274
|
+
|
|
275
|
+
export class HeaderInfo {
|
|
276
|
+
|
|
277
|
+
static __wrap(ptr) {
|
|
278
|
+
ptr = ptr >>> 0;
|
|
279
|
+
const obj = Object.create(HeaderInfo.prototype);
|
|
280
|
+
obj.__wbg_ptr = ptr;
|
|
281
|
+
HeaderInfoFinalization.register(obj, obj.__wbg_ptr, obj);
|
|
282
|
+
return obj;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
__destroy_into_raw() {
|
|
286
|
+
const ptr = this.__wbg_ptr;
|
|
287
|
+
this.__wbg_ptr = 0;
|
|
288
|
+
HeaderInfoFinalization.unregister(this);
|
|
289
|
+
return ptr;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
free() {
|
|
293
|
+
const ptr = this.__destroy_into_raw();
|
|
294
|
+
wasm.__wbg_headerinfo_free(ptr, 0);
|
|
295
|
+
}
|
|
296
|
+
/**
|
|
297
|
+
* @returns {string[]}
|
|
298
|
+
*/
|
|
299
|
+
get column_names() {
|
|
300
|
+
const ret = wasm.headerinfo_column_names(this.__wbg_ptr);
|
|
301
|
+
var v1 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
|
|
302
|
+
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
|
303
|
+
return v1;
|
|
304
|
+
}
|
|
305
|
+
/**
|
|
306
|
+
* @returns {number}
|
|
307
|
+
*/
|
|
308
|
+
get row_index() {
|
|
309
|
+
const ret = wasm.headerinfo_row_index(this.__wbg_ptr);
|
|
310
|
+
return ret >>> 0;
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
if (Symbol.dispose) HeaderInfo.prototype[Symbol.dispose] = HeaderInfo.prototype.free;
|
|
314
|
+
|
|
260
315
|
const SheetFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
261
316
|
? { register: () => {}, unregister: () => {} }
|
|
262
317
|
: new FinalizationRegistry(ptr => wasm.__wbg_sheet_free(ptr >>> 0, 1));
|
|
@@ -296,6 +351,20 @@ export class Sheet {
|
|
|
296
351
|
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
297
352
|
return v1;
|
|
298
353
|
}
|
|
354
|
+
/**
|
|
355
|
+
* Infer which row is the table header
|
|
356
|
+
* Returns HeaderInfo with the row index and column names, or undefined if no header is found
|
|
357
|
+
*
|
|
358
|
+
* This function uses heuristics to detect the header row:
|
|
359
|
+
* - Headers typically contain string values in most columns
|
|
360
|
+
* - Headers are followed by rows with data
|
|
361
|
+
* - Headers have multiple non-empty cells
|
|
362
|
+
* @returns {HeaderInfo | undefined}
|
|
363
|
+
*/
|
|
364
|
+
infer_header_row() {
|
|
365
|
+
const ret = wasm.sheet_infer_header_row(this.__wbg_ptr);
|
|
366
|
+
return ret === 0 ? undefined : HeaderInfo.__wrap(ret);
|
|
367
|
+
}
|
|
299
368
|
/**
|
|
300
369
|
* Convert sheet to Parquet format with custom column names
|
|
301
370
|
* column_names: array of column names (must match col_count)
|
|
Binary file
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
/* eslint-disable */
|
|
3
3
|
export const memory: WebAssembly.Memory;
|
|
4
4
|
export const __wbg_cellvalue_free: (a: number, b: number) => void;
|
|
5
|
+
export const __wbg_headerinfo_free: (a: number, b: number) => void;
|
|
5
6
|
export const __wbg_sheet_free: (a: number, b: number) => void;
|
|
6
7
|
export const __wbg_workbook_free: (a: number, b: number) => void;
|
|
7
8
|
export const cellvalue_is_bool: (a: number) => number;
|
|
@@ -16,8 +17,11 @@ export const cellvalue_to_bool_value: (a: number) => number;
|
|
|
16
17
|
export const cellvalue_to_float_value: (a: number) => [number, number];
|
|
17
18
|
export const cellvalue_to_int_value: (a: number) => [number, bigint];
|
|
18
19
|
export const cellvalue_to_string_value: (a: number) => [number, number];
|
|
20
|
+
export const headerinfo_column_names: (a: number) => [number, number];
|
|
21
|
+
export const headerinfo_row_index: (a: number) => number;
|
|
19
22
|
export const sheet_col_count: (a: number) => number;
|
|
20
23
|
export const sheet_get_cell: (a: number, b: number, c: number) => number;
|
|
24
|
+
export const sheet_infer_header_row: (a: number) => number;
|
|
21
25
|
export const sheet_name: (a: number) => [number, number];
|
|
22
26
|
export const sheet_row_count: (a: number) => number;
|
|
23
27
|
export const sheet_rows: (a: number) => any;
|
|
@@ -49,6 +53,7 @@ export const __wbindgen_realloc: (a: number, b: number, c: number, d: number) =>
|
|
|
49
53
|
export const __wbindgen_exn_store: (a: number) => void;
|
|
50
54
|
export const __externref_table_alloc: () => number;
|
|
51
55
|
export const __wbindgen_externrefs: WebAssembly.Table;
|
|
52
|
-
export const __externref_table_dealloc: (a: number) => void;
|
|
56
|
+
export const __externref_drop_slice: (a: number, b: number) => void;
|
|
53
57
|
export const __wbindgen_free: (a: number, b: number, c: number) => void;
|
|
58
|
+
export const __externref_table_dealloc: (a: number) => void;
|
|
54
59
|
export const __wbindgen_start: () => void;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@eredzik/calaminejs",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Rust calamine library bindings for JavaScript/TypeScript - Excel file reading and Parquet conversion",
|
|
5
5
|
"main": "./dist/node/calamine_js.js",
|
|
6
6
|
"module": "./dist/web/calamine_js.js",
|
|
@@ -35,18 +35,20 @@
|
|
|
35
35
|
"LICENSE"
|
|
36
36
|
],
|
|
37
37
|
"scripts": {
|
|
38
|
-
"build": "npm
|
|
38
|
+
"build": "npm-run-all --parallel build:node build:web && npm run postbuild",
|
|
39
39
|
"build:node": "wasm-pack build --target nodejs --out-dir dist/node --release",
|
|
40
40
|
"build:web": "wasm-pack build --target web --out-dir dist/web --release",
|
|
41
|
-
"build:dev": "npm
|
|
41
|
+
"build:dev": "npm-run-all --parallel build:node:dev build:web:dev && npm run postbuild",
|
|
42
42
|
"build:node:dev": "wasm-pack build --target nodejs --out-dir dist/node --dev",
|
|
43
43
|
"build:web:dev": "wasm-pack build --target web --out-dir dist/web --dev",
|
|
44
|
-
"build:size": "npm
|
|
44
|
+
"build:size": "npm-run-all --parallel build:node:size build:web:size && npm run postbuild",
|
|
45
45
|
"build:node:size": "wasm-pack build --target nodejs --out-dir dist/node --profile release-size",
|
|
46
46
|
"build:web:size": "wasm-pack build --target web --out-dir dist/web --profile release-size",
|
|
47
47
|
"postbuild": "rm -f dist/node/.gitignore dist/web/.gitignore dist/node/package.json dist/web/package.json dist/node/README.md dist/web/README.md dist/node/LICENSE dist/web/LICENSE",
|
|
48
|
-
"clean": "rm -rf dist target"
|
|
49
|
-
|
|
48
|
+
"clean": "rm -rf dist target"
|
|
49
|
+
},
|
|
50
|
+
"devDependencies": {
|
|
51
|
+
"npm-run-all": "^4.1.5"
|
|
50
52
|
},
|
|
51
53
|
"keywords": [
|
|
52
54
|
"excel",
|