@eredzik/calaminejs 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,454 @@
1
+ # @eredzik/calaminejs
2
+
3
+ A high-performance JavaScript/TypeScript library for reading Excel files (XLS/XLSX) and converting them to Parquet format. Built on the Rust [calamine](https://github.com/tafia/calamine) library using WebAssembly for optimal performance.
4
+
5
+ ## Features
6
+
7
+ - 📊 Read Excel files (XLS and XLSX formats)
8
+ - 🚀 High performance through WebAssembly
9
+ - 🔄 Convert sheets to Parquet format
10
+ - 🎯 Smart header row detection
11
+ - 📈 Progress tracking for large files
12
+ - 🌐 Works in Node.js and browsers
13
+ - 📝 Full TypeScript support
14
+ - 🔢 Preserves data types (strings, numbers, booleans, dates, etc.)
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ npm install @eredzik/calaminejs
20
+ ```
21
+
22
+ ## Quick Start
23
+
24
+ ### Node.js
25
+
26
+ ```javascript
27
+ import { Workbook } from '@eredzik/calaminejs';
28
+ import { readFileSync } from 'fs';
29
+
30
+ // Read an Excel file
31
+ const buffer = readFileSync('data.xlsx');
32
+ const workbook = Workbook.from_bytes(new Uint8Array(buffer));
33
+
34
+ // Get sheet names
35
+ const sheetNames = workbook.sheet_names();
36
+ console.log('Sheets:', sheetNames);
37
+
38
+ // Access a sheet
39
+ const sheet = workbook.get_sheet(sheetNames[0]);
40
+ console.log(`Rows: ${sheet.row_count()}, Columns: ${sheet.col_count()}`);
41
+
42
+ // Access cell data
43
+ const rows = sheet.rows;
44
+ console.log('First row:', rows[0]);
45
+ ```
46
+
47
+ ### Browser
48
+
49
+ ```javascript
50
+ import init, { Workbook } from '@eredzik/calaminejs/web';
51
+ await init(); // instantiate the WebAssembly module once before calling any other export
52
+ // From file input
53
+ const file = document.querySelector('input[type="file"]').files[0];
54
+ const arrayBuffer = await file.arrayBuffer();
55
+ const workbook = Workbook.from_bytes(new Uint8Array(arrayBuffer));
56
+
57
+ // Process the workbook
58
+ const sheet = workbook.get_sheet_by_index(0);
59
+ console.log('Sheet name:', sheet.name);
60
+ ```
61
+
62
+ ## API Reference
63
+
64
+ ### Workbook
65
+
66
+ The main class for working with Excel files.
67
+
68
+ #### Static Methods
69
+
70
+ ##### `Workbook.from_bytes(data: Uint8Array): Workbook`
71
+
72
+ Load an Excel file from bytes. Automatically detects XLS or XLSX format.
73
+
74
+ ```javascript
75
+ const workbook = Workbook.from_bytes(new Uint8Array(buffer));
76
+ ```
77
+
78
+ ##### `Workbook.from_bytes_with_progress(data: Uint8Array, callback?: Function, interval?: number): Workbook`
79
+
80
+ Load an Excel file with progress tracking.
81
+
82
+ ```javascript
83
+ const workbook = Workbook.from_bytes_with_progress(
84
+ new Uint8Array(buffer),
85
+ (progress) => {
86
+ console.log(`Processing sheet ${progress.sheetIndex + 1}/${progress.totalSheets}`);
87
+ console.log(`Sheet: ${progress.sheetName}, Row: ${progress.currentRow}`);
88
+ },
89
+ 100 // Report progress every 100 rows
90
+ );
91
+ ```
92
+
93
+ **Progress object properties:**
94
+ - `sheetIndex: number` - Current sheet index (0-based)
95
+ - `totalSheets: number` - Total number of sheets
96
+ - `sheetName: string` - Name of current sheet
97
+ - `currentRow: number` - Current row being processed
98
+ - `totalRows: number | null` - Total rows (available when sheet is complete)
99
+
100
+ #### Instance Methods
101
+
102
+ ##### `sheet_names(): string[]`
103
+
104
+ Get an array of all sheet names in the workbook.
105
+
106
+ ```javascript
107
+ const names = workbook.sheet_names();
108
+ ```
109
+
110
+ ##### `get_sheet(name: string): Sheet | undefined`
111
+
112
+ Get a sheet by name.
113
+
114
+ ```javascript
115
+ const sheet = workbook.get_sheet('Sheet1');
116
+ ```
117
+
118
+ ##### `get_sheet_by_index(index: number): Sheet | undefined`
119
+
120
+ Get a sheet by index (0-based).
121
+
122
+ ```javascript
123
+ const firstSheet = workbook.get_sheet_by_index(0);
124
+ ```
125
+
126
+ ##### `sheet_count(): number`
127
+
128
+ Get the total number of sheets.
129
+
130
+ ```javascript
131
+ const count = workbook.sheet_count();
132
+ ```
133
+
134
+ ### Sheet
135
+
136
+ Represents a single worksheet in an Excel file.
137
+
138
+ #### Properties
139
+
140
+ ##### `name: string`
141
+
142
+ The name of the sheet.
143
+
144
+ ```javascript
145
+ console.log(sheet.name); // "Sheet1"
146
+ ```
147
+
148
+ ##### `rows: Array<Array<any>>`
149
+
150
+ A 2D array of cell values. Values are converted to native JavaScript types:
151
+ - Strings → `string`
152
+ - Numbers → `number`
153
+ - Booleans → `boolean`
154
+ - Empty cells → `null`
155
+ - Dates → `number` (Excel serial date number)
156
+ - Errors → `string`
157
+
158
+ ```javascript
159
+ const rows = sheet.rows;
160
+ rows.forEach((row, rowIndex) => {
161
+ row.forEach((cell, colIndex) => {
162
+ console.log(`Cell [${rowIndex}, ${colIndex}]:`, cell);
163
+ });
164
+ });
165
+ ```
166
+
167
+ #### Methods
168
+
169
+ ##### `get_cell(row: number, col: number): CellValue | undefined`
170
+
171
+ Get a specific cell with type information.
172
+
173
+ ```javascript
174
+ const cell = sheet.get_cell(0, 0);
175
+ if (cell?.is_string) { // get_cell() may return undefined
176
+ console.log('String value:', cell.to_string_value());
177
+ }
178
+ ```
179
+
180
+ ##### `row_count(): number`
181
+
182
+ Get the number of rows in the sheet.
183
+
184
+ ```javascript
185
+ const rowCount = sheet.row_count();
186
+ ```
187
+
188
+ ##### `col_count(): number`
189
+
190
+ Get the maximum number of columns in the sheet.
191
+
192
+ ```javascript
193
+ const colCount = sheet.col_count();
194
+ ```
195
+
196
+ ##### `infer_header_row(): HeaderInfo | undefined`
197
+
198
+ Automatically detect which row contains the table header using heuristics:
199
+ - Headers typically contain string values in most columns
200
+ - Headers are followed by rows with data
201
+ - Headers have multiple non-empty cells
202
+ - Prioritizes rows in the first 20 rows
203
+
204
+ ```javascript
205
+ const headerInfo = sheet.infer_header_row();
206
+ if (headerInfo) {
207
+ console.log('Header found at row:', headerInfo.row_index);
208
+ console.log('Column names:', headerInfo.column_names);
209
+ }
210
+ ```
211
+
212
+ ##### `to_parquet(): Uint8Array`
213
+
214
+ Convert the sheet to Parquet format. Column types are automatically inferred:
215
+ - All booleans → Boolean column
216
+ - All integers → Int64 column
217
+ - All floats/numbers → Float64 column
218
+ - All dates → Datetime column (millisecond precision)
219
+ - Mixed or strings → String column
220
+
221
+ ```javascript
222
+ const parquetBytes = sheet.to_parquet();
223
+ // Save to file or process further
224
+ ```
225
+
226
+ ##### `to_parquet_with_names(columnNames: string[]): Uint8Array`
227
+
228
+ Convert the sheet to Parquet format with custom column names.
229
+
230
+ ```javascript
231
+ const columnNames = ['ID', 'Name', 'Age', 'Email'];
232
+ const parquetBytes = sheet.to_parquet_with_names(columnNames);
233
+ ```
234
+
235
+ ### CellValue
236
+
237
+ Detailed cell information with type checking and conversion methods.
238
+
239
+ #### Type Checking Properties
240
+
241
+ - `is_empty: boolean` - Check if cell is empty
242
+ - `is_string: boolean` - Check if cell contains a string
243
+ - `is_float: boolean` - Check if cell contains a float
244
+ - `is_int: boolean` - Check if cell contains an integer
245
+ - `is_bool: boolean` - Check if cell contains a boolean
246
+ - `is_error: boolean` - Check if cell contains an error
247
+ - `is_datetime: boolean` - Check if cell contains a date/time
248
+ - `is_duration: boolean` - Check if cell contains a duration
249
+
250
+ #### Conversion Methods
251
+
252
+ ##### `to_string_value(): string | undefined`
253
+
254
+ Convert cell to string representation.
255
+
256
+ ```javascript
257
+ const cell = sheet.get_cell(0, 0);
258
+ const str = cell.to_string_value();
259
+ ```
260
+
261
+ ##### `to_float_value(): number | undefined`
262
+
263
+ Convert cell to float (works for numbers, booleans, dates).
264
+
265
+ ```javascript
266
+ const num = cell.to_float_value();
267
+ ```
268
+
269
+ ##### `to_int_value(): number | undefined`
270
+
271
+ Convert cell to integer (works for integers, floats, booleans).
272
+
273
+ ```javascript
274
+ const int = cell.to_int_value();
275
+ ```
276
+
277
+ ##### `to_bool_value(): boolean | undefined`
278
+
279
+ Get boolean value (only works for boolean cells).
280
+
281
+ ```javascript
282
+ const bool = cell.to_bool_value();
283
+ ```
284
+
285
+ ### HeaderInfo
286
+
287
+ Information about detected header row.
288
+
289
+ #### Properties
290
+
291
+ - `row_index: number` - The index of the header row (0-based)
292
+ - `column_names: string[]` - Array of column names extracted from the header
293
+
294
+ ## Examples
295
+
296
+ ### Reading and Processing Data
297
+
298
+ ```javascript
299
+ import { Workbook } from '@eredzik/calaminejs';
300
+ import { readFileSync } from 'fs';
301
+
302
+ const buffer = readFileSync('sales.xlsx');
303
+ const workbook = Workbook.from_bytes(new Uint8Array(buffer));
304
+ const sheet = workbook.get_sheet('Sales Data');
305
+
306
+ // Detect header
307
+ const headerInfo = sheet.infer_header_row();
308
+ if (headerInfo) {
309
+ console.log('Columns:', headerInfo.column_names);
310
+
311
+ // Process data rows (skip header)
312
+ const dataRows = sheet.rows.slice(headerInfo.row_index + 1);
313
+ dataRows.forEach(row => {
314
+ console.log('Row data:', row);
315
+ });
316
+ }
317
+ ```
318
+
319
+ ### Converting to Parquet
320
+
321
+ ```javascript
322
+ import { Workbook } from '@eredzik/calaminejs';
323
+ import { writeFileSync, readFileSync } from 'fs';
324
+
325
+ const buffer = readFileSync('data.xlsx');
326
+ const workbook = Workbook.from_bytes(new Uint8Array(buffer));
327
+ const sheet = workbook.get_sheet_by_index(0);
328
+
329
+ // Option 1: Auto-generated column names
330
+ const parquet1 = sheet.to_parquet();
331
+ writeFileSync('output1.parquet', parquet1);
332
+
333
+ // Option 2: Custom column names
334
+ const headerInfo = sheet.infer_header_row();
335
+ if (headerInfo) {
336
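+ // Name the columns after the detected header.
337
+ // Note: Sheet is a WebAssembly-backed class instance; copying it with
338
+ // object spread ({ ...sheet }) drops its prototype methods, so call
339
+ // to_parquet_with_names() on the sheet itself. The header row remains
340
+ // part of the sheet's rows, so it may also appear in the output data.
341
+ const parquet2 = sheet.to_parquet_with_names(headerInfo.column_names);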
342
+ writeFileSync('output2.parquet', parquet2);
343
+ }
344
+ ```
345
+
346
+ ### Working with Cell Types
347
+
348
+ ```javascript
349
+ const sheet = workbook.get_sheet_by_index(0);
350
+
351
+ for (let row = 0; row < sheet.row_count(); row++) {
352
+ for (let col = 0; col < sheet.col_count(); col++) {
353
+ const cell = sheet.get_cell(row, col);
354
+ if (!cell) continue; // get_cell() may return undefined
355
+ if (cell.is_string) {
356
+ console.log(`String: ${cell.to_string_value()}`);
357
+ } else if (cell.is_int) {
358
+ console.log(`Integer: ${cell.to_int_value()}`);
359
+ } else if (cell.is_float) {
360
+ console.log(`Float: ${cell.to_float_value()}`);
361
+ } else if (cell.is_bool) {
362
+ console.log(`Boolean: ${cell.to_bool_value()}`);
363
+ } else if (cell.is_datetime) {
364
+ const excelDate = cell.to_float_value();
365
+ // Convert Excel date to JavaScript Date
366
+ const jsDate = new Date((excelDate - 25569) * 86400 * 1000);
367
+ console.log(`Date: ${jsDate.toISOString()}`);
368
+ }
369
+ }
370
+ }
371
+ ```
372
+
373
+ ### Progress Tracking for Large Files
374
+
375
+ ```javascript
376
+ import { Workbook } from '@eredzik/calaminejs';
377
+ import { readFileSync } from 'fs';
378
+
379
+ const buffer = readFileSync('large-file.xlsx');
380
+
381
+ console.log('Loading workbook...');
382
+ const workbook = Workbook.from_bytes_with_progress(
383
+ new Uint8Array(buffer),
384
+ (progress) => {
385
+ const status = progress.totalRows ? `${((progress.currentRow / progress.totalRows) * 100).toFixed(1)}%` : `row ${progress.currentRow}`; // totalRows is null until the sheet is complete
386
+ console.log(`[${progress.sheetName}] Processing: ${status}`);
387
+ },
388
+ 500 // Report every 500 rows
389
+ );
390
+
391
+ console.log('Workbook loaded successfully!');
392
+ ```
393
+
394
+ ## Browser vs Node.js
395
+
396
+ The package provides separate builds optimized for each environment:
397
+
398
+ ```javascript
399
+ // Node.js (default)
400
+ import { Workbook } from '@eredzik/calaminejs';
401
+ // or
402
+ import { Workbook } from '@eredzik/calaminejs/node';
403
+
404
+ // Browser
405
+ import { Workbook } from '@eredzik/calaminejs/web';
406
+ ```
407
+
408
+ ## Performance Tips
409
+
410
+ 1. **Use progress callbacks for large files** to provide user feedback (loading itself is synchronous and still blocks until it returns)
411
+ 2. **Process sheets on-demand** instead of loading all sheets at once
412
+ 3. **Use `get_cell()` for sparse data** instead of accessing the full `rows` array
413
+ 4. **Infer header once** and reuse the result instead of calling it multiple times
414
+ 5. **Convert to Parquet** for efficient storage and further processing with data tools
415
+
416
+ ## Requirements
417
+
418
+ - Node.js >= 16.0.0 (for Node.js usage)
419
+ - Modern browser with WebAssembly support (for browser usage)
420
+
421
+ ## Supported File Formats
422
+
423
+ - **XLSX** - Excel 2007+ (.xlsx)
424
+ - **XLS** - Excel 97-2003 (.xls)
425
+
426
+ ## Supported Data Types
427
+
428
+ The library preserves Excel data types:
429
+
430
+ - **Empty** - Empty cells
431
+ - **String** - Text values
432
+ - **Float** - Floating-point numbers
433
+ - **Int** - Integer numbers
434
+ - **Bool** - Boolean values (TRUE/FALSE)
435
+ - **Error** - Excel error values (#N/A, #REF!, etc.)
436
+ - **DateTime** - Date and time values (stored as Excel serial numbers)
437
+ - **Duration** - Duration values
438
+
439
+ ## License
440
+
441
+ MIT
442
+
443
+ ## Repository
444
+
445
+ GitHub: [https://github.com/eredzik/calaminejs](https://github.com/eredzik/calaminejs)
446
+
447
+ ## Issues
448
+
449
+ Report issues: [https://github.com/eredzik/calaminejs/issues](https://github.com/eredzik/calaminejs/issues)
450
+
451
+ ## Credits
452
+
453
+ Built on top of the excellent [calamine](https://github.com/tafia/calamine) Rust library and [Polars](https://www.pola.rs/) for Parquet conversion.
454
+
@@ -17,6 +17,13 @@ export class CellValue {
17
17
  readonly is_float: boolean;
18
18
  readonly is_string: boolean;
19
19
  }
20
+ export class HeaderInfo {
21
+ private constructor();
22
+ free(): void;
23
+ [Symbol.dispose](): void;
24
+ readonly column_names: string[];
25
+ readonly row_index: number;
26
+ }
20
27
  export class Sheet {
21
28
  private constructor();
22
29
  free(): void;
@@ -26,6 +33,16 @@ export class Sheet {
26
33
  * Returns the Parquet file as bytes
27
34
  */
28
35
  to_parquet(): Uint8Array;
36
+ /**
37
+ * Infer which row is the table header
38
+ * Returns HeaderInfo with the row index and column names, or undefined if no header is found
39
+ *
40
+ * This function uses heuristics to detect the header row:
41
+ * - Headers typically contain string values in most columns
42
+ * - Headers are followed by rows with data
43
+ * - Headers have multiple non-empty cells
44
+ */
45
+ infer_header_row(): HeaderInfo | undefined;
29
46
  /**
30
47
  * Convert sheet to Parquet format with custom column names
31
48
  * column_names: array of column names (must match col_count)
@@ -111,6 +111,17 @@ function getArrayU8FromWasm0(ptr, len) {
111
111
  return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
112
112
  }
113
113
 
114
+ function getArrayJsValueFromWasm0(ptr, len) {
115
+ ptr = ptr >>> 0;
116
+ const mem = getDataViewMemory0();
117
+ const result = [];
118
+ for (let i = ptr; i < ptr + 4 * len; i += 4) {
119
+ result.push(wasm.__wbindgen_externrefs.get(mem.getUint32(i, true)));
120
+ }
121
+ wasm.__externref_drop_slice(ptr, len);
122
+ return result;
123
+ }
124
+
114
125
  function takeFromExternrefTable0(idx) {
115
126
  const value = wasm.__wbindgen_externrefs.get(idx);
116
127
  wasm.__externref_table_dealloc(idx);
@@ -253,6 +264,52 @@ if (Symbol.dispose) CellValue.prototype[Symbol.dispose] = CellValue.prototype.fr
253
264
 
254
265
  exports.CellValue = CellValue;
255
266
 
267
+ const HeaderInfoFinalization = (typeof FinalizationRegistry === 'undefined')
268
+ ? { register: () => {}, unregister: () => {} }
269
+ : new FinalizationRegistry(ptr => wasm.__wbg_headerinfo_free(ptr >>> 0, 1));
270
+
271
+ class HeaderInfo {
272
+
273
+ static __wrap(ptr) {
274
+ ptr = ptr >>> 0;
275
+ const obj = Object.create(HeaderInfo.prototype);
276
+ obj.__wbg_ptr = ptr;
277
+ HeaderInfoFinalization.register(obj, obj.__wbg_ptr, obj);
278
+ return obj;
279
+ }
280
+
281
+ __destroy_into_raw() {
282
+ const ptr = this.__wbg_ptr;
283
+ this.__wbg_ptr = 0;
284
+ HeaderInfoFinalization.unregister(this);
285
+ return ptr;
286
+ }
287
+
288
+ free() {
289
+ const ptr = this.__destroy_into_raw();
290
+ wasm.__wbg_headerinfo_free(ptr, 0);
291
+ }
292
+ /**
293
+ * @returns {string[]}
294
+ */
295
+ get column_names() {
296
+ const ret = wasm.headerinfo_column_names(this.__wbg_ptr);
297
+ var v1 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
298
+ wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
299
+ return v1;
300
+ }
301
+ /**
302
+ * @returns {number}
303
+ */
304
+ get row_index() {
305
+ const ret = wasm.headerinfo_row_index(this.__wbg_ptr);
306
+ return ret >>> 0;
307
+ }
308
+ }
309
+ if (Symbol.dispose) HeaderInfo.prototype[Symbol.dispose] = HeaderInfo.prototype.free;
310
+
311
+ exports.HeaderInfo = HeaderInfo;
312
+
256
313
  const SheetFinalization = (typeof FinalizationRegistry === 'undefined')
257
314
  ? { register: () => {}, unregister: () => {} }
258
315
  : new FinalizationRegistry(ptr => wasm.__wbg_sheet_free(ptr >>> 0, 1));
@@ -292,6 +349,20 @@ class Sheet {
292
349
  wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
293
350
  return v1;
294
351
  }
352
+ /**
353
+ * Infer which row is the table header
354
+ * Returns HeaderInfo with the row index and column names, or undefined if no header is found
355
+ *
356
+ * This function uses heuristics to detect the header row:
357
+ * - Headers typically contain string values in most columns
358
+ * - Headers are followed by rows with data
359
+ * - Headers have multiple non-empty cells
360
+ * @returns {HeaderInfo | undefined}
361
+ */
362
+ infer_header_row() {
363
+ const ret = wasm.sheet_infer_header_row(this.__wbg_ptr);
364
+ return ret === 0 ? undefined : HeaderInfo.__wrap(ret);
365
+ }
295
366
  /**
296
367
  * Convert sheet to Parquet format with custom column names
297
368
  * column_names: array of column names (must match col_count)
Binary file
@@ -2,6 +2,7 @@
2
2
  /* eslint-disable */
3
3
  export const memory: WebAssembly.Memory;
4
4
  export const __wbg_cellvalue_free: (a: number, b: number) => void;
5
+ export const __wbg_headerinfo_free: (a: number, b: number) => void;
5
6
  export const __wbg_sheet_free: (a: number, b: number) => void;
6
7
  export const __wbg_workbook_free: (a: number, b: number) => void;
7
8
  export const cellvalue_is_bool: (a: number) => number;
@@ -16,8 +17,11 @@ export const cellvalue_to_bool_value: (a: number) => number;
16
17
  export const cellvalue_to_float_value: (a: number) => [number, number];
17
18
  export const cellvalue_to_int_value: (a: number) => [number, bigint];
18
19
  export const cellvalue_to_string_value: (a: number) => [number, number];
20
+ export const headerinfo_column_names: (a: number) => [number, number];
21
+ export const headerinfo_row_index: (a: number) => number;
19
22
  export const sheet_col_count: (a: number) => number;
20
23
  export const sheet_get_cell: (a: number, b: number, c: number) => number;
24
+ export const sheet_infer_header_row: (a: number) => number;
21
25
  export const sheet_name: (a: number) => [number, number];
22
26
  export const sheet_row_count: (a: number) => number;
23
27
  export const sheet_rows: (a: number) => any;
@@ -49,6 +53,7 @@ export const __wbindgen_realloc: (a: number, b: number, c: number, d: number) =>
49
53
  export const __wbindgen_exn_store: (a: number) => void;
50
54
  export const __externref_table_alloc: () => number;
51
55
  export const __wbindgen_externrefs: WebAssembly.Table;
52
- export const __externref_table_dealloc: (a: number) => void;
56
+ export const __externref_drop_slice: (a: number, b: number) => void;
53
57
  export const __wbindgen_free: (a: number, b: number, c: number) => void;
58
+ export const __externref_table_dealloc: (a: number) => void;
54
59
  export const __wbindgen_start: () => void;
@@ -17,6 +17,13 @@ export class CellValue {
17
17
  readonly is_float: boolean;
18
18
  readonly is_string: boolean;
19
19
  }
20
+ export class HeaderInfo {
21
+ private constructor();
22
+ free(): void;
23
+ [Symbol.dispose](): void;
24
+ readonly column_names: string[];
25
+ readonly row_index: number;
26
+ }
20
27
  export class Sheet {
21
28
  private constructor();
22
29
  free(): void;
@@ -26,6 +33,16 @@ export class Sheet {
26
33
  * Returns the Parquet file as bytes
27
34
  */
28
35
  to_parquet(): Uint8Array;
36
+ /**
37
+ * Infer which row is the table header
38
+ * Returns HeaderInfo with the row index and column names, or undefined if no header is found
39
+ *
40
+ * This function uses heuristics to detect the header row:
41
+ * - Headers typically contain string values in most columns
42
+ * - Headers are followed by rows with data
43
+ * - Headers have multiple non-empty cells
44
+ */
45
+ infer_header_row(): HeaderInfo | undefined;
29
46
  /**
30
47
  * Convert sheet to Parquet format with custom column names
31
48
  * column_names: array of column names (must match col_count)
@@ -54,6 +71,7 @@ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembl
54
71
  export interface InitOutput {
55
72
  readonly memory: WebAssembly.Memory;
56
73
  readonly __wbg_cellvalue_free: (a: number, b: number) => void;
74
+ readonly __wbg_headerinfo_free: (a: number, b: number) => void;
57
75
  readonly __wbg_sheet_free: (a: number, b: number) => void;
58
76
  readonly __wbg_workbook_free: (a: number, b: number) => void;
59
77
  readonly cellvalue_is_bool: (a: number) => number;
@@ -68,8 +86,11 @@ export interface InitOutput {
68
86
  readonly cellvalue_to_float_value: (a: number) => [number, number];
69
87
  readonly cellvalue_to_int_value: (a: number) => [number, bigint];
70
88
  readonly cellvalue_to_string_value: (a: number) => [number, number];
89
+ readonly headerinfo_column_names: (a: number) => [number, number];
90
+ readonly headerinfo_row_index: (a: number) => number;
71
91
  readonly sheet_col_count: (a: number) => number;
72
92
  readonly sheet_get_cell: (a: number, b: number, c: number) => number;
93
+ readonly sheet_infer_header_row: (a: number) => number;
73
94
  readonly sheet_name: (a: number) => [number, number];
74
95
  readonly sheet_row_count: (a: number) => number;
75
96
  readonly sheet_rows: (a: number) => any;
@@ -101,8 +122,9 @@ export interface InitOutput {
101
122
  readonly __wbindgen_exn_store: (a: number) => void;
102
123
  readonly __externref_table_alloc: () => number;
103
124
  readonly __wbindgen_externrefs: WebAssembly.Table;
104
- readonly __externref_table_dealloc: (a: number) => void;
125
+ readonly __externref_drop_slice: (a: number, b: number) => void;
105
126
  readonly __wbindgen_free: (a: number, b: number, c: number) => void;
127
+ readonly __externref_table_dealloc: (a: number) => void;
106
128
  readonly __wbindgen_start: () => void;
107
129
  }
108
130
 
@@ -117,6 +117,17 @@ function getArrayU8FromWasm0(ptr, len) {
117
117
  return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
118
118
  }
119
119
 
120
+ function getArrayJsValueFromWasm0(ptr, len) {
121
+ ptr = ptr >>> 0;
122
+ const mem = getDataViewMemory0();
123
+ const result = [];
124
+ for (let i = ptr; i < ptr + 4 * len; i += 4) {
125
+ result.push(wasm.__wbindgen_externrefs.get(mem.getUint32(i, true)));
126
+ }
127
+ wasm.__externref_drop_slice(ptr, len);
128
+ return result;
129
+ }
130
+
120
131
  function takeFromExternrefTable0(idx) {
121
132
  const value = wasm.__wbindgen_externrefs.get(idx);
122
133
  wasm.__externref_table_dealloc(idx);
@@ -257,6 +268,50 @@ export class CellValue {
257
268
  }
258
269
  if (Symbol.dispose) CellValue.prototype[Symbol.dispose] = CellValue.prototype.free;
259
270
 
271
+ const HeaderInfoFinalization = (typeof FinalizationRegistry === 'undefined')
272
+ ? { register: () => {}, unregister: () => {} }
273
+ : new FinalizationRegistry(ptr => wasm.__wbg_headerinfo_free(ptr >>> 0, 1));
274
+
275
+ export class HeaderInfo {
276
+
277
+ static __wrap(ptr) {
278
+ ptr = ptr >>> 0;
279
+ const obj = Object.create(HeaderInfo.prototype);
280
+ obj.__wbg_ptr = ptr;
281
+ HeaderInfoFinalization.register(obj, obj.__wbg_ptr, obj);
282
+ return obj;
283
+ }
284
+
285
+ __destroy_into_raw() {
286
+ const ptr = this.__wbg_ptr;
287
+ this.__wbg_ptr = 0;
288
+ HeaderInfoFinalization.unregister(this);
289
+ return ptr;
290
+ }
291
+
292
+ free() {
293
+ const ptr = this.__destroy_into_raw();
294
+ wasm.__wbg_headerinfo_free(ptr, 0);
295
+ }
296
+ /**
297
+ * @returns {string[]}
298
+ */
299
+ get column_names() {
300
+ const ret = wasm.headerinfo_column_names(this.__wbg_ptr);
301
+ var v1 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
302
+ wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
303
+ return v1;
304
+ }
305
+ /**
306
+ * @returns {number}
307
+ */
308
+ get row_index() {
309
+ const ret = wasm.headerinfo_row_index(this.__wbg_ptr);
310
+ return ret >>> 0;
311
+ }
312
+ }
313
+ if (Symbol.dispose) HeaderInfo.prototype[Symbol.dispose] = HeaderInfo.prototype.free;
314
+
260
315
  const SheetFinalization = (typeof FinalizationRegistry === 'undefined')
261
316
  ? { register: () => {}, unregister: () => {} }
262
317
  : new FinalizationRegistry(ptr => wasm.__wbg_sheet_free(ptr >>> 0, 1));
@@ -296,6 +351,20 @@ export class Sheet {
296
351
  wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
297
352
  return v1;
298
353
  }
354
+ /**
355
+ * Infer which row is the table header
356
+ * Returns HeaderInfo with the row index and column names, or undefined if no header is found
357
+ *
358
+ * This function uses heuristics to detect the header row:
359
+ * - Headers typically contain string values in most columns
360
+ * - Headers are followed by rows with data
361
+ * - Headers have multiple non-empty cells
362
+ * @returns {HeaderInfo | undefined}
363
+ */
364
+ infer_header_row() {
365
+ const ret = wasm.sheet_infer_header_row(this.__wbg_ptr);
366
+ return ret === 0 ? undefined : HeaderInfo.__wrap(ret);
367
+ }
299
368
  /**
300
369
  * Convert sheet to Parquet format with custom column names
301
370
  * column_names: array of column names (must match col_count)
Binary file
@@ -2,6 +2,7 @@
2
2
  /* eslint-disable */
3
3
  export const memory: WebAssembly.Memory;
4
4
  export const __wbg_cellvalue_free: (a: number, b: number) => void;
5
+ export const __wbg_headerinfo_free: (a: number, b: number) => void;
5
6
  export const __wbg_sheet_free: (a: number, b: number) => void;
6
7
  export const __wbg_workbook_free: (a: number, b: number) => void;
7
8
  export const cellvalue_is_bool: (a: number) => number;
@@ -16,8 +17,11 @@ export const cellvalue_to_bool_value: (a: number) => number;
16
17
  export const cellvalue_to_float_value: (a: number) => [number, number];
17
18
  export const cellvalue_to_int_value: (a: number) => [number, bigint];
18
19
  export const cellvalue_to_string_value: (a: number) => [number, number];
20
+ export const headerinfo_column_names: (a: number) => [number, number];
21
+ export const headerinfo_row_index: (a: number) => number;
19
22
  export const sheet_col_count: (a: number) => number;
20
23
  export const sheet_get_cell: (a: number, b: number, c: number) => number;
24
+ export const sheet_infer_header_row: (a: number) => number;
21
25
  export const sheet_name: (a: number) => [number, number];
22
26
  export const sheet_row_count: (a: number) => number;
23
27
  export const sheet_rows: (a: number) => any;
@@ -49,6 +53,7 @@ export const __wbindgen_realloc: (a: number, b: number, c: number, d: number) =>
49
53
  export const __wbindgen_exn_store: (a: number) => void;
50
54
  export const __externref_table_alloc: () => number;
51
55
  export const __wbindgen_externrefs: WebAssembly.Table;
52
- export const __externref_table_dealloc: (a: number) => void;
56
+ export const __externref_drop_slice: (a: number, b: number) => void;
53
57
  export const __wbindgen_free: (a: number, b: number, c: number) => void;
58
+ export const __externref_table_dealloc: (a: number) => void;
54
59
  export const __wbindgen_start: () => void;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@eredzik/calaminejs",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "Rust calamine library bindings for JavaScript/TypeScript - Excel file reading and Parquet conversion",
5
5
  "main": "./dist/node/calamine_js.js",
6
6
  "module": "./dist/web/calamine_js.js",
@@ -35,18 +35,20 @@
35
35
  "LICENSE"
36
36
  ],
37
37
  "scripts": {
38
- "build": "npm run build:node && npm run build:web && npm run postbuild",
38
+ "build": "npm-run-all --parallel build:node build:web && npm run postbuild",
39
39
  "build:node": "wasm-pack build --target nodejs --out-dir dist/node --release",
40
40
  "build:web": "wasm-pack build --target web --out-dir dist/web --release",
41
- "build:dev": "npm run build:node:dev && npm run build:web:dev && npm run postbuild",
41
+ "build:dev": "npm-run-all --parallel build:node:dev build:web:dev && npm run postbuild",
42
42
  "build:node:dev": "wasm-pack build --target nodejs --out-dir dist/node --dev",
43
43
  "build:web:dev": "wasm-pack build --target web --out-dir dist/web --dev",
44
- "build:size": "npm run build:node:size && npm run build:web:size && npm run postbuild",
44
+ "build:size": "npm-run-all --parallel build:node:size build:web:size && npm run postbuild",
45
45
  "build:node:size": "wasm-pack build --target nodejs --out-dir dist/node --profile release-size",
46
46
  "build:web:size": "wasm-pack build --target web --out-dir dist/web --profile release-size",
47
47
  "postbuild": "rm -f dist/node/.gitignore dist/web/.gitignore dist/node/package.json dist/web/package.json dist/node/README.md dist/web/README.md dist/node/LICENSE dist/web/LICENSE",
48
- "clean": "rm -rf dist target",
49
- "prepublishOnly": "npm run build"
48
+ "clean": "rm -rf dist target"
49
+ },
50
+ "devDependencies": {
51
+ "npm-run-all": "^4.1.5"
50
52
  },
51
53
  "keywords": [
52
54
  "excel",