juxscript 1.1.205 → 1.1.207
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/components/dataframe.d.ts +7 -2
- package/lib/components/dataframe.d.ts.map +1 -1
- package/lib/components/dataframe.js +185 -243
- package/lib/components/dataframe.ts +203 -255
- package/lib/storage/TabularDriver.d.ts +10 -4
- package/lib/storage/TabularDriver.d.ts.map +1 -1
- package/lib/storage/TabularDriver.js +61 -23
- package/lib/storage/TabularDriver.ts +57 -23
- package/package.json +1 -1
|
@@ -486,12 +486,63 @@ export class TabularDriver {
|
|
|
486
486
|
df = df.select(...selectCols);
|
|
487
487
|
return df;
|
|
488
488
|
}
|
|
489
|
+
/**
|
|
490
|
+
* Read raw cell values from first sheet of an Excel file.
|
|
491
|
+
* Returns rows with their actual sheet row indices.
|
|
492
|
+
* Used by both the preview UI and the parser to ensure consistency.
|
|
493
|
+
*/
|
|
494
|
+
async readRawExcelRows(file, maxRows = 15) {
|
|
495
|
+
let XLSX;
|
|
496
|
+
try {
|
|
497
|
+
XLSX = await import('xlsx');
|
|
498
|
+
}
|
|
499
|
+
catch {
|
|
500
|
+
throw new Error('XLSX support requires the "xlsx" package.');
|
|
501
|
+
}
|
|
502
|
+
const buffer = await file.arrayBuffer();
|
|
503
|
+
const workbook = XLSX.read(buffer, {
|
|
504
|
+
type: 'array',
|
|
505
|
+
sheetRows: maxRows + 5,
|
|
506
|
+
dense: false
|
|
507
|
+
});
|
|
508
|
+
const sheetName = workbook.SheetNames[0];
|
|
509
|
+
const worksheet = workbook.Sheets[sheetName];
|
|
510
|
+
const ref = worksheet['!ref'];
|
|
511
|
+
if (!ref)
|
|
512
|
+
return [];
|
|
513
|
+
const range = XLSX.utils.decode_range(ref);
|
|
514
|
+
const startRow = range.s.r;
|
|
515
|
+
const endRow = Math.min(range.e.r, startRow + maxRows - 1);
|
|
516
|
+
const startCol = range.s.c;
|
|
517
|
+
const endCol = range.e.c;
|
|
518
|
+
console.log(`[readRawExcelRows] ref=${ref}, startRow=${startRow}, endRow=${endRow}, cols=${startCol}-${endCol}`);
|
|
519
|
+
const rows = [];
|
|
520
|
+
for (let r = startRow; r <= endRow; r++) {
|
|
521
|
+
const values = [];
|
|
522
|
+
for (let c = startCol; c <= endCol; c++) {
|
|
523
|
+
const addr = XLSX.utils.encode_cell({ r, c });
|
|
524
|
+
const cell = worksheet[addr];
|
|
525
|
+
if (!cell) {
|
|
526
|
+
values.push(null);
|
|
527
|
+
continue;
|
|
528
|
+
}
|
|
529
|
+
if (cell.w !== undefined) {
|
|
530
|
+
values.push(cell.w);
|
|
531
|
+
continue;
|
|
532
|
+
}
|
|
533
|
+
if (cell.v !== undefined) {
|
|
534
|
+
values.push(cell.v);
|
|
535
|
+
continue;
|
|
536
|
+
}
|
|
537
|
+
values.push(null);
|
|
538
|
+
}
|
|
539
|
+
rows.push({ sheetRow: r, values });
|
|
540
|
+
}
|
|
541
|
+
return rows;
|
|
542
|
+
}
|
|
489
543
|
/**
|
|
490
544
|
* ✅ FIXED: Stream Excel file with optional headerRow override
|
|
491
|
-
* headerRow is
|
|
492
|
-
*
|
|
493
|
-
* Uses direct cell access instead of sheet_to_json to avoid
|
|
494
|
-
* issues with blank row handling and sparse arrays.
|
|
545
|
+
* headerRow is the absolute sheet row index (same as sheetRow from readRawExcelRows).
|
|
495
546
|
*/
|
|
496
547
|
async streamFileMultiSheet(file, options = {}) {
|
|
497
548
|
const { maxSheetSize = 100000, sheetChunkSize = 10000, onProgress, headerRow = 0 } = options;
|
|
@@ -531,7 +582,6 @@ export class TabularDriver {
|
|
|
531
582
|
processedSheets++;
|
|
532
583
|
continue;
|
|
533
584
|
}
|
|
534
|
-
// Direct cell reader - bypasses sheet_to_json completely
|
|
535
585
|
const readCellValue = (r, c) => {
|
|
536
586
|
const addr = XLSX.utils.encode_cell({ r, c });
|
|
537
587
|
const cell = worksheet[addr];
|
|
@@ -550,46 +600,37 @@ export class TabularDriver {
|
|
|
550
600
|
}
|
|
551
601
|
return vals;
|
|
552
602
|
};
|
|
553
|
-
//
|
|
554
|
-
//
|
|
555
|
-
// sees row 0 as the first row regardless of where the sheet
|
|
556
|
-
// range begins. We simply use headerRow as the absolute sheet
|
|
557
|
-
// row index.
|
|
603
|
+
// headerRow is the absolute sheet row index — same value as
|
|
604
|
+
// sheetRow from readRawExcelRows(). No offset needed.
|
|
558
605
|
const headerSheetRow = headerRow;
|
|
559
|
-
console.log(`[TabularDriver] Sheet "${sheetName}": range=${ref}, startRow=${startRow}, endRow=${endRow}
|
|
560
|
-
console.log(`[TabularDriver] headerRow=${headerRow}, headerSheetRow=${headerSheetRow}
|
|
606
|
+
console.log(`[TabularDriver] Sheet "${sheetName}": range=${ref}, startRow=${startRow}, endRow=${endRow}`);
|
|
607
|
+
console.log(`[TabularDriver] headerRow=${headerRow}, headerSheetRow=${headerSheetRow}`);
|
|
561
608
|
if (headerSheetRow > endRow) {
|
|
562
|
-
console.warn(`[TabularDriver] headerRow ${headerRow}
|
|
609
|
+
console.warn(`[TabularDriver] headerRow ${headerRow} exceeds endRow ${endRow}`);
|
|
563
610
|
processedSheets++;
|
|
564
611
|
continue;
|
|
565
612
|
}
|
|
566
|
-
// Read header values directly from cells
|
|
567
613
|
const headerValues = readRow(headerSheetRow);
|
|
568
614
|
console.log(`[TabularDriver] Raw header values at sheet row ${headerSheetRow}:`, headerValues);
|
|
569
|
-
// Build headers array
|
|
570
615
|
const headers = headerValues.map((h, i) => {
|
|
571
616
|
if (h === null || h === undefined || String(h).trim() === '') {
|
|
572
617
|
return `__EMPTY${i > 0 ? '_' + i : ''}`;
|
|
573
618
|
}
|
|
574
619
|
return String(h).trim();
|
|
575
620
|
});
|
|
576
|
-
// Count valid headers
|
|
577
621
|
const validHeaders = headers.filter(h => !h.startsWith('__EMPTY'));
|
|
578
622
|
console.log(`[TabularDriver] Headers (${validHeaders.length} valid / ${headers.length} total):`, headers);
|
|
579
623
|
if (validHeaders.length === 0) {
|
|
580
|
-
console.warn(`[TabularDriver] No valid headers found at row ${headerRow}
|
|
581
|
-
// Log surrounding rows for debugging
|
|
624
|
+
console.warn(`[TabularDriver] No valid headers found at row ${headerRow} in sheet "${sheetName}"`);
|
|
582
625
|
for (let debugR = Math.max(startRow, headerSheetRow - 2); debugR <= Math.min(endRow, headerSheetRow + 2); debugR++) {
|
|
583
626
|
console.log(`[TabularDriver] row ${debugR}:`, readRow(debugR));
|
|
584
627
|
}
|
|
585
628
|
processedSheets++;
|
|
586
629
|
continue;
|
|
587
630
|
}
|
|
588
|
-
// Build data rows: everything after the header row
|
|
589
631
|
const rows = [];
|
|
590
632
|
for (let r = headerSheetRow + 1; r <= endRow; r++) {
|
|
591
633
|
const rowData = readRow(r);
|
|
592
|
-
// Skip completely empty rows
|
|
593
634
|
const hasContent = rowData.some(cell => cell !== null && cell !== undefined && String(cell).trim() !== '');
|
|
594
635
|
if (!hasContent)
|
|
595
636
|
continue;
|
|
@@ -600,9 +641,6 @@ export class TabularDriver {
|
|
|
600
641
|
rows.push(row);
|
|
601
642
|
}
|
|
602
643
|
console.log(`[TabularDriver] Built ${rows.length} data rows for sheet "${sheetName}"`);
|
|
603
|
-
if (rows.length > 0) {
|
|
604
|
-
console.log(`[TabularDriver] First row:`, rows[0]);
|
|
605
|
-
}
|
|
606
644
|
if (rows.length > 0) {
|
|
607
645
|
sheets[sheetName] = new DataFrame(rows);
|
|
608
646
|
}
|
|
@@ -606,12 +606,59 @@ export class TabularDriver {
|
|
|
606
606
|
return df;
|
|
607
607
|
}
|
|
608
608
|
|
|
609
|
+
/**
|
|
610
|
+
* Read raw cell values from first sheet of an Excel file.
|
|
611
|
+
* Returns rows with their actual sheet row indices.
|
|
612
|
+
* Used by both the preview UI and the parser to ensure consistency.
|
|
613
|
+
*/
|
|
614
|
+
async readRawExcelRows(file: File, maxRows: number = 15): Promise<{ sheetRow: number; values: any[] }[]> {
|
|
615
|
+
let XLSX: any;
|
|
616
|
+
try {
|
|
617
|
+
XLSX = await import('xlsx');
|
|
618
|
+
} catch {
|
|
619
|
+
throw new Error('XLSX support requires the "xlsx" package.');
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
const buffer = await file.arrayBuffer();
|
|
623
|
+
const workbook = XLSX.read(buffer, {
|
|
624
|
+
type: 'array',
|
|
625
|
+
sheetRows: maxRows + 5,
|
|
626
|
+
dense: false
|
|
627
|
+
});
|
|
628
|
+
|
|
629
|
+
const sheetName = workbook.SheetNames[0];
|
|
630
|
+
const worksheet = workbook.Sheets[sheetName];
|
|
631
|
+
const ref = worksheet['!ref'];
|
|
632
|
+
if (!ref) return [];
|
|
633
|
+
|
|
634
|
+
const range = XLSX.utils.decode_range(ref);
|
|
635
|
+
const startRow = range.s.r;
|
|
636
|
+
const endRow = Math.min(range.e.r, startRow + maxRows - 1);
|
|
637
|
+
const startCol = range.s.c;
|
|
638
|
+
const endCol = range.e.c;
|
|
639
|
+
|
|
640
|
+
console.log(`[readRawExcelRows] ref=${ref}, startRow=${startRow}, endRow=${endRow}, cols=${startCol}-${endCol}`);
|
|
641
|
+
|
|
642
|
+
const rows: { sheetRow: number; values: any[] }[] = [];
|
|
643
|
+
for (let r = startRow; r <= endRow; r++) {
|
|
644
|
+
const values: any[] = [];
|
|
645
|
+
for (let c = startCol; c <= endCol; c++) {
|
|
646
|
+
const addr = XLSX.utils.encode_cell({ r, c });
|
|
647
|
+
const cell = worksheet[addr];
|
|
648
|
+
if (!cell) { values.push(null); continue; }
|
|
649
|
+
if (cell.w !== undefined) { values.push(cell.w); continue; }
|
|
650
|
+
if (cell.v !== undefined) { values.push(cell.v); continue; }
|
|
651
|
+
values.push(null);
|
|
652
|
+
}
|
|
653
|
+
rows.push({ sheetRow: r, values });
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
return rows;
|
|
657
|
+
}
|
|
658
|
+
|
|
609
659
|
/**
|
|
610
660
|
* ✅ FIXED: Stream Excel file with optional headerRow override
|
|
611
|
-
* headerRow is
|
|
612
|
-
*
|
|
613
|
-
* Uses direct cell access instead of sheet_to_json to avoid
|
|
614
|
-
* issues with blank row handling and sparse arrays.
|
|
661
|
+
* headerRow is the absolute sheet row index (same as sheetRow from readRawExcelRows).
|
|
615
662
|
*/
|
|
616
663
|
async streamFileMultiSheet(file: File, options: ParseOptions = {}): Promise<Record<string, DataFrame>> {
|
|
617
664
|
const { maxSheetSize = 100000, sheetChunkSize = 10000, onProgress, headerRow = 0 } = options;
|
|
@@ -659,7 +706,6 @@ export class TabularDriver {
|
|
|
659
706
|
continue;
|
|
660
707
|
}
|
|
661
708
|
|
|
662
|
-
// Direct cell reader - bypasses sheet_to_json completely
|
|
663
709
|
const readCellValue = (r: number, c: number): any => {
|
|
664
710
|
const addr = XLSX.utils.encode_cell({ r, c });
|
|
665
711
|
const cell = worksheet[addr];
|
|
@@ -677,27 +723,22 @@ export class TabularDriver {
|
|
|
677
723
|
return vals;
|
|
678
724
|
};
|
|
679
725
|
|
|
680
|
-
//
|
|
681
|
-
//
|
|
682
|
-
// sees row 0 as the first row regardless of where the sheet
|
|
683
|
-
// range begins. We simply use headerRow as the absolute sheet
|
|
684
|
-
// row index.
|
|
726
|
+
// headerRow is the absolute sheet row index — same value as
|
|
727
|
+
// sheetRow from readRawExcelRows(). No offset needed.
|
|
685
728
|
const headerSheetRow = headerRow;
|
|
686
729
|
|
|
687
|
-
console.log(`[TabularDriver] Sheet "${sheetName}": range=${ref}, startRow=${startRow}, endRow=${endRow}
|
|
688
|
-
console.log(`[TabularDriver] headerRow=${headerRow}, headerSheetRow=${headerSheetRow}
|
|
730
|
+
console.log(`[TabularDriver] Sheet "${sheetName}": range=${ref}, startRow=${startRow}, endRow=${endRow}`);
|
|
731
|
+
console.log(`[TabularDriver] headerRow=${headerRow}, headerSheetRow=${headerSheetRow}`);
|
|
689
732
|
|
|
690
733
|
if (headerSheetRow > endRow) {
|
|
691
|
-
console.warn(`[TabularDriver] headerRow ${headerRow}
|
|
734
|
+
console.warn(`[TabularDriver] headerRow ${headerRow} exceeds endRow ${endRow}`);
|
|
692
735
|
processedSheets++;
|
|
693
736
|
continue;
|
|
694
737
|
}
|
|
695
738
|
|
|
696
|
-
// Read header values directly from cells
|
|
697
739
|
const headerValues = readRow(headerSheetRow);
|
|
698
740
|
console.log(`[TabularDriver] Raw header values at sheet row ${headerSheetRow}:`, headerValues);
|
|
699
741
|
|
|
700
|
-
// Build headers array
|
|
701
742
|
const headers: string[] = headerValues.map((h: any, i: number) => {
|
|
702
743
|
if (h === null || h === undefined || String(h).trim() === '') {
|
|
703
744
|
return `__EMPTY${i > 0 ? '_' + i : ''}`;
|
|
@@ -705,13 +746,11 @@ export class TabularDriver {
|
|
|
705
746
|
return String(h).trim();
|
|
706
747
|
});
|
|
707
748
|
|
|
708
|
-
// Count valid headers
|
|
709
749
|
const validHeaders = headers.filter(h => !h.startsWith('__EMPTY'));
|
|
710
750
|
console.log(`[TabularDriver] Headers (${validHeaders.length} valid / ${headers.length} total):`, headers);
|
|
711
751
|
|
|
712
752
|
if (validHeaders.length === 0) {
|
|
713
|
-
console.warn(`[TabularDriver] No valid headers found at row ${headerRow}
|
|
714
|
-
// Log surrounding rows for debugging
|
|
753
|
+
console.warn(`[TabularDriver] No valid headers found at row ${headerRow} in sheet "${sheetName}"`);
|
|
715
754
|
for (let debugR = Math.max(startRow, headerSheetRow - 2); debugR <= Math.min(endRow, headerSheetRow + 2); debugR++) {
|
|
716
755
|
console.log(`[TabularDriver] row ${debugR}:`, readRow(debugR));
|
|
717
756
|
}
|
|
@@ -719,12 +758,10 @@ export class TabularDriver {
|
|
|
719
758
|
continue;
|
|
720
759
|
}
|
|
721
760
|
|
|
722
|
-
// Build data rows: everything after the header row
|
|
723
761
|
const rows: Record<string, any>[] = [];
|
|
724
762
|
for (let r = headerSheetRow + 1; r <= endRow; r++) {
|
|
725
763
|
const rowData = readRow(r);
|
|
726
764
|
|
|
727
|
-
// Skip completely empty rows
|
|
728
765
|
const hasContent = rowData.some(cell =>
|
|
729
766
|
cell !== null && cell !== undefined && String(cell).trim() !== ''
|
|
730
767
|
);
|
|
@@ -738,9 +775,6 @@ export class TabularDriver {
|
|
|
738
775
|
}
|
|
739
776
|
|
|
740
777
|
console.log(`[TabularDriver] Built ${rows.length} data rows for sheet "${sheetName}"`);
|
|
741
|
-
if (rows.length > 0) {
|
|
742
|
-
console.log(`[TabularDriver] First row:`, rows[0]);
|
|
743
|
-
}
|
|
744
778
|
|
|
745
779
|
if (rows.length > 0) {
|
|
746
780
|
sheets[sheetName] = new DataFrame(rows);
|