juxscript 1.1.180 → 1.1.182
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/components/dataframe.d.ts +22 -0
- package/lib/components/dataframe.d.ts.map +1 -1
- package/lib/components/dataframe.js +75 -15
- package/lib/components/dataframe.ts +89 -15
- package/lib/storage/TabularDriver.d.ts +17 -4
- package/lib/storage/TabularDriver.d.ts.map +1 -1
- package/lib/storage/TabularDriver.js +146 -16
- package/lib/storage/TabularDriver.ts +177 -19
- package/package.json +1 -1
|
@@ -14,6 +14,10 @@ export interface DataFrameOptions {
|
|
|
14
14
|
rowsPerPage?: number;
|
|
15
15
|
showStatus?: boolean;
|
|
16
16
|
icon?: string;
|
|
17
|
+
maxSheetSize?: number;
|
|
18
|
+
sheetChunkSize?: number;
|
|
19
|
+
maxFileSize?: number;
|
|
20
|
+
showReshapeWarning?: boolean;
|
|
17
21
|
style?: string;
|
|
18
22
|
class?: string;
|
|
19
23
|
}
|
|
@@ -36,6 +40,11 @@ export declare class DataFrameComponent extends BaseComponent<DataFrameState> {
|
|
|
36
40
|
private _inlineUpload;
|
|
37
41
|
private _showStatus;
|
|
38
42
|
private _icon;
|
|
43
|
+
private _maxSheetSize;
|
|
44
|
+
private _sheetChunkSize;
|
|
45
|
+
private _maxFileSize;
|
|
46
|
+
private _showReshapeWarning;
|
|
47
|
+
private _rawFileData;
|
|
39
48
|
constructor(id: string, options?: DataFrameOptions);
|
|
40
49
|
protected getTriggerEvents(): readonly string[];
|
|
41
50
|
protected getCallbackEvents(): readonly string[];
|
|
@@ -68,6 +77,18 @@ export declare class DataFrameComponent extends BaseComponent<DataFrameState> {
|
|
|
68
77
|
filterable(v: boolean): this;
|
|
69
78
|
paginated(v: boolean): this;
|
|
70
79
|
rowsPerPage(v: number): this;
|
|
80
|
+
/**
|
|
81
|
+
* ✅ NEW: Set max rows per sheet (prevents memory issues with huge Excel files)
|
|
82
|
+
*/
|
|
83
|
+
maxSheetSize(v: number): this;
|
|
84
|
+
/**
|
|
85
|
+
* ✅ NEW: Set chunk size for processing large sheets
|
|
86
|
+
*/
|
|
87
|
+
sheetChunkSize(v: number): this;
|
|
88
|
+
/**
|
|
89
|
+
* ✅ NEW: Set max file size in MB
|
|
90
|
+
*/
|
|
91
|
+
maxFileSize(mb: number): this;
|
|
71
92
|
/**
|
|
72
93
|
* ✅ FIXED: Render multiple Excel sheets as tabs
|
|
73
94
|
*/
|
|
@@ -76,6 +97,7 @@ export declare class DataFrameComponent extends BaseComponent<DataFrameState> {
|
|
|
76
97
|
private _setDataFrame;
|
|
77
98
|
private _updateTable;
|
|
78
99
|
private _showFilterInput;
|
|
100
|
+
private _showReshapeModal;
|
|
79
101
|
update(prop: string, value: any): void;
|
|
80
102
|
render(targetId?: string | HTMLElement | BaseComponent<any>): this;
|
|
81
103
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dataframe.d.ts","sourceRoot":"","sources":["dataframe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAC;AACnE,OAAO,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AAC5D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAOnC,MAAM,WAAW,gBAAgB;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,KAAK,cAAc,GAAG,SAAS,GAAG;IAC9B,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,qBAAa,kBAAmB,SAAQ,aAAa,CAAC,cAAc,CAAC;IACjE,OAAO,CAAC,GAAG,CAA0B;IACrC,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,MAAM,CAAsB;IACpC,OAAO,CAAC,KAAK,CAAqB;IAClC,OAAO,CAAC,OAAO,CAAqC;IACpD,OAAO,CAAC,aAAa,CAOnB;IACF,OAAO,CAAC,UAAU,CAA2B;IAC7C,OAAO,CAAC,WAAW,CAAuB;IAC1C,OAAO,CAAC,cAAc,CAAsC;IAC5D,OAAO,CAAC,aAAa,CAAgE;IACrF,OAAO,CAAC,WAAW,CAAiB;IACpC,OAAO,CAAC,KAAK,CAAc;
|
|
1
|
+
{"version":3,"file":"dataframe.d.ts","sourceRoot":"","sources":["dataframe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAC;AACnE,OAAO,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AAC5D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAOnC,MAAM,WAAW,gBAAgB;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,KAAK,cAAc,GAAG,SAAS,GAAG;IAC9B,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,qBAAa,kBAAmB,SAAQ,aAAa,CAAC,cAAc,CAAC;IACjE,OAAO,CAAC,GAAG,CAA0B;IACrC,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,MAAM,CAAsB;IACpC,OAAO,CAAC,KAAK,CAAqB;IAClC,OAAO,CAAC,OAAO,CAAqC;IACpD,OAAO,CAAC,aAAa,CAOnB;IACF,OAAO,CAAC,UAAU,CAA2B;IAC7C,OAAO,CAAC,WAAW,CAAuB;IAC1C,OAAO,CAAC,cAAc,CAAsC;IAC5D,OAAO,CAAC,aAAa,CAAgE;IACrF,OAAO,CAAC,WAAW,CAAiB;IACpC,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,aAAa,CAAkB;IACvC,OAAO,CAAC,eAAe,CAAiB;IACxC,OAAO,CAAC,YAAY,CAAc;IAClC,OAAO,CAAC,mBAAmB,CAAiB;IAC5C,OAAO,CAAC,YAAY,CAA8C;gBAEtD,EAAE,EAAE,MAAM,EAAE,OAAO,GAAE,gBAAqB;IAmCtD,SAAS,CAAC,gBAAgB,IAAI,SAAS,MAAM,EAAE;IAC/C,SAAS,CAAC,iBAAiB,IAAI,SAAS,MAAM,EAAE;IAMhD,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAwB9B,UAAU,CAAC,MAAM,EAAE,UAAU,GAAG,IAAI;IAgEpC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,GAAG,IAAI;IAiBnE,UAAU,CAAC,KAAK,GAAE,MAAsB,EAAE,MAAM,GAAE,MAAoC,EAAE,IAAI,GAAE,MAAiB,GAAG,IAAI;IAStH,UAAU,CAAC,CAAC,EAAE,OAAO,GAAG,IAAI;IAC5B,UAAU,CAAC,CAAC,EAAE,MAAM,GAAG,IAAI;IAM3B,KAAK,
CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,SAAS,KAAK,SAAS,GAAG,IAAI;IAQ7C,MAAM,CAAC,SAAS,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,MAAM,KAAK,OAAO,GAAG,IAAI;IAI7E,MAAM,CAAC,GAAG,IAAI,EAAE,MAAM,EAAE,GAAG,IAAI;IAI/B,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,OAAO,GAAG,IAAI;IAI7C,IAAI,CAAC,CAAC,GAAE,MAAU,GAAG,IAAI;IAIzB,IAAI,CAAC,CAAC,GAAE,MAAU,GAAG,IAAI;IAIzB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,MAAM,KAAK,GAAG,GAAG,IAAI;IAIpF,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,IAAI,GAAG,IAAI,GAAG,GAAG,GAAG,GAAG,GAAG,IAAI,GAAG,IAAI,GAAG,UAAU,GAAG,YAAY,GAAG,UAAU,EAAE,KAAK,EAAE,GAAG,GAAG,IAAI;IAQxH,IAAI,EAAE,IAAI,SAAS,GAAG,IAAI,CAAqB;IAC/C,IAAI,MAAM,IAAI,aAAa,CAAyB;IACpD,IAAI,KAAK,IAAI,KAAK,GAAG,IAAI,CAAwB;IACjD,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,IAAI;IACtC,KAAK,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM;IACjC,MAAM,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;IAC/B,IAAI,KAAK,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAsC;IACnE,IAAI,OAAO,IAAI,MAAM,EAAE,CAAoC;IAErD,IAAI,CAAC,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAUhD,OAAO,CAAC,CAAC,EAAE,OAAO,GAAG,IAAI;IACzB,SAAS,CAAC,CAAC,EAAE,OAAO,GAAG,IAAI;IAC3B,QAAQ,CAAC,CAAC,EAAE,OAAO,GAAG,IAAI;IAC1B,UAAU,CAAC,CAAC,EAAE,OAAO,GAAG,IAAI;IAC5B,SAAS,CAAC,CAAC,EAAE,OAAO,GAAG,IAAI;IAC3B,WAAW,CAAC,CAAC,EAAE,MAAM,GAAG,IAAI;IAE5B;;OAEG;IACH,YAAY,CAAC,CAAC,EAAE,MAAM,GAAG,IAAI;IAK7B;;OAEG;IACH,cAAc,CAAC,CAAC,EAAE,MAAM,GAAG,IAAI;IAK/B;;OAEG;IACH,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAS7B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAkIzB,OAAO,CAAC,aAAa;IAuBrB,OAAO,CAAC,aAAa;IAmCrB,OAAO,CAAC,YAAY;IAwCpB,OAAO,CAAC,gBAAgB;IA+CxB,OAAO,CAAC,iBAAiB;IAKzB,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,GAAG,IAAI;IAMtC,MAAM,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,WAAW,GAAG,aAAa,CAAC,GAAG,CAAC,GAAG,IAAI;CAyGrE;AAED,wBAAgB,SAAS,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,GAAE,gBAAqB,GAAG,kBAAkB,CAExF"}
|
|
@@ -30,6 +30,11 @@ export class DataFrameComponent extends BaseComponent {
|
|
|
30
30
|
this._inlineUpload = null;
|
|
31
31
|
this._showStatus = true;
|
|
32
32
|
this._icon = '';
|
|
33
|
+
this._maxSheetSize = 100000; // ✅ Default 100k rows
|
|
34
|
+
this._sheetChunkSize = 10000; // ✅ Default 10k chunk
|
|
35
|
+
this._maxFileSize = 50; // ✅ Default 50MB
|
|
36
|
+
this._showReshapeWarning = true;
|
|
37
|
+
this._rawFileData = null; // ✅ Store for re-parsing
|
|
33
38
|
this._driver = new TabularDriver(options.dbName ?? 'jux-dataframes', options.storeName ?? 'frames');
|
|
34
39
|
this._showStatus = options.showStatus ?? true;
|
|
35
40
|
this._icon = options.icon ?? '';
|
|
@@ -41,6 +46,10 @@ export class DataFrameComponent extends BaseComponent {
|
|
|
41
46
|
paginated: options.paginated ?? true,
|
|
42
47
|
rowsPerPage: options.rowsPerPage ?? 25
|
|
43
48
|
};
|
|
49
|
+
this._maxSheetSize = options.maxSheetSize ?? 100000;
|
|
50
|
+
this._sheetChunkSize = options.sheetChunkSize ?? 10000;
|
|
51
|
+
this._maxFileSize = options.maxFileSize ?? 50;
|
|
52
|
+
this._showReshapeWarning = options.showReshapeWarning ?? true;
|
|
44
53
|
}
|
|
45
54
|
getTriggerEvents() { return TRIGGER_EVENTS; }
|
|
46
55
|
getCallbackEvents() { return CALLBACK_EVENTS; }
|
|
@@ -83,29 +92,44 @@ export class DataFrameComponent extends BaseComponent {
|
|
|
83
92
|
if (!files || files.length === 0)
|
|
84
93
|
return;
|
|
85
94
|
const file = files[0];
|
|
95
|
+
// ✅ Check file size
|
|
96
|
+
const fileSizeMB = file.size / (1024 * 1024);
|
|
97
|
+
if (fileSizeMB > this._maxFileSize) {
|
|
98
|
+
this._updateStatus(`❌ File too large (${fileSizeMB.toFixed(1)}MB). Max: ${this._maxFileSize}MB`, 'error');
|
|
99
|
+
return;
|
|
100
|
+
}
|
|
86
101
|
this.state.loading = true;
|
|
87
102
|
this._updateStatus('⏳ Parsing ' + file.name + '...', 'loading');
|
|
88
103
|
try {
|
|
89
|
-
// ✅ Check if multi-sheet Excel
|
|
90
104
|
const isExcel = file.name.toLowerCase().endsWith('.xlsx') ||
|
|
91
105
|
file.name.toLowerCase().endsWith('.xls');
|
|
92
106
|
if (isExcel) {
|
|
93
|
-
|
|
107
|
+
// ✅ Pass chunking options for large files
|
|
108
|
+
const sheets = await this._driver.streamFileMultiSheet(file, {
|
|
109
|
+
maxSheetSize: this._maxSheetSize,
|
|
110
|
+
sheetChunkSize: this._sheetChunkSize,
|
|
111
|
+
onProgress: (loaded, total) => {
|
|
112
|
+
const pct = total ? Math.round((loaded / total) * 100) : 0;
|
|
113
|
+
this._updateStatus(`⏳ Parsing ${file.name}... ${pct}%`, 'loading');
|
|
114
|
+
}
|
|
115
|
+
});
|
|
94
116
|
const sheetNames = Object.keys(sheets);
|
|
95
|
-
// Store first sheet to IndexedDB
|
|
96
117
|
await this._driver.store(file.name, sheets[sheetNames[0]], { source: file.name });
|
|
97
118
|
if (sheetNames.length > 1) {
|
|
98
|
-
// ✅ Multi-sheet: render tabs
|
|
99
119
|
this._renderMultiSheet(sheets, file.name);
|
|
100
120
|
}
|
|
101
121
|
else {
|
|
102
|
-
// Single sheet: render normally
|
|
103
122
|
this._setDataFrame(sheets[sheetNames[0]], file.name);
|
|
104
123
|
}
|
|
105
124
|
}
|
|
106
125
|
else {
|
|
107
|
-
// CSV/TSV:
|
|
108
|
-
const
|
|
126
|
+
// ✅ CSV/TSV: Store raw text for reshaping
|
|
127
|
+
const text = await file.text();
|
|
128
|
+
this._rawFileData = { file, text };
|
|
129
|
+
const df = this._driver.parseCSV(text, {
|
|
130
|
+
autoDetectDelimiter: true,
|
|
131
|
+
hasHeader: true
|
|
132
|
+
});
|
|
109
133
|
await this._driver.store(file.name, df, { source: file.name });
|
|
110
134
|
this._setDataFrame(df, file.name);
|
|
111
135
|
}
|
|
@@ -208,6 +232,27 @@ export class DataFrameComponent extends BaseComponent {
|
|
|
208
232
|
filterable(v) { this._tableOptions.filterable = v; return this; }
|
|
209
233
|
paginated(v) { this._tableOptions.paginated = v; return this; }
|
|
210
234
|
rowsPerPage(v) { this._tableOptions.rowsPerPage = v; return this; }
|
|
235
|
+
/**
|
|
236
|
+
* ✅ NEW: Set max rows per sheet (prevents memory issues with huge Excel files)
|
|
237
|
+
*/
|
|
238
|
+
maxSheetSize(v) {
|
|
239
|
+
this._maxSheetSize = v;
|
|
240
|
+
return this;
|
|
241
|
+
}
|
|
242
|
+
/**
|
|
243
|
+
* ✅ NEW: Set chunk size for processing large sheets
|
|
244
|
+
*/
|
|
245
|
+
sheetChunkSize(v) {
|
|
246
|
+
this._sheetChunkSize = v;
|
|
247
|
+
return this;
|
|
248
|
+
}
|
|
249
|
+
/**
|
|
250
|
+
* ✅ NEW: Set max file size in MB
|
|
251
|
+
*/
|
|
252
|
+
maxFileSize(mb) {
|
|
253
|
+
this._maxFileSize = mb;
|
|
254
|
+
return this;
|
|
255
|
+
}
|
|
211
256
|
/* ═══════════════════════════════════════════════════
|
|
212
257
|
* MULTI-SHEET RENDERING
|
|
213
258
|
* ═══════════════════════════════════════════════════ */
|
|
@@ -348,8 +393,17 @@ export class DataFrameComponent extends BaseComponent {
|
|
|
348
393
|
}
|
|
349
394
|
this._updateTable();
|
|
350
395
|
this._updateStatus(`${sourceName} — ${this._df.height} rows × ${this._df.width} cols`, 'success');
|
|
351
|
-
if
|
|
352
|
-
|
|
396
|
+
// ✅ Add reshape warning button if CSV and enabled
|
|
397
|
+
if (this._showReshapeWarning && this._rawFileData?.text) {
|
|
398
|
+
const statusEl = document.getElementById(`${this._id}-status`);
|
|
399
|
+
if (statusEl) {
|
|
400
|
+
const settingsBtn = document.createElement('button');
|
|
401
|
+
settingsBtn.textContent = 'Settings';
|
|
402
|
+
settingsBtn.className = 'jux-button jux-button-sm jux-button-ghost';
|
|
403
|
+
settingsBtn.style.marginLeft = '0.5rem';
|
|
404
|
+
settingsBtn.addEventListener('click', () => this._showReshapeModal());
|
|
405
|
+
statusEl.appendChild(settingsBtn);
|
|
406
|
+
}
|
|
353
407
|
}
|
|
354
408
|
this._triggerCallback('load', this._df, null, this);
|
|
355
409
|
}
|
|
@@ -427,6 +481,10 @@ export class DataFrameComponent extends BaseComponent {
|
|
|
427
481
|
this._table?.rows(filtered.toRows());
|
|
428
482
|
});
|
|
429
483
|
}
|
|
484
|
+
_showReshapeModal() {
|
|
485
|
+
// TODO: Implement reshape modal for re-parsing CSV with different options
|
|
486
|
+
console.warn('Reshape modal not yet implemented');
|
|
487
|
+
}
|
|
430
488
|
update(prop, value) { }
|
|
431
489
|
/* ═══════════════════════════════════════════════════
|
|
432
490
|
* RENDER
|
|
@@ -460,25 +518,27 @@ export class DataFrameComponent extends BaseComponent {
|
|
|
460
518
|
this.state.loading = true;
|
|
461
519
|
this._updateStatus('⏳ Parsing ' + file.name + '...', 'loading');
|
|
462
520
|
try {
|
|
463
|
-
// ✅ Check if multi-sheet Excel
|
|
464
521
|
const isExcel = file.name.toLowerCase().endsWith('.xlsx') ||
|
|
465
522
|
file.name.toLowerCase().endsWith('.xls');
|
|
466
523
|
if (isExcel) {
|
|
467
|
-
const sheets = await this._driver.streamFileMultiSheet(file
|
|
524
|
+
const sheets = await this._driver.streamFileMultiSheet(file, {
|
|
525
|
+
maxSheetSize: this._maxSheetSize,
|
|
526
|
+
sheetChunkSize: this._sheetChunkSize,
|
|
527
|
+
onProgress: (loaded, total) => {
|
|
528
|
+
const pct = total ? Math.round((loaded / total) * 100) : 0;
|
|
529
|
+
this._updateStatus(`⏳ Parsing ${file.name}... ${pct}%`, 'loading');
|
|
530
|
+
}
|
|
531
|
+
});
|
|
468
532
|
const sheetNames = Object.keys(sheets);
|
|
469
|
-
// Store first sheet to IndexedDB
|
|
470
533
|
await this._driver.store(file.name, sheets[sheetNames[0]], { source: file.name });
|
|
471
534
|
if (sheetNames.length > 1) {
|
|
472
|
-
// ✅ Multi-sheet: render tabs
|
|
473
535
|
this._renderMultiSheet(sheets, file.name);
|
|
474
536
|
}
|
|
475
537
|
else {
|
|
476
|
-
// Single sheet: render normally
|
|
477
538
|
this._setDataFrame(sheets[sheetNames[0]], file.name);
|
|
478
539
|
}
|
|
479
540
|
}
|
|
480
541
|
else {
|
|
481
|
-
// CSV/TSV: single sheet
|
|
482
542
|
const df = await this._driver.streamFile(file);
|
|
483
543
|
await this._driver.store(file.name, df, { source: file.name });
|
|
484
544
|
this._setDataFrame(df, file.name);
|
|
@@ -20,6 +20,10 @@ export interface DataFrameOptions {
|
|
|
20
20
|
rowsPerPage?: number;
|
|
21
21
|
showStatus?: boolean;
|
|
22
22
|
icon?: string;
|
|
23
|
+
maxSheetSize?: number; // ✅ NEW: Max rows per sheet (default: 100k)
|
|
24
|
+
sheetChunkSize?: number; // ✅ NEW: Chunk size for large sheets (default: 10k)
|
|
25
|
+
maxFileSize?: number; // ✅ NEW: Max file size in MB (default: 50MB)
|
|
26
|
+
showReshapeWarning?: boolean; // ✅ NEW: Show warning when data looks malformed
|
|
23
27
|
style?: string;
|
|
24
28
|
class?: string;
|
|
25
29
|
}
|
|
@@ -51,6 +55,11 @@ export class DataFrameComponent extends BaseComponent<DataFrameState> {
|
|
|
51
55
|
private _inlineUpload: { label: string; accept: string; icon: string } | null = null;
|
|
52
56
|
private _showStatus: boolean = true;
|
|
53
57
|
private _icon: string = '';
|
|
58
|
+
private _maxSheetSize: number = 100000; // ✅ Default 100k rows
|
|
59
|
+
private _sheetChunkSize: number = 10000; // ✅ Default 10k chunk
|
|
60
|
+
private _maxFileSize: number = 50; // ✅ Default 50MB
|
|
61
|
+
private _showReshapeWarning: boolean = true;
|
|
62
|
+
private _rawFileData: { file: File; text?: string } | null = null; // ✅ Store for re-parsing
|
|
54
63
|
|
|
55
64
|
constructor(id: string, options: DataFrameOptions = {}) {
|
|
56
65
|
super(id, {
|
|
@@ -81,6 +90,10 @@ export class DataFrameComponent extends BaseComponent<DataFrameState> {
|
|
|
81
90
|
paginated: options.paginated ?? true,
|
|
82
91
|
rowsPerPage: options.rowsPerPage ?? 25
|
|
83
92
|
};
|
|
93
|
+
this._maxSheetSize = options.maxSheetSize ?? 100000;
|
|
94
|
+
this._sheetChunkSize = options.sheetChunkSize ?? 10000;
|
|
95
|
+
this._maxFileSize = options.maxFileSize ?? 50;
|
|
96
|
+
this._showReshapeWarning = options.showReshapeWarning ?? true;
|
|
84
97
|
}
|
|
85
98
|
|
|
86
99
|
protected getTriggerEvents(): readonly string[] { return TRIGGER_EVENTS; }
|
|
@@ -120,31 +133,51 @@ export class DataFrameComponent extends BaseComponent<DataFrameState> {
|
|
|
120
133
|
upload.bind('change', async (files: File[]) => {
|
|
121
134
|
if (!files || files.length === 0) return;
|
|
122
135
|
const file = files[0];
|
|
136
|
+
|
|
137
|
+
// ✅ Check file size
|
|
138
|
+
const fileSizeMB = file.size / (1024 * 1024);
|
|
139
|
+
if (fileSizeMB > this._maxFileSize) {
|
|
140
|
+
this._updateStatus(`❌ File too large (${fileSizeMB.toFixed(1)}MB). Max: ${this._maxFileSize}MB`, 'error');
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
|
|
123
144
|
this.state.loading = true;
|
|
124
145
|
this._updateStatus('⏳ Parsing ' + file.name + '...', 'loading');
|
|
125
146
|
|
|
126
147
|
try {
|
|
127
|
-
// ✅ Check if multi-sheet Excel
|
|
128
148
|
const isExcel = file.name.toLowerCase().endsWith('.xlsx') ||
|
|
129
149
|
file.name.toLowerCase().endsWith('.xls');
|
|
130
150
|
|
|
131
151
|
if (isExcel) {
|
|
132
|
-
|
|
152
|
+
// ✅ Pass chunking options for large files
|
|
153
|
+
const sheets = await this._driver.streamFileMultiSheet(file, {
|
|
154
|
+
maxSheetSize: this._maxSheetSize,
|
|
155
|
+
sheetChunkSize: this._sheetChunkSize,
|
|
156
|
+
onProgress: (loaded, total) => {
|
|
157
|
+
const pct = total ? Math.round((loaded / total) * 100) : 0;
|
|
158
|
+
this._updateStatus(`⏳ Parsing ${file.name}... ${pct}%`, 'loading');
|
|
159
|
+
}
|
|
160
|
+
});
|
|
161
|
+
|
|
133
162
|
const sheetNames = Object.keys(sheets);
|
|
134
163
|
|
|
135
|
-
// Store first sheet to IndexedDB
|
|
136
164
|
await this._driver.store(file.name, sheets[sheetNames[0]], { source: file.name });
|
|
137
165
|
|
|
138
166
|
if (sheetNames.length > 1) {
|
|
139
|
-
// ✅ Multi-sheet: render tabs
|
|
140
167
|
this._renderMultiSheet(sheets, file.name);
|
|
141
168
|
} else {
|
|
142
|
-
// Single sheet: render normally
|
|
143
169
|
this._setDataFrame(sheets[sheetNames[0]], file.name);
|
|
144
170
|
}
|
|
145
171
|
} else {
|
|
146
|
-
// CSV/TSV:
|
|
147
|
-
const
|
|
172
|
+
// ✅ CSV/TSV: Store raw text for reshaping
|
|
173
|
+
const text = await file.text();
|
|
174
|
+
this._rawFileData = { file, text };
|
|
175
|
+
|
|
176
|
+
const df = this._driver.parseCSV(text, {
|
|
177
|
+
autoDetectDelimiter: true,
|
|
178
|
+
hasHeader: true
|
|
179
|
+
});
|
|
180
|
+
|
|
148
181
|
await this._driver.store(file.name, df, { source: file.name });
|
|
149
182
|
this._setDataFrame(df, file.name);
|
|
150
183
|
}
|
|
@@ -257,6 +290,30 @@ export class DataFrameComponent extends BaseComponent<DataFrameState> {
|
|
|
257
290
|
paginated(v: boolean): this { this._tableOptions.paginated = v; return this; }
|
|
258
291
|
rowsPerPage(v: number): this { this._tableOptions.rowsPerPage = v; return this; }
|
|
259
292
|
|
|
293
|
+
/**
 * Fluent setter for the per-sheet row cap.
 * The stored value is passed as `maxSheetSize` when an Excel file is parsed.
 *
 * @param v - Maximum number of rows to keep per sheet.
 * @returns This component, for chaining.
 */
maxSheetSize(v: number): this { this._maxSheetSize = v; return this; }
|
|
300
|
+
|
|
301
|
+
/**
 * Fluent setter for the row batch size.
 * The stored value is passed as `sheetChunkSize` when an Excel file is parsed.
 *
 * @param v - Number of rows per processing chunk.
 * @returns This component, for chaining.
 */
sheetChunkSize(v: number): this { this._sheetChunkSize = v; return this; }
|
|
308
|
+
|
|
309
|
+
/**
 * Fluent setter for the upload size limit.
 * Uploaded files whose size in MB (`file.size / (1024 * 1024)`) exceeds this
 * value are rejected before parsing.
 *
 * @param mb - Maximum accepted file size, in MB.
 * @returns This component, for chaining.
 */
maxFileSize(mb: number): this { this._maxFileSize = mb; return this; }
|
|
316
|
+
|
|
260
317
|
/* ═══════════════════════════════════════════════════
|
|
261
318
|
* MULTI-SHEET RENDERING
|
|
262
319
|
* ═══════════════════════════════════════════════════ */
|
|
@@ -436,8 +493,17 @@ export class DataFrameComponent extends BaseComponent<DataFrameState> {
|
|
|
436
493
|
'success'
|
|
437
494
|
);
|
|
438
495
|
|
|
439
|
-
if
|
|
440
|
-
|
|
496
|
+
// ✅ Add reshape warning button if CSV and enabled
|
|
497
|
+
if (this._showReshapeWarning && this._rawFileData?.text) {
|
|
498
|
+
const statusEl = document.getElementById(`${this._id}-status`);
|
|
499
|
+
if (statusEl) {
|
|
500
|
+
const settingsBtn = document.createElement('button');
|
|
501
|
+
settingsBtn.textContent = 'Settings';
|
|
502
|
+
settingsBtn.className = 'jux-button jux-button-sm jux-button-ghost';
|
|
503
|
+
settingsBtn.style.marginLeft = '0.5rem';
|
|
504
|
+
settingsBtn.addEventListener('click', () => this._showReshapeModal());
|
|
505
|
+
statusEl.appendChild(settingsBtn);
|
|
506
|
+
}
|
|
441
507
|
}
|
|
442
508
|
|
|
443
509
|
this._triggerCallback('load', this._df, null, this);
|
|
@@ -530,6 +596,11 @@ export class DataFrameComponent extends BaseComponent<DataFrameState> {
|
|
|
530
596
|
});
|
|
531
597
|
}
|
|
532
598
|
|
|
599
|
+
/**
 * Placeholder for the CSV "reshape" (re-parse with different options) dialog.
 * Not implemented yet: the only effect is a console warning.
 */
private _showReshapeModal(): void {
    // TODO: Implement reshape modal for re-parsing CSV with different options
    console.warn('Reshape modal not yet implemented');
}
|
|
603
|
+
|
|
533
604
|
update(prop: string, value: any): void { }
|
|
534
605
|
|
|
535
606
|
/* ═══════════════════════════════════════════════════
|
|
@@ -567,26 +638,29 @@ export class DataFrameComponent extends BaseComponent<DataFrameState> {
|
|
|
567
638
|
this._updateStatus('⏳ Parsing ' + file.name + '...', 'loading');
|
|
568
639
|
|
|
569
640
|
try {
|
|
570
|
-
// ✅ Check if multi-sheet Excel
|
|
571
641
|
const isExcel = file.name.toLowerCase().endsWith('.xlsx') ||
|
|
572
642
|
file.name.toLowerCase().endsWith('.xls');
|
|
573
643
|
|
|
574
644
|
if (isExcel) {
|
|
575
|
-
const sheets = await this._driver.streamFileMultiSheet(file
|
|
645
|
+
const sheets = await this._driver.streamFileMultiSheet(file, {
|
|
646
|
+
maxSheetSize: this._maxSheetSize,
|
|
647
|
+
sheetChunkSize: this._sheetChunkSize,
|
|
648
|
+
onProgress: (loaded, total) => {
|
|
649
|
+
const pct = total ? Math.round((loaded / total) * 100) : 0;
|
|
650
|
+
this._updateStatus(`⏳ Parsing ${file.name}... ${pct}%`, 'loading');
|
|
651
|
+
}
|
|
652
|
+
});
|
|
653
|
+
|
|
576
654
|
const sheetNames = Object.keys(sheets);
|
|
577
655
|
|
|
578
|
-
// Store first sheet to IndexedDB
|
|
579
656
|
await this._driver.store(file.name, sheets[sheetNames[0]], { source: file.name });
|
|
580
657
|
|
|
581
658
|
if (sheetNames.length > 1) {
|
|
582
|
-
// ✅ Multi-sheet: render tabs
|
|
583
659
|
this._renderMultiSheet(sheets, file.name);
|
|
584
660
|
} else {
|
|
585
|
-
// Single sheet: render normally
|
|
586
661
|
this._setDataFrame(sheets[sheetNames[0]], file.name);
|
|
587
662
|
}
|
|
588
663
|
} else {
|
|
589
|
-
// CSV/TSV: single sheet
|
|
590
664
|
const df = await this._driver.streamFile(file);
|
|
591
665
|
await this._driver.store(file.name, df, { source: file.name });
|
|
592
666
|
this._setDataFrame(df, file.name);
|
|
@@ -18,6 +18,10 @@ export interface ParseOptions {
|
|
|
18
18
|
skipRows?: number;
|
|
19
19
|
columns?: string[];
|
|
20
20
|
sheet?: string | number;
|
|
21
|
+
maxSheetSize?: number;
|
|
22
|
+
sheetChunkSize?: number;
|
|
23
|
+
headerRow?: number;
|
|
24
|
+
autoDetectDelimiter?: boolean;
|
|
21
25
|
}
|
|
22
26
|
export declare class TabularDriver {
|
|
23
27
|
private _dbName;
|
|
@@ -25,6 +29,16 @@ export declare class TabularDriver {
|
|
|
25
29
|
private _db;
|
|
26
30
|
constructor(dbName?: string, storeName?: string);
|
|
27
31
|
open(): Promise<IDBDatabase>;
|
|
32
|
+
/**
|
|
33
|
+
* ✅ NEW: Auto-detect delimiter from first N lines of CSV
|
|
34
|
+
* Checks for: , | \t ;
|
|
35
|
+
*/
|
|
36
|
+
private _detectDelimiter;
|
|
37
|
+
/**
|
|
38
|
+
* ✅ NEW: Detect which row contains the header
|
|
39
|
+
* Looks for first row with mostly string values
|
|
40
|
+
*/
|
|
41
|
+
private _detectHeaderRow;
|
|
28
42
|
/**
|
|
29
43
|
* Parse a CSV/TSV string into a DataFrame
|
|
30
44
|
*/
|
|
@@ -74,11 +88,10 @@ export declare class TabularDriver {
|
|
|
74
88
|
*/
|
|
75
89
|
fetch(url: string, options?: ParseOptions): Promise<DataFrame>;
|
|
76
90
|
/**
|
|
77
|
-
* ✅
|
|
78
|
-
*
|
|
79
|
-
* @returns Record<sheetName, DataFrame>
|
|
91
|
+
* ✅ OPTIMIZED: Stream Excel file with chunked row processing
|
|
92
|
+
* Handles large files by processing rows in batches
|
|
80
93
|
*/
|
|
81
|
-
streamFileMultiSheet(file: File): Promise<Record<string, DataFrame>>;
|
|
94
|
+
streamFileMultiSheet(file: File, options?: ParseOptions): Promise<Record<string, DataFrame>>;
|
|
82
95
|
private _splitLines;
|
|
83
96
|
private _parseLine;
|
|
84
97
|
private _autoType;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TabularDriver.d.ts","sourceRoot":"","sources":["TabularDriver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAE3C,MAAM,WAAW,WAAW;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,YAAY;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI,KAAK,IAAI,CAAC;IAC5D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"TabularDriver.d.ts","sourceRoot":"","sources":["TabularDriver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAE3C,MAAM,WAAW,WAAW;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,YAAY;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI,KAAK,IAAI,CAAC;IAC5D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mBAAmB,CAAC,EAAE,OAAO,CAAC;CACjC;AAED,qBAAa,aAAa;IACtB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,GAAG,CAA4B;gBAE3B,MAAM,GAAE,MAAsB,EAAE,SAAS,GAAE,MAAiB;IAKlE,IAAI,IAAI,OAAO,CAAC,WAAW,CAAC;IA4BlC;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IAgCxB;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IAwBxB;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,SAAS;IA6D7D;;;OAGG;IACG,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,SAAS,CAAC;IAoG5E;;OAEG;YACW,UAAU;IAuExB;;OAEG;IACG,aAAa,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAiBlD;;OAEG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAuBzF;;OAEG;IACG,IAAI,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC;IAwBjD;;OAEG;IACG,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC;IA4BzD;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,EAAE,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAqBlH;;OAEG;IACG,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAYjC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IA
gB5B;;OAEG;IACG,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,SAAS,CAAC;IA0ExE;;;OAGG;IACG,oBAAoB,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAyGtG,OAAO,CAAC,WAAW;IAInB,OAAO,CAAC,UAAU;IAmClB,OAAO,CAAC,SAAS;IAYjB,KAAK,IAAI,IAAI;CAMhB;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,MAAM,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,aAAa,CAEhF"}
|
|
@@ -28,17 +28,85 @@ export class TabularDriver {
|
|
|
28
28
|
/* ═══════════════════════════════════════════════════
|
|
29
29
|
* CSV / TSV PARSING
|
|
30
30
|
* ═══════════════════════════════════════════════════ */
|
|
31
|
+
/**
|
|
32
|
+
* ✅ NEW: Auto-detect delimiter from first N lines of CSV
|
|
33
|
+
* Checks for: , | \t ;
|
|
34
|
+
*/
|
|
35
|
+
_detectDelimiter(text, sampleLines = 5) {
|
|
36
|
+
const lines = this._splitLines(text).slice(0, sampleLines).filter(l => l.trim());
|
|
37
|
+
if (lines.length === 0)
|
|
38
|
+
return ',';
|
|
39
|
+
const delimiters = [',', '|', '\t', ';'];
|
|
40
|
+
const scores = {};
|
|
41
|
+
delimiters.forEach(delim => {
|
|
42
|
+
const counts = lines.map(line => {
|
|
43
|
+
// Count occurrences of delimiter NOT inside quotes
|
|
44
|
+
let count = 0;
|
|
45
|
+
let inQuotes = false;
|
|
46
|
+
for (let i = 0; i < line.length; i++) {
|
|
47
|
+
if (line[i] === '"')
|
|
48
|
+
inQuotes = !inQuotes;
|
|
49
|
+
if (!inQuotes && line[i] === delim)
|
|
50
|
+
count++;
|
|
51
|
+
}
|
|
52
|
+
return count;
|
|
53
|
+
});
|
|
54
|
+
// Delimiter should have consistent count across lines
|
|
55
|
+
const avg = counts.reduce((sum, c) => sum + c, 0) / counts.length;
|
|
56
|
+
const variance = counts.reduce((sum, c) => sum + Math.pow(c - avg, 2), 0) / counts.length;
|
|
57
|
+
// Score: high count, low variance
|
|
58
|
+
scores[delim] = avg > 0 ? avg / (1 + variance) : 0;
|
|
59
|
+
});
|
|
60
|
+
// Return delimiter with highest score
|
|
61
|
+
const best = Object.entries(scores).sort((a, b) => b[1] - a[1])[0];
|
|
62
|
+
return best[0];
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* ✅ NEW: Detect which row contains the header
|
|
66
|
+
* Looks for first row with mostly string values
|
|
67
|
+
*/
|
|
68
|
+
_detectHeaderRow(text, delimiter, maxCheck = 10) {
|
|
69
|
+
const lines = this._splitLines(text).slice(0, maxCheck).filter(l => l.trim());
|
|
70
|
+
for (let i = 0; i < lines.length; i++) {
|
|
71
|
+
const values = this._parseLine(lines[i], delimiter);
|
|
72
|
+
// Skip if mostly empty
|
|
73
|
+
const nonEmpty = values.filter(v => v.trim()).length;
|
|
74
|
+
if (nonEmpty < values.length * 0.5)
|
|
75
|
+
continue;
|
|
76
|
+
// Check if mostly non-numeric (likely headers)
|
|
77
|
+
const nonNumeric = values.filter(v => {
|
|
78
|
+
const trimmed = v.trim();
|
|
79
|
+
return isNaN(Number(trimmed)) && trimmed !== '';
|
|
80
|
+
}).length;
|
|
81
|
+
if (nonNumeric >= values.length * 0.7) {
|
|
82
|
+
return i;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
return 0; // Fallback to first row
|
|
86
|
+
}
|
|
31
87
|
/**
|
|
32
88
|
* Parse a CSV/TSV string into a DataFrame
|
|
33
89
|
*/
|
|
34
90
|
parseCSV(text, options = {}) {
|
|
35
|
-
const { delimiter
|
|
91
|
+
const { delimiter: userDelimiter, hasHeader = true, maxRows, skipRows: userSkipRows = 0, columns: selectCols, headerRow: userHeaderRow, autoDetectDelimiter = true } = options;
|
|
92
|
+
// ✅ Auto-detect delimiter if not provided
|
|
93
|
+
const delimiter = userDelimiter || (autoDetectDelimiter ? this._detectDelimiter(text) : ',');
|
|
94
|
+
// ✅ Auto-detect header row if not provided
|
|
95
|
+
const headerRow = userHeaderRow !== undefined ? userHeaderRow : (hasHeader ? this._detectHeaderRow(text, delimiter) : -1);
|
|
36
96
|
const lines = this._splitLines(text);
|
|
37
|
-
let startIdx = skipRows;
|
|
38
97
|
let headers;
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
98
|
+
// Skip rows before header
|
|
99
|
+
let startIdx = userSkipRows;
|
|
100
|
+
if (hasHeader && headerRow >= 0) {
|
|
101
|
+
const headerLine = lines[headerRow + userSkipRows];
|
|
102
|
+
if (headerLine) {
|
|
103
|
+
headers = this._parseLine(headerLine, delimiter);
|
|
104
|
+
startIdx = headerRow + userSkipRows + 1;
|
|
105
|
+
}
|
|
106
|
+
else {
|
|
107
|
+
const firstLine = this._parseLine(lines[startIdx] || '', delimiter);
|
|
108
|
+
headers = firstLine.map((_, i) => `col_${i}`);
|
|
109
|
+
}
|
|
42
110
|
}
|
|
43
111
|
else {
|
|
44
112
|
const firstLine = this._parseLine(lines[startIdx] || '', delimiter);
|
|
@@ -419,12 +487,11 @@ export class TabularDriver {
|
|
|
419
487
|
return df;
|
|
420
488
|
}
|
|
421
489
|
/**
|
|
422
|
-
* ✅
|
|
423
|
-
*
|
|
424
|
-
* @returns Record<sheetName, DataFrame>
|
|
490
|
+
* ✅ OPTIMIZED: Stream Excel file with chunked row processing
|
|
491
|
+
* Handles large files by processing rows in batches
|
|
425
492
|
*/
|
|
426
|
-
async streamFileMultiSheet(file) {
|
|
427
|
-
|
|
493
|
+
async streamFileMultiSheet(file, options = {}) {
|
|
494
|
+
const { maxSheetSize = 100000, sheetChunkSize = 10000, onProgress } = options;
|
|
428
495
|
let XLSX;
|
|
429
496
|
try {
|
|
430
497
|
XLSX = await import('xlsx');
|
|
@@ -432,16 +499,79 @@ export class TabularDriver {
|
|
|
432
499
|
catch {
|
|
433
500
|
throw new Error('XLSX support requires the "xlsx" package. Install it with: npm install xlsx');
|
|
434
501
|
}
|
|
502
|
+
if (onProgress)
|
|
503
|
+
onProgress(0, file.size);
|
|
435
504
|
const buffer = await file.arrayBuffer();
|
|
436
|
-
|
|
505
|
+
if (onProgress)
|
|
506
|
+
onProgress(file.size * 0.3, file.size);
|
|
507
|
+
const workbook = XLSX.read(buffer, {
|
|
508
|
+
type: 'array',
|
|
509
|
+
sheetRows: maxSheetSize, // ✅ Limit rows read per sheet
|
|
510
|
+
dense: false // ✅ Use sparse format for memory efficiency
|
|
511
|
+
});
|
|
437
512
|
const sheets = {};
|
|
438
|
-
workbook.SheetNames.
|
|
513
|
+
const totalSheets = workbook.SheetNames.length;
|
|
514
|
+
let processedSheets = 0;
|
|
515
|
+
for (const sheetName of workbook.SheetNames) {
|
|
439
516
|
const worksheet = workbook.Sheets[sheetName];
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
517
|
+
// ✅ Get sheet range to determine size
|
|
518
|
+
const range = XLSX.utils.decode_range(worksheet['!ref'] || 'A1');
|
|
519
|
+
const totalRows = range.e.r - range.s.r + 1;
|
|
520
|
+
if (totalRows === 0) {
|
|
521
|
+
processedSheets++;
|
|
522
|
+
continue;
|
|
443
523
|
}
|
|
444
|
-
|
|
524
|
+
// ✅ Process in chunks if sheet is large
|
|
525
|
+
if (totalRows > sheetChunkSize) {
|
|
526
|
+
const rows = [];
|
|
527
|
+
let headers = [];
|
|
528
|
+
for (let startRow = 0; startRow < totalRows; startRow += sheetChunkSize) {
|
|
529
|
+
const endRow = Math.min(startRow + sheetChunkSize, totalRows);
|
|
530
|
+
// ✅ Read chunk with limited row range
|
|
531
|
+
const chunkData = XLSX.utils.sheet_to_json(worksheet, {
|
|
532
|
+
range: startRow,
|
|
533
|
+
header: startRow === 0 ? undefined : headers,
|
|
534
|
+
defval: null,
|
|
535
|
+
raw: false, // ✅ Convert dates/numbers to strings to reduce memory
|
|
536
|
+
blankrows: false
|
|
537
|
+
});
|
|
538
|
+
if (startRow === 0 && chunkData.length > 0) {
|
|
539
|
+
headers = Object.keys(chunkData[0]);
|
|
540
|
+
}
|
|
541
|
+
rows.push(...chunkData);
|
|
542
|
+
if (onProgress) {
|
|
543
|
+
const progress = 0.3 + (0.6 * (processedSheets + (startRow / totalRows)) / totalSheets);
|
|
544
|
+
onProgress(file.size * progress, file.size);
|
|
545
|
+
}
|
|
546
|
+
// ✅ Stop if we hit max rows
|
|
547
|
+
if (rows.length >= maxSheetSize) {
|
|
548
|
+
console.warn(`⚠️ Sheet "${sheetName}" truncated to ${maxSheetSize} rows`);
|
|
549
|
+
break;
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
if (rows.length > 0) {
|
|
553
|
+
sheets[sheetName] = new DataFrame(rows);
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
else {
|
|
557
|
+
// ✅ Small sheet: process normally
|
|
558
|
+
const jsonData = XLSX.utils.sheet_to_json(worksheet, {
|
|
559
|
+
defval: null,
|
|
560
|
+
raw: false,
|
|
561
|
+
blankrows: false
|
|
562
|
+
});
|
|
563
|
+
if (jsonData.length > 0) {
|
|
564
|
+
sheets[sheetName] = new DataFrame(jsonData);
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
processedSheets++;
|
|
568
|
+
if (onProgress) {
|
|
569
|
+
const progress = 0.3 + (0.6 * (processedSheets / totalSheets));
|
|
570
|
+
onProgress(file.size * progress, file.size);
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
if (onProgress)
|
|
574
|
+
onProgress(file.size, file.size);
|
|
445
575
|
return sheets;
|
|
446
576
|
}
|
|
447
577
|
/* ═══════════════════════════════════════════════════
|
|
@@ -19,7 +19,11 @@ export interface ParseOptions {
|
|
|
19
19
|
maxRows?: number;
|
|
20
20
|
skipRows?: number;
|
|
21
21
|
columns?: string[];
|
|
22
|
-
sheet?: string | number;
|
|
22
|
+
sheet?: string | number;
|
|
23
|
+
maxSheetSize?: number; // ✅ NEW: Max rows per sheet to prevent memory issues
|
|
24
|
+
sheetChunkSize?: number; // ✅ NEW: Process Excel in chunks
|
|
25
|
+
headerRow?: number; // ✅ NEW: Which row contains headers (default: 0)
|
|
26
|
+
autoDetectDelimiter?: boolean; // ✅ NEW: Auto-detect delimiter from first lines
|
|
23
27
|
}
|
|
24
28
|
|
|
25
29
|
export class TabularDriver {
|
|
@@ -60,25 +64,105 @@ export class TabularDriver {
|
|
|
60
64
|
* CSV / TSV PARSING
|
|
61
65
|
* ═══════════════════════════════════════════════════ */
|
|
62
66
|
|
|
67
|
+
/**
 * Guess the field delimiter by sampling the first non-blank lines of the text.
 * Candidates, in priority order: , | \t ;
 *
 * Each candidate is scored by how often it appears outside double quotes
 * (mean per line) divided by one plus the variance of that count across the
 * sampled lines, so frequent and consistent delimiters win.
 *
 * @param text Raw delimited text.
 * @param sampleLines Number of leading lines to sample (default 5).
 * @returns The highest-scoring candidate; ',' when the sample is empty or
 *          when no candidate scores above the others.
 */
private _detectDelimiter(text: string, sampleLines: number = 5): string {
  const sample = this._splitLines(text)
    .slice(0, sampleLines)
    .filter(row => row.trim());
  if (sample.length === 0) {
    return ',';
  }

  let winner = ',';
  let winnerScore = -Infinity;

  for (const candidate of [',', '|', '\t', ';']) {
    // Occurrences of the candidate outside quoted sections, per line.
    const perLine: number[] = sample.map(row => {
      let hits = 0;
      let quoted = false;
      for (const ch of row) {
        if (ch === '"') {
          quoted = !quoted;
        }
        if (!quoted && ch === candidate) {
          hits += 1;
        }
      }
      return hits;
    });

    let total = 0;
    for (const hits of perLine) {
      total = total + hits;
    }
    const mean = total / perLine.length;

    let squaredDeviation = 0;
    for (const hits of perLine) {
      squaredDeviation = squaredDeviation + (hits - mean) ** 2;
    }
    const spread = squaredDeviation / perLine.length;

    // High average count and low variance give a high score.
    const score = mean > 0 ? mean / (1 + spread) : 0;

    // Strictly greater keeps the earliest candidate on ties.
    if (score > winnerScore) {
      winner = candidate;
      winnerScore = score;
    }
  }

  return winner;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Detect which row contains the header.
 *
 * Scans the first `maxCheck` lines and returns the index of the first line
 * that is at least half populated and whose cells are at least 70%
 * non-empty, non-numeric text.
 *
 * The returned index refers to the raw `_splitLines(text)` array, not to a
 * blank-filtered copy, because parseCSV uses it to index that same raw
 * array. Returning a filtered index would select the wrong row whenever
 * blank lines precede the header.
 *
 * @param text Raw delimited text.
 * @param delimiter Field delimiter handed to `_parseLine`.
 * @param maxCheck Number of leading lines to inspect (default 10).
 * @returns Zero-based line index of the detected header. Falls back to the
 *          first non-blank inspected line, or 0 when every line is blank.
 */
private _detectHeaderRow(text: string, delimiter: string, maxCheck: number = 10): number {
  const candidates = this._splitLines(text).slice(0, maxCheck);
  let firstNonBlank = -1;

  for (let i = 0; i < candidates.length; i++) {
    const line = candidates[i];

    // Blank lines are never headers, but they still occupy an index.
    if (!line.trim()) continue;
    if (firstNonBlank < 0) firstNonBlank = i;

    const values = this._parseLine(line, delimiter);

    // Skip rows where fewer than half of the cells carry content.
    const nonEmpty = values.filter(v => v.trim()).length;
    if (nonEmpty < values.length * 0.5) continue;

    // Header cells are expected to be text rather than numbers.
    const nonNumeric = values.filter(v => {
      const trimmed = v.trim();
      return trimmed !== '' && isNaN(Number(trimmed));
    }).length;

    if (nonNumeric >= values.length * 0.7) {
      return i;
    }
  }

  // No line looked like a header: use the first line that has content.
  return firstNonBlank >= 0 ? firstNonBlank : 0;
}
|
|
130
|
+
|
|
63
131
|
/**
|
|
64
132
|
* Parse a CSV/TSV string into a DataFrame
|
|
65
133
|
*/
|
|
66
134
|
parseCSV(text: string, options: ParseOptions = {}): DataFrame {
|
|
67
135
|
const {
|
|
68
|
-
delimiter
|
|
136
|
+
delimiter: userDelimiter,
|
|
69
137
|
hasHeader = true,
|
|
70
138
|
maxRows,
|
|
71
|
-
skipRows = 0,
|
|
72
|
-
columns: selectCols
|
|
139
|
+
skipRows: userSkipRows = 0,
|
|
140
|
+
columns: selectCols,
|
|
141
|
+
headerRow: userHeaderRow,
|
|
142
|
+
autoDetectDelimiter = true
|
|
73
143
|
} = options;
|
|
74
144
|
|
|
145
|
+
// ✅ Auto-detect delimiter if not provided
|
|
146
|
+
const delimiter = userDelimiter || (autoDetectDelimiter ? this._detectDelimiter(text) : ',');
|
|
147
|
+
|
|
148
|
+
// ✅ Auto-detect header row if not provided
|
|
149
|
+
const headerRow = userHeaderRow !== undefined ? userHeaderRow : (hasHeader ? this._detectHeaderRow(text, delimiter) : -1);
|
|
150
|
+
|
|
75
151
|
const lines = this._splitLines(text);
|
|
76
|
-
let startIdx = skipRows;
|
|
77
152
|
let headers: string[];
|
|
78
153
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
154
|
+
// Skip rows before header
|
|
155
|
+
let startIdx = userSkipRows;
|
|
156
|
+
|
|
157
|
+
if (hasHeader && headerRow >= 0) {
|
|
158
|
+
const headerLine = lines[headerRow + userSkipRows];
|
|
159
|
+
if (headerLine) {
|
|
160
|
+
headers = this._parseLine(headerLine, delimiter);
|
|
161
|
+
startIdx = headerRow + userSkipRows + 1;
|
|
162
|
+
} else {
|
|
163
|
+
const firstLine = this._parseLine(lines[startIdx] || '', delimiter);
|
|
164
|
+
headers = firstLine.map((_, i) => `col_${i}`);
|
|
165
|
+
}
|
|
82
166
|
} else {
|
|
83
167
|
const firstLine = this._parseLine(lines[startIdx] || '', delimiter);
|
|
84
168
|
headers = firstLine.map((_, i) => `col_${i}`);
|
|
@@ -523,12 +607,12 @@ export class TabularDriver {
|
|
|
523
607
|
}
|
|
524
608
|
|
|
525
609
|
/**
|
|
526
|
-
* ✅
|
|
527
|
-
*
|
|
528
|
-
* @returns Record<sheetName, DataFrame>
|
|
610
|
+
* ✅ OPTIMIZED: Stream Excel file with chunked row processing
|
|
611
|
+
* Handles large files by processing rows in batches
|
|
529
612
|
*/
|
|
530
|
-
async streamFileMultiSheet(file: File): Promise<Record<string, DataFrame>> {
|
|
531
|
-
|
|
613
|
+
async streamFileMultiSheet(file: File, options: ParseOptions = {}): Promise<Record<string, DataFrame>> {
|
|
614
|
+
const { maxSheetSize = 100000, sheetChunkSize = 10000, onProgress } = options;
|
|
615
|
+
|
|
532
616
|
let XLSX: any;
|
|
533
617
|
try {
|
|
534
618
|
XLSX = await import('xlsx');
|
|
@@ -536,19 +620,93 @@ export class TabularDriver {
|
|
|
536
620
|
throw new Error('XLSX support requires the "xlsx" package. Install it with: npm install xlsx');
|
|
537
621
|
}
|
|
538
622
|
|
|
623
|
+
if (onProgress) onProgress(0, file.size);
|
|
624
|
+
|
|
539
625
|
const buffer = await file.arrayBuffer();
|
|
540
|
-
|
|
626
|
+
|
|
627
|
+
if (onProgress) onProgress(file.size * 0.3, file.size);
|
|
628
|
+
|
|
629
|
+
const workbook = XLSX.read(buffer, {
|
|
630
|
+
type: 'array',
|
|
631
|
+
sheetRows: maxSheetSize, // ✅ Limit rows read per sheet
|
|
632
|
+
dense: false // ✅ Use sparse format for memory efficiency
|
|
633
|
+
});
|
|
541
634
|
|
|
542
635
|
const sheets: Record<string, DataFrame> = {};
|
|
636
|
+
const totalSheets = workbook.SheetNames.length;
|
|
637
|
+
let processedSheets = 0;
|
|
543
638
|
|
|
544
|
-
workbook.SheetNames
|
|
639
|
+
for (const sheetName of workbook.SheetNames) {
|
|
545
640
|
const worksheet = workbook.Sheets[sheetName];
|
|
546
|
-
const jsonData: Record<string, any>[] = XLSX.utils.sheet_to_json(worksheet, { defval: null });
|
|
547
641
|
|
|
548
|
-
|
|
549
|
-
|
|
642
|
+
// ✅ Get sheet range to determine size
|
|
643
|
+
const range = XLSX.utils.decode_range(worksheet['!ref'] || 'A1');
|
|
644
|
+
const totalRows = range.e.r - range.s.r + 1;
|
|
645
|
+
|
|
646
|
+
if (totalRows === 0) {
|
|
647
|
+
processedSheets++;
|
|
648
|
+
continue;
|
|
550
649
|
}
|
|
551
|
-
|
|
650
|
+
|
|
651
|
+
// ✅ Process in chunks if sheet is large
|
|
652
|
+
if (totalRows > sheetChunkSize) {
|
|
653
|
+
const rows: Record<string, any>[] = [];
|
|
654
|
+
let headers: string[] = [];
|
|
655
|
+
|
|
656
|
+
for (let startRow = 0; startRow < totalRows; startRow += sheetChunkSize) {
|
|
657
|
+
const endRow = Math.min(startRow + sheetChunkSize, totalRows);
|
|
658
|
+
|
|
659
|
+
// ✅ Read chunk with limited row range
|
|
660
|
+
const chunkData: Record<string, any>[] = XLSX.utils.sheet_to_json(worksheet, {
|
|
661
|
+
range: startRow,
|
|
662
|
+
header: startRow === 0 ? undefined : headers,
|
|
663
|
+
defval: null,
|
|
664
|
+
raw: false, // ✅ Convert dates/numbers to strings to reduce memory
|
|
665
|
+
blankrows: false
|
|
666
|
+
});
|
|
667
|
+
|
|
668
|
+
if (startRow === 0 && chunkData.length > 0) {
|
|
669
|
+
headers = Object.keys(chunkData[0]);
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
rows.push(...chunkData);
|
|
673
|
+
|
|
674
|
+
if (onProgress) {
|
|
675
|
+
const progress = 0.3 + (0.6 * (processedSheets + (startRow / totalRows)) / totalSheets);
|
|
676
|
+
onProgress(file.size * progress, file.size);
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
// ✅ Stop if we hit max rows
|
|
680
|
+
if (rows.length >= maxSheetSize) {
|
|
681
|
+
console.warn(`⚠️ Sheet "${sheetName}" truncated to ${maxSheetSize} rows`);
|
|
682
|
+
break;
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
if (rows.length > 0) {
|
|
687
|
+
sheets[sheetName] = new DataFrame(rows);
|
|
688
|
+
}
|
|
689
|
+
} else {
|
|
690
|
+
// ✅ Small sheet: process normally
|
|
691
|
+
const jsonData: Record<string, any>[] = XLSX.utils.sheet_to_json(worksheet, {
|
|
692
|
+
defval: null,
|
|
693
|
+
raw: false,
|
|
694
|
+
blankrows: false
|
|
695
|
+
});
|
|
696
|
+
|
|
697
|
+
if (jsonData.length > 0) {
|
|
698
|
+
sheets[sheetName] = new DataFrame(jsonData);
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
processedSheets++;
|
|
703
|
+
if (onProgress) {
|
|
704
|
+
const progress = 0.3 + (0.6 * (processedSheets / totalSheets));
|
|
705
|
+
onProgress(file.size * progress, file.size);
|
|
706
|
+
}
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
if (onProgress) onProgress(file.size, file.size);
|
|
552
710
|
|
|
553
711
|
return sheets;
|
|
554
712
|
}
|