juxscript 1.1.231 → 1.1.232

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,556 @@
1
+ import { DataFrame } from './DataFrame.js';
2
+ import { TabularDriver } from './TabularDriver.js';
3
+
4
/**
 * Configuration accepted by `DataFrameSource`. Every field is optional;
 * the constructor fills in the defaults noted below.
 */
export interface SourceOptions {
  /* ----- Parsing ----- */

  /** Passed to the `DataFrame` constructor for API and inline data. Default: `true`. */
  inferTypes?: boolean;
  /** Largest file `fromFile` accepts, in megabytes; larger files throw. Default: `50`. */
  maxFileSize?: number; // MB
  /** Forwarded to the driver when reading Excel workbooks. Default: `100000`. */
  maxSheetSize?: number;
  /** Index of the header row, forwarded to the parsers. Default: `0`. */
  headerRow?: number;
  /** Column delimiter for delimited text; when unset, `fromFile` asks the parser to auto-detect it. */
  delimiter?: string;

  /* ----- Storage ----- */

  /** Database name handed to the `TabularDriver`. Default: `'jux-dataframes'`. */
  dbName?: string;
  /** Store name handed to the `TabularDriver`. Default: `'frames'`. */
  storeName?: string;
  /** When `true`, loaders also store the loaded DataFrame through the driver. Default: `false`. */
  persistToIndexedDB?: boolean;
}
17
+
18
/**
 * Request settings for `DataFrameSource.fromAPI`.
 */
export interface APIOptions {
  /** HTTP method. `fromAPI` defaults to `'GET'`. */
  method?: 'GET' | 'POST' | 'PUT' | 'PATCH' | 'DELETE';
  /** Extra request headers, merged over the defaults `fromAPI` sets. */
  headers?: Record<string, string>;
  /**
   * Request payload. Strings are sent as-is; any other value is serialised
   * with `JSON.stringify`. Not sent for `GET` requests.
   */
  body?: unknown;
  /** Fetch credentials mode. `fromAPI` defaults to `'same-origin'`. */
  credentials?: 'omit' | 'same-origin' | 'include';
  /** JSONPath-like location of the row array: "data.results" or "response.items[0].records". */
  dataPath?: string;
}
25
+
26
/**
 * Settings for `DataFrameSource.fromStream`.
 */
export interface StreamOptions {
  /** When `true`, the downloaded blob is also cached via IndexedDB. `fromStream` defaults to `false`. */
  cacheToIndexedDB?: boolean;
  /** Overrides the response's `content-type` header when set. */
  mimeType?: string;
  /** NOTE(review): declared but not read by `fromStream` in this file — confirm intended use. */
  chunkSize?: number;
}
31
+
32
/**
 * Result of loading a workbook that contains more than one sheet.
 */
export interface MultiSheetResult {
  /** Name of the sheet that is active after loading. */
  activeSheet: string;
  /** Sheet names, in the order they were loaded. */
  sheetNames: string[];
  /** Parsed DataFrame for each sheet, keyed by sheet name. */
  sheets: Map<string, DataFrame>;
}
37
+
38
/** Matches Excel workbook file names (`.xls` / `.xlsx`), case-insensitively. */
const EXCEL_EXTENSION = /\.xlsx?$/i;

/** Sheet name used for every single-table source (CSV, URL, API, storage, inline). */
const DEFAULT_SHEET = 'Sheet1';

/** Excel MIME types mapped to the file extension `fromFile` recognises. */
const EXCEL_MIME_EXTENSIONS: Record<string, string> = {
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
  'application/vnd.ms-excel': '.xls'
};

/** A path segment made only of digits is treated as an array index. */
const ARRAY_INDEX = /^\d+$/;

/** `SourceOptions` after the constructor has applied defaults; only `delimiter` may stay unset. */
type ResolvedSourceOptions = Required<Omit<SourceOptions, 'delimiter'>> &
  Pick<SourceOptions, 'delimiter'>;

/**
 * DataFrameSource - Data acquisition layer for DataFrames
 *
 * Responsibilities:
 * - Load from files (CSV, TSV, Excel)
 * - Load from URLs (fetch, stream)
 * - Load from APIs (with auth, JSON traversal)
 * - Load from IndexedDB
 * - Load from inline data
 * - Multi-sheet support
 * - Progress callbacks
 *
 * Every successful load replaces the previously loaded sheets. Loads that do
 * not come from a file also drop the remembered raw file/text, so `reimport`,
 * `isExcel` and `getPreviewRows` never act on an older, unrelated file.
 *
 * This is a PURE data layer - no UI concerns
 */
export class DataFrameSource {
  private _driver: TabularDriver;
  private _options: ResolvedSourceOptions;
  private _sheets: Map<string, DataFrame> = new Map();
  private _activeSheet = '';
  private _sourceName = '';
  private _rawFile: File | null = null;
  private _rawText: string | null = null;

  /**
   * @param options Storage and parsing settings; omitted fields fall back to
   *   the defaults assigned below.
   */
  constructor(options: SourceOptions = {}) {
    this._options = {
      dbName: options.dbName ?? 'jux-dataframes',
      storeName: options.storeName ?? 'frames',
      persistToIndexedDB: options.persistToIndexedDB ?? false,
      inferTypes: options.inferTypes ?? true,
      maxFileSize: options.maxFileSize ?? 50,
      maxSheetSize: options.maxSheetSize ?? 100000,
      headerRow: options.headerRow ?? 0,
      delimiter: options.delimiter
    };

    this._driver = new TabularDriver(this._options.dbName, this._options.storeName);
  }

  /* ═══════════════════════════════════════════════════
   * INTERNAL STATE HELPERS
   * ═══════════════════════════════════════════════════ */

  /** Replace all loaded sheets with a single table under the default sheet name. */
  private _setSingleSheet(df: DataFrame): void {
    this._sheets.clear();
    this._sheets.set(DEFAULT_SHEET, df);
    this._activeSheet = DEFAULT_SHEET;
  }

  /** Forget the raw file/text kept for re-parsing. */
  private _clearRawData(): void {
    this._rawFile = null;
    this._rawText = null;
  }

  /** Snapshot of the loaded sheets; the map is a copy, so callers cannot mutate internal state. */
  private _multiSheetResult(): MultiSheetResult {
    return {
      sheets: new Map(this._sheets),
      sheetNames: Array.from(this._sheets.keys()),
      activeSheet: this._activeSheet
    };
  }

  /* ═══════════════════════════════════════════════════
   * FROM FILE
   * ═══════════════════════════════════════════════════ */

  /**
   * Load from a File object (CSV, TSV, Excel).
   *
   * Files whose name ends in `.xls`/`.xlsx` are read as workbooks; everything
   * else is read as delimited text.
   *
   * @param file File to parse; it is kept so `reimport` can parse it again.
   * @param onProgress Forwarded to the driver when reading workbooks.
   * @returns A `MultiSheetResult` when the workbook has more than one sheet,
   *   otherwise the single DataFrame.
   * @throws Error when the file exceeds `maxFileSize` or a workbook has no sheets.
   */
  async fromFile(
    file: File,
    onProgress?: (loaded: number, total: number | null) => void
  ): Promise<DataFrame | MultiSheetResult> {
    const fileSizeMB = file.size / (1024 * 1024);

    if (fileSizeMB > this._options.maxFileSize) {
      throw new Error(`File too large (${fileSizeMB.toFixed(1)}MB). Max: ${this._options.maxFileSize}MB`);
    }

    this._rawFile = file;
    this._rawText = null; // text of a previously loaded file no longer applies
    this._sourceName = file.name;

    if (EXCEL_EXTENSION.test(file.name)) {
      const sheets = await this._driver.streamFileMultiSheet(file, {
        maxSheetSize: this._options.maxSheetSize,
        headerRow: this._options.headerRow,
        onProgress
      });

      const sheetNames = Object.keys(sheets);
      if (sheetNames.length === 0) {
        // Without this guard the method would resolve to `undefined` typed as a DataFrame.
        throw new Error(`No sheets found in '${file.name}'`);
      }

      this._sheets.clear();
      for (const name of sheetNames) {
        this._sheets.set(name, sheets[name]);
      }

      const firstSheet = sheetNames[0] ?? '';
      this._activeSheet = firstSheet;

      if (this._options.persistToIndexedDB) {
        // Only the first sheet is stored under the file name; use saveAll() for every sheet.
        await this._driver.store(file.name, sheets[firstSheet], { source: file.name });
      }

      return sheetNames.length > 1 ? this._multiSheetResult() : sheets[firstSheet];
    }

    const text = await file.text();
    this._rawText = text;

    const df = this._driver.parseCSV(text, {
      autoDetectDelimiter: !this._options.delimiter,
      delimiter: this._options.delimiter,
      headerRow: this._options.headerRow,
      hasHeader: true
    });

    this._setSingleSheet(df);

    if (this._options.persistToIndexedDB) {
      await this._driver.store(file.name, df, { source: file.name });
    }

    return df;
  }

  /* ═══════════════════════════════════════════════════
   * FROM URL (Simple Fetch)
   * ═══════════════════════════════════════════════════ */

  /**
   * Fetch CSV/TSV from URL.
   *
   * The source name is the last path segment of `url` (or the whole URL when
   * that segment is empty).
   *
   * @param url Address handed to the driver's `fetch`.
   * @param onProgress Forwarded to the driver.
   */
  async fromUrl(
    url: string,
    onProgress?: (loaded: number, total: number | null) => void
  ): Promise<DataFrame> {
    const sourceName = url.split('/').pop() || url;

    const df = await this._driver.fetch(url, {
      autoDetectDelimiter: true,
      hasHeader: true,
      headerRow: this._options.headerRow,
      onProgress
    });

    this._sourceName = sourceName;
    this._clearRawData();
    this._setSingleSheet(df);

    if (this._options.persistToIndexedDB) {
      await this._driver.store(sourceName, df, { source: url });
    }

    return df;
  }

  /* ═══════════════════════════════════════════════════
   * FROM API (JSON with traversal)
   * ═══════════════════════════════════════════════════ */

  /**
   * Fetch data from API endpoint.
   * Supports JSON response with path traversal.
   *
   * `Accept` and (when a body is sent) `Content-Type` default to
   * `application/json` unless the caller supplied that header in any casing.
   *
   * @param url Endpoint to request.
   * @param options Method, headers, body, credentials and optional `dataPath`.
   * @throws Error when the response status is not OK, or the (traversed)
   *   payload is not an array.
   */
  async fromAPI(url: string, options: APIOptions = {}): Promise<DataFrame> {
    const {
      method = 'GET',
      headers = {},
      body,
      credentials = 'same-origin',
      dataPath
    } = options;

    // Header names are case-insensitive, so look for caller-supplied values that way.
    const suppliedNames = new Set(Object.keys(headers).map(h => h.toLowerCase()));
    const requestHeaders: Record<string, string> = { ...headers };
    if (!suppliedNames.has('accept')) {
      requestHeaders['Accept'] = 'application/json';
    }

    const fetchOptions: RequestInit = { method, headers: requestHeaders, credentials };

    // Explicit null check so payloads such as 0 or false are still sent.
    if (body !== undefined && body !== null && method !== 'GET') {
      fetchOptions.body = typeof body === 'string' ? body : JSON.stringify(body);
      if (!suppliedNames.has('content-type')) {
        requestHeaders['Content-Type'] = 'application/json';
      }
    }

    const response = await fetch(url, fetchOptions);

    if (!response.ok) {
      throw new Error(`API request failed: ${response.status} ${response.statusText}`);
    }

    let data = await response.json();

    // Traverse to data path if specified
    if (dataPath) {
      data = this._traversePath(data, dataPath);
    }

    // Convert to DataFrame
    if (!Array.isArray(data)) {
      throw new Error('API response must be an array of objects (or use dataPath to navigate to array)');
    }

    const df = new DataFrame(data, { inferTypes: this._options.inferTypes });

    this._sourceName = url;
    this._clearRawData();
    this._setSingleSheet(df);

    if (this._options.persistToIndexedDB) {
      await this._driver.store(this._sourceName, df, { source: url });
    }

    return df;
  }

  /**
   * Traverse JSON path like "data.results" or "response.items[0].records".
   *
   * A segment is used as a numeric index only when it consists solely of
   * digits AND the current value is an array; otherwise it is used verbatim
   * as a property key, so keys such as "007" or "1e3" are not coerced.
   *
   * @throws Error when a segment has to be read from `null` or `undefined`.
   */
  private _traversePath(obj: unknown, path: string): unknown {
    const parts = path.split(/\.|\[|\]/).filter(Boolean);
    let current: unknown = obj;

    for (const part of parts) {
      if (current === null || current === undefined) {
        throw new Error(`Cannot traverse path '${path}': null at '${part}'`);
      }

      current =
        Array.isArray(current) && ARRAY_INDEX.test(part)
          ? current[Number(part)]
          : (current as Record<string, unknown>)[part];
    }

    return current;
  }

  /* ═══════════════════════════════════════════════════
   * FROM STREAM (Binary data with MIME type)
   * ═══════════════════════════════════════════════════ */

  /**
   * Download binary data from a URL and parse it through `fromFile`.
   *
   * The file name is taken from the URL path (query string and fragment
   * removed). When the MIME type identifies an Excel workbook but the name has
   * no Excel extension, the extension is appended so the file is parsed as a
   * workbook.
   *
   * @param url Address to download.
   * @param options `mimeType` overrides the response header; `cacheToIndexedDB`
   *   also stores the raw blob.
   * @param onProgress Forwarded to `fromFile`.
   * @throws Error when the response status is not OK.
   */
  async fromStream(
    url: string,
    options: StreamOptions = {},
    onProgress?: (loaded: number, total: number | null) => void
  ): Promise<DataFrame | MultiSheetResult> {
    const {
      mimeType,
      cacheToIndexedDB = false
    } = options;

    const response = await fetch(url);

    if (!response.ok) {
      throw new Error(`Stream request failed: ${response.status}`);
    }

    const contentType = mimeType || response.headers.get('content-type') || '';
    const fileName = this._fileNameFromUrl(url, contentType);

    // Get the blob
    const blob = await response.blob();
    const file = new File([blob], fileName, { type: contentType });

    // Cache to IndexedDB if requested
    if (cacheToIndexedDB) {
      // Store raw blob for potential re-parsing
      await this._cacheBlob(fileName, blob);
    }

    return this.fromFile(file, onProgress);
  }

  /**
   * Derive a file name for downloaded data.
   *
   * @param url Download address; only its last path segment is used.
   * @param contentType MIME type, optionally with parameters (`; charset=…`).
   * @returns The last path segment, `'stream-data'` when it is empty, with an
   *   Excel extension appended when only the MIME type marks it as a workbook.
   */
  private _fileNameFromUrl(url: string, contentType: string): string {
    const [path = ''] = url.split(/[?#]/, 1);
    let name = path.split('/').pop() || 'stream-data';

    if (!EXCEL_EXTENSION.test(name)) {
      const [mime = ''] = contentType.split(';', 1);
      const extension = EXCEL_MIME_EXTENSIONS[mime.trim().toLowerCase()];
      if (extension) {
        name += extension;
      }
    }

    return name;
  }

  /**
   * Store a raw blob in the `blobs` object store.
   * NOTE(review): assumes the driver's database defines a `blobs` store — confirm.
   */
  private async _cacheBlob(name: string, blob: Blob): Promise<void> {
    const db = await this._driver.open();
    return new Promise((resolve, reject) => {
      const tx = db.transaction('blobs', 'readwrite');
      tx.objectStore('blobs').put({ name, blob, timestamp: Date.now() });
      tx.oncomplete = () => resolve();
      tx.onerror = () => reject(tx.error);
      // An aborted transaction would otherwise leave this promise pending forever.
      tx.onabort = () => reject(tx.error);
    });
  }

  /* ═══════════════════════════════════════════════════
   * FROM INDEXEDDB
   * ═══════════════════════════════════════════════════ */

  /**
   * Load from IndexedDB by key/name.
   *
   * @returns The stored DataFrame, or `null` when the driver finds nothing
   *   (in which case the current state is left untouched).
   */
  async fromStorage(key: string): Promise<DataFrame | null> {
    const df = await this._driver.loadByName(key);

    if (df) {
      this._sourceName = key;
      this._clearRawData();
      this._setSingleSheet(df);
    }

    return df;
  }

  /**
   * List all stored DataFrames.
   */
  async listStored(): Promise<Array<{ id: string; name: string; rowCount: number; timestamp: number }>> {
    return this._driver.list();
  }

  /* ═══════════════════════════════════════════════════
   * FROM INLINE DATA
   * ═══════════════════════════════════════════════════ */

  /**
   * Create from inline data.
   *
   * @param data Array of row objects, or an object of column arrays.
   * @param name Source name to record. Default: `'inline'`.
   */
  fromData(
    data: Record<string, any>[] | Record<string, any[]>,
    name: string = 'inline'
  ): DataFrame {
    const df = new DataFrame(data, { inferTypes: this._options.inferTypes });

    this._sourceName = name;
    this._clearRawData();
    this._setSingleSheet(df);

    return df;
  }

  /**
   * Create empty DataFrame with specified columns.
   */
  empty(columns: string[]): DataFrame {
    const data: Record<string, any[]> = {};
    for (const column of columns) {
      data[column] = [];
    }
    return this.fromData(data, 'empty');
  }

  /* ═══════════════════════════════════════════════════
   * RE-IMPORT / RE-PARSE
   * ═══════════════════════════════════════════════════ */

  /**
   * Re-parse the remembered raw data with different options.
   *
   * @param options `headerRow` / `delimiter` override (and, for files, replace)
   *   the stored settings. `sheetName`, when it names a sheet that exists after
   *   re-parsing, becomes the active sheet instead of the first one.
   * @throws Error when no raw file or text is available.
   */
  async reimport(options: {
    headerRow?: number;
    delimiter?: string;
    sheetName?: string;
  }): Promise<DataFrame | MultiSheetResult> {
    if (this._rawFile) {
      // Update options
      if (options.headerRow !== undefined) {
        this._options.headerRow = options.headerRow;
      }
      if (options.delimiter !== undefined) {
        this._options.delimiter = options.delimiter;
      }

      const result = await this.fromFile(this._rawFile);

      const { sheetName } = options;
      if (sheetName !== undefined && this._sheets.has(sheetName)) {
        this._activeSheet = sheetName;
        // fromFile returns a MultiSheetResult exactly when several sheets were loaded.
        if (this._sheets.size > 1) {
          return this._multiSheetResult();
        }
      }

      return result;
    }

    if (this._rawText) {
      const delimiter = options.delimiter || this._options.delimiter;
      const df = this._driver.parseCSV(this._rawText, {
        autoDetectDelimiter: !delimiter,
        delimiter,
        headerRow: options.headerRow ?? this._options.headerRow,
        hasHeader: true
      });

      this._sheets.set(this._activeSheet, df);
      return df;
    }

    throw new Error('No raw data available for reimport');
  }

  /**
   * Get raw preview rows for header selection UI.
   *
   * @param maxRows Maximum number of rows to return. Default: `15`.
   * @returns Row index plus raw cell values; empty when no raw data is held.
   */
  async getPreviewRows(maxRows: number = 15): Promise<{ row: number; values: any[] }[]> {
    if (this._rawFile && EXCEL_EXTENSION.test(this._rawFile.name)) {
      const rows = await this._driver.readRawExcelRows(this._rawFile, maxRows);
      return rows.map(r => ({ row: r.sheetRow, values: r.values }));
    }

    if (this._rawText) {
      // The driver's line helpers are not part of its public surface, hence the cast.
      const driver = this._driver as any;
      const delimiter = this._options.delimiter || driver._detectDelimiter(this._rawText);

      // Split on CRLF as well as LF so the last cell does not keep a trailing '\r'.
      return this._rawText
        .split(/\r?\n/)
        .slice(0, maxRows)
        .map((line, i) => ({ row: i, values: driver._parseLine(line, delimiter) }));
    }

    return [];
  }

  /* ═══════════════════════════════════════════════════
   * MULTI-SHEET ACCESS
   * ═══════════════════════════════════════════════════ */

  /**
   * Get all sheet names.
   */
  get sheetNames(): string[] {
    return Array.from(this._sheets.keys());
  }

  /**
   * Get active sheet.
   */
  get activeSheet(): string {
    return this._activeSheet;
  }

  /**
   * Set active sheet.
   *
   * @returns The DataFrame of the newly active sheet.
   * @throws Error when no sheet with that name is loaded.
   */
  setActiveSheet(name: string): DataFrame {
    const sheet = this._sheets.get(name);
    if (sheet === undefined) {
      throw new Error(`Sheet '${name}' not found`);
    }
    this._activeSheet = name;
    return sheet;
  }

  /**
   * Get specific sheet by name.
   */
  getSheet(name: string): DataFrame | undefined {
    return this._sheets.get(name);
  }

  /**
   * Get current DataFrame (active sheet), or `null` when nothing is loaded.
   */
  get df(): DataFrame | null {
    return this._sheets.get(this._activeSheet) ?? null;
  }

  /**
   * Get all sheets (as a copy of the internal map).
   */
  get sheets(): Map<string, DataFrame> {
    return new Map(this._sheets);
  }

  /* ═══════════════════════════════════════════════════
   * PERSISTENCE
   * ═══════════════════════════════════════════════════ */

  /**
   * Save current DataFrame to IndexedDB.
   *
   * @param key Name to store under; falls back to the current source name.
   * @returns The value returned by the driver's `store`, or `null` when no
   *   DataFrame is loaded.
   */
  async save(key?: string): Promise<string | null> {
    const df = this.df;
    if (!df) return null;

    const name = key || this._sourceName;
    return this._driver.store(name, df);
  }

  /**
   * Save all sheets, each under `<prefix>_<sheetName>`.
   *
   * @param prefix Key prefix; falls back to the current source name.
   * @returns The values returned by the driver's `store`, in sheet order.
   */
  async saveAll(prefix?: string): Promise<string[]> {
    const ids: string[] = [];

    for (const [sheetName, df] of this._sheets) {
      const key = prefix ? `${prefix}_${sheetName}` : `${this._sourceName}_${sheetName}`;
      const id = await this._driver.store(key, df);
      ids.push(id);
    }

    return ids;
  }

  /**
   * Delete every stored table whose name equals `key`.
   */
  async deleteStored(key: string): Promise<void> {
    const tables = await this._driver.list();
    const matching = tables.filter(t => t.name === key);

    for (const table of matching) {
      await this._driver.delete(table.id);
    }
  }

  /**
   * Clear all in-memory data (stored tables are not touched).
   */
  clear(): void {
    this._sheets.clear();
    this._activeSheet = '';
    this._sourceName = '';
    this._clearRawData();
  }

  /* ═══════════════════════════════════════════════════
   * SOURCE INFO
   * ═══════════════════════════════════════════════════ */

  /** Name recorded for the most recent load. */
  get sourceName(): string {
    return this._sourceName;
  }

  /** Whether a raw file or raw text is held for `reimport` / `getPreviewRows`. */
  get hasRawData(): boolean {
    return this._rawFile !== null || this._rawText !== null;
  }

  /** Whether the remembered raw file has an Excel extension. */
  get isExcel(): boolean {
    return this._rawFile !== null && EXCEL_EXTENSION.test(this._rawFile.name);
  }
}
550
+
551
/**
 * Factory function: builds a `DataFrameSource` from the given options.
 *
 * @param options Settings passed straight to the `DataFrameSource` constructor.
 * @returns The newly constructed source.
 */
export function dataFrameSource(options: SourceOptions = {}): DataFrameSource {
  const source = new DataFrameSource(options);
  return source;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "juxscript",
3
- "version": "1.1.231",
3
+ "version": "1.1.232",
4
4
  "type": "module",
5
5
  "description": "A JavaScript UX authorship platform",
6
6
  "main": "index.js",