@soulcraft/brainy 3.21.0 → 3.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/README.md +40 -0
- package/dist/augmentations/defaultAugmentations.d.ts +6 -0
- package/dist/augmentations/defaultAugmentations.js +12 -0
- package/dist/augmentations/intelligentImport/IntelligentImportAugmentation.d.ts +51 -0
- package/dist/augmentations/intelligentImport/IntelligentImportAugmentation.js +185 -0
- package/dist/augmentations/intelligentImport/handlers/base.d.ts +49 -0
- package/dist/augmentations/intelligentImport/handlers/base.js +149 -0
- package/dist/augmentations/intelligentImport/handlers/csvHandler.d.ts +34 -0
- package/dist/augmentations/intelligentImport/handlers/csvHandler.js +185 -0
- package/dist/augmentations/intelligentImport/handlers/excelHandler.d.ts +31 -0
- package/dist/augmentations/intelligentImport/handlers/excelHandler.js +148 -0
- package/dist/augmentations/intelligentImport/handlers/pdfHandler.d.ts +35 -0
- package/dist/augmentations/intelligentImport/handlers/pdfHandler.js +247 -0
- package/dist/augmentations/intelligentImport/index.d.ts +9 -0
- package/dist/augmentations/intelligentImport/index.js +9 -0
- package/dist/augmentations/intelligentImport/types.d.ts +111 -0
- package/dist/augmentations/intelligentImport/types.js +6 -0
- package/package.json +7 -2
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
### [3.22.0](https://github.com/soulcraftlabs/brainy/compare/v3.21.0...v3.22.0) (2025-10-01)
|
|
6
|
+
|
|
7
|
+
- feat: add intelligent import for CSV, Excel, and PDF files (814cbb4)
|
|
8
|
+
|
|
9
|
+
|
|
5
10
|
### [3.21.0](https://github.com/soulcraftlabs/brainy/compare/v3.20.5...v3.21.0) (2025-10-01)
|
|
6
11
|
|
|
7
12
|
- feat: add progress tracking, entity caching, and relationship confidence (2f9d512)
|
package/README.md
CHANGED
|
@@ -457,6 +457,41 @@ npm run download-models # Download Q8 model
|
|
|
457
457
|
npm run download-models:q8 # Download Q8 model
|
|
458
458
|
```
|
|
459
459
|
|
|
460
|
+
## 🚀 Import Anything - Files, Data, URLs
|
|
461
|
+
|
|
462
|
+
Brainy's universal import intelligently handles **any data format**:
|
|
463
|
+
|
|
464
|
+
```javascript
|
|
465
|
+
// Import CSV with auto-detection
|
|
466
|
+
await brain.import('customers.csv')
|
|
467
|
+
// ✨ Auto-detects: encoding, delimiter, types, creates entities!
|
|
468
|
+
|
|
469
|
+
// Import Excel workbooks with multi-sheet support
|
|
470
|
+
await brain.import('sales-data.xlsx', {
|
|
471
|
+
excelSheets: ['Q1', 'Q2'] // or 'all' for all sheets
|
|
472
|
+
})
|
|
473
|
+
// ✨ Processes the selected sheets (or every sheet with 'all'), preserves structure, infers types!
|
|
474
|
+
|
|
475
|
+
// Import PDF documents with table extraction
|
|
476
|
+
await brain.import('research-paper.pdf', {
|
|
477
|
+
pdfExtractTables: true
|
|
478
|
+
})
|
|
479
|
+
// ✨ Extracts text, detects tables, preserves metadata!
|
|
480
|
+
|
|
481
|
+
// Import JSON/YAML data
|
|
482
|
+
await brain.import([
|
|
483
|
+
{ name: 'Alice', role: 'Engineer' },
|
|
484
|
+
{ name: 'Bob', role: 'Designer' }
|
|
485
|
+
])
|
|
486
|
+
// ✨ Automatically creates Person entities with relationships!
|
|
487
|
+
|
|
488
|
+
// Import from URLs (auto-fetched)
|
|
489
|
+
await brain.import('https://api.example.com/data.json')
|
|
490
|
+
// ✨ Auto-detects URL, fetches, parses, processes!
|
|
491
|
+
```
|
|
492
|
+
|
|
493
|
+
**📖 [Complete Import Guide →](docs/guides/import-anything.md)** | **[Live Example →](examples/import-excel-pdf-csv.ts)**
|
|
494
|
+
|
|
460
495
|
## 📚 Core API
|
|
461
496
|
|
|
462
497
|
### `search()` - Vector Similarity
|
|
@@ -513,6 +548,11 @@ await brain.deleteVerb(verbId)
|
|
|
513
548
|
// Bulk operations
|
|
514
549
|
await brain.import(arrayOfData)
|
|
515
550
|
const exported = await brain.export({format: 'json'})
|
|
551
|
+
|
|
552
|
+
// Import from CSV, Excel, PDF files (auto-detected)
|
|
553
|
+
await brain.import('customers.csv') // CSV with encoding detection
|
|
554
|
+
await brain.import('sales-report.xlsx') // Excel with multi-sheet support
|
|
555
|
+
await brain.import('research.pdf') // PDF with table extraction
|
|
516
556
|
```
|
|
517
557
|
|
|
518
558
|
## 🌐 Distributed System (NEW!)
|
|
@@ -13,6 +13,7 @@ import { CacheAugmentation } from './cacheAugmentation.js';
|
|
|
13
13
|
import { MetricsAugmentation } from './metricsAugmentation.js';
|
|
14
14
|
import { MonitoringAugmentation } from './monitoringAugmentation.js';
|
|
15
15
|
import { UniversalDisplayAugmentation } from './universalDisplayAugmentation.js';
|
|
16
|
+
import { IntelligentImportAugmentation } from './intelligentImport/index.js';
|
|
16
17
|
/**
|
|
17
18
|
* Create default augmentations for zero-config operation
|
|
18
19
|
* Returns an array of augmentations to be registered
|
|
@@ -25,6 +26,7 @@ export declare function createDefaultAugmentations(config?: {
|
|
|
25
26
|
metrics?: boolean | Record<string, any>;
|
|
26
27
|
monitoring?: boolean | Record<string, any>;
|
|
27
28
|
display?: boolean | Record<string, any>;
|
|
29
|
+
intelligentImport?: boolean | Record<string, any>;
|
|
28
30
|
}): BaseAugmentation[];
|
|
29
31
|
/**
|
|
30
32
|
* Get augmentation by name with type safety
|
|
@@ -54,4 +56,8 @@ export declare const AugmentationHelpers: {
|
|
|
54
56
|
* Get display augmentation
|
|
55
57
|
*/
|
|
56
58
|
getDisplay(brain: Brainy): UniversalDisplayAugmentation | null;
|
|
59
|
+
/**
|
|
60
|
+
* Get intelligent import augmentation
|
|
61
|
+
*/
|
|
62
|
+
getIntelligentImport(brain: Brainy): IntelligentImportAugmentation | null;
|
|
57
63
|
};
|
|
@@ -11,6 +11,7 @@ import { CacheAugmentation } from './cacheAugmentation.js';
|
|
|
11
11
|
import { MetricsAugmentation } from './metricsAugmentation.js';
|
|
12
12
|
import { MonitoringAugmentation } from './monitoringAugmentation.js';
|
|
13
13
|
import { UniversalDisplayAugmentation } from './universalDisplayAugmentation.js';
|
|
14
|
+
import { IntelligentImportAugmentation } from './intelligentImport/index.js';
|
|
14
15
|
/**
|
|
15
16
|
* Create default augmentations for zero-config operation
|
|
16
17
|
* Returns an array of augmentations to be registered
|
|
@@ -20,6 +21,11 @@ import { UniversalDisplayAugmentation } from './universalDisplayAugmentation.js'
|
|
|
20
21
|
*/
|
|
21
22
|
export function createDefaultAugmentations(config = {}) {
|
|
22
23
|
const augmentations = [];
|
|
24
|
+
// Intelligent Import augmentation (CSV, Excel, PDF)
|
|
25
|
+
if (config.intelligentImport !== false) {
|
|
26
|
+
const importConfig = typeof config.intelligentImport === 'object' ? config.intelligentImport : {};
|
|
27
|
+
augmentations.push(new IntelligentImportAugmentation(importConfig));
|
|
28
|
+
}
|
|
23
29
|
// Cache augmentation (was SearchCache)
|
|
24
30
|
if (config.cache !== false) {
|
|
25
31
|
const cacheConfig = typeof config.cache === 'object' ? config.cache : {};
|
|
@@ -88,6 +94,12 @@ export const AugmentationHelpers = {
|
|
|
88
94
|
*/
|
|
89
95
|
getDisplay(brain) {
|
|
90
96
|
return getAugmentation(brain, 'display');
|
|
97
|
+
},
|
|
98
|
+
/**
|
|
99
|
+
* Get intelligent import augmentation
|
|
100
|
+
*/
|
|
101
|
+
getIntelligentImport(brain) {
|
|
102
|
+
return getAugmentation(brain, 'intelligent-import');
|
|
91
103
|
}
|
|
92
104
|
};
|
|
93
105
|
//# sourceMappingURL=defaultAugmentations.js.map
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intelligent Import Augmentation
|
|
3
|
+
*
|
|
4
|
+
* Automatically detects and processes CSV, Excel, and PDF files with:
|
|
5
|
+
* - Format detection and routing
|
|
6
|
+
* - Lazy-loaded handlers
|
|
7
|
+
* - Intelligent entity and relationship extraction
|
|
8
|
+
* - Integration with NeuralImport augmentation
|
|
9
|
+
*/
|
|
10
|
+
import { BaseAugmentation } from '../brainyAugmentation.js';
|
|
11
|
+
import { FormatHandler, IntelligentImportConfig } from './types.js';
|
|
12
|
+
export declare class IntelligentImportAugmentation extends BaseAugmentation {
    /** Name under which this augmentation is registered and looked up. */
    readonly name = "intelligent-import";
    /** Timing slot of this augmentation relative to the wrapped operation. */
    readonly timing: "before";
    /** Declared read/write footprint on the operation params. */
    readonly metadata: { reads: "*"; writes: string[] };
    /** Operation names this augmentation registers for. */
    readonly operations: any[];
    /** Ordering value used when sequencing augmentations. */
    readonly priority = 75;
    /** Effective configuration for this instance. */
    protected config: IntelligentImportConfig;
    /** Registered format handlers. */
    private handlers;
    /** Initialization flag. */
    private initialized;
    /**
     * @param config Optional partial configuration for the augmentation.
     */
    constructor(config?: Partial<IntelligentImportConfig>);
    /** Initialization hook. */
    protected onInitialize(): Promise<void>;
    /**
     * Augmentation entry point.
     *
     * @param operation Name of the operation being executed.
     * @param params Operation parameters.
     * @param next Continuation that runs the rest of the pipeline.
     * @returns The result produced by `next`.
     */
    execute<T = any>(operation: string, params: any, next: () => Promise<T>): Promise<T>;
    /** Decides whether this operation should be processed. */
    private shouldProcess;
    /** Pulls file data out of the various supported param shapes. */
    private extractFileData;
    /** Finds the handler able to process a given file. */
    private detectHandler;
    /**
     * Looks up a handler by its format name.
     *
     * @param format Format name.
     * @returns The matching handler, or `undefined` when none is registered.
     */
    getHandler(format: string): FormatHandler | undefined;
    /** Returns every registered handler. */
    getHandlers(): FormatHandler[];
    /** Returns the names of the supported formats. */
    getSupportedFormats(): string[];
}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intelligent Import Augmentation
|
|
3
|
+
*
|
|
4
|
+
* Automatically detects and processes CSV, Excel, and PDF files with:
|
|
5
|
+
* - Format detection and routing
|
|
6
|
+
* - Lazy-loaded handlers
|
|
7
|
+
* - Intelligent entity and relationship extraction
|
|
8
|
+
* - Integration with NeuralImport augmentation
|
|
9
|
+
*/
|
|
10
|
+
import { BaseAugmentation } from '../brainyAugmentation.js';
|
|
11
|
+
import { CSVHandler } from './handlers/csvHandler.js';
|
|
12
|
+
import { ExcelHandler } from './handlers/excelHandler.js';
|
|
13
|
+
import { PDFHandler } from './handlers/pdfHandler.js';
|
|
14
|
+
/**
 * Augmentation that intercepts import operations, detects whether the payload
 * is a CSV, Excel or PDF file, runs the matching format handler and attaches
 * the extracted rows to the operation params before the pipeline continues.
 */
export class IntelligentImportAugmentation extends BaseAugmentation {
    /**
     * @param {object} [config] Partial configuration. Any key supplied here
     *   overrides the defaults below (all formats enabled, 100MB size limit,
     *   caching enabled with a 24 hour TTL).
     */
    constructor(config = {}) {
        super(config);
        this.name = 'intelligent-import';
        this.timing = 'before';
        this.metadata = {
            reads: '*',
            writes: ['_intelligentImport', '_processedFormat', '_extractedData']
        };
        this.operations = ['import', 'importFile', 'importFromFile', 'importFromURL', 'all'];
        this.priority = 75; // Before NeuralImport (80), after validation
        this.handlers = new Map();
        this.initialized = false;
        this.config = {
            enableCSV: true,
            enableExcel: true,
            enablePDF: true,
            maxFileSize: 100 * 1024 * 1024, // 100MB default
            enableCache: true,
            cacheTTL: 24 * 60 * 60 * 1000, // 24 hours
            ...config
        };
    }
    /**
     * Registers one handler per enabled format and marks the augmentation as
     * ready; `shouldProcess` rejects every operation until this has run.
     */
    async onInitialize() {
        if (this.config.enableCSV) {
            this.handlers.set('csv', new CSVHandler());
        }
        if (this.config.enableExcel) {
            this.handlers.set('excel', new ExcelHandler());
        }
        if (this.config.enablePDF) {
            this.handlers.set('pdf', new PDFHandler());
        }
        this.initialized = true;
        this.log(`Initialized with ${this.handlers.size} format handlers (CSV: ${this.config.enableCSV}, Excel: ${this.config.enableExcel}, PDF: ${this.config.enablePDF})`);
    }
    /**
     * Pipeline entry point. When the operation is an import carrying file
     * content in a supported format, the content is parsed and `params` is
     * enriched in place (`_intelligentImport`, `_processedFormat`,
     * `_extractedData`, `_metadata.intelligentImport`, and - when rows were
     * extracted - `data` and `metadata`). Processing is best-effort: any
     * failure is logged as a warning and the original params are passed on.
     *
     * @param {string} operation Name of the operation being executed.
     * @param {any} params Operation parameters; mutated on success.
     * @param {() => Promise<any>} next Continuation for the rest of the pipeline.
     * @returns {Promise<any>} Whatever `next()` resolves to.
     */
    async execute(operation, params, next) {
        // Only process import operations
        if (!this.shouldProcess(operation, params)) {
            return next();
        }
        try {
            const fileData = this.extractFileData(params);
            if (fileData) {
                if (this.config.maxFileSize && fileData.data.length > this.config.maxFileSize) {
                    this.log(`File too large (${fileData.data.length} bytes), skipping intelligent import`, 'warn');
                }
                else {
                    // A missing handler means the format is unsupported: pass through untouched.
                    const handler = this.detectHandler(fileData.data, fileData.filename);
                    if (handler) {
                        this.log(`Processing ${fileData.filename || 'file'} with ${handler.format} handler`);
                        // Per-call options win over the configured per-format defaults.
                        const processed = await handler.process(fileData.data, {
                            filename: fileData.filename,
                            ext: fileData.ext,
                            ...this.config.csvDefaults,
                            ...this.config.excelDefaults,
                            ...this.config.pdfDefaults,
                            ...params.options
                        });
                        // Enrich params with processed data
                        params._intelligentImport = true;
                        params._processedFormat = processed.format;
                        params._extractedData = processed.data;
                        params._metadata = {
                            ...params._metadata,
                            intelligentImport: processed.metadata
                        };
                        if (processed.data.length > 0) {
                            // Hand the structured rows to later augmentations in place of the raw file.
                            params.data = processed.data;
                            params.metadata = params._metadata;
                        }
                        this.log(`Extracted ${processed.data.length} items from ${processed.format} file`);
                    }
                }
            }
        }
        catch (error) {
            // Fall through to normal import on error
            this.log(`Intelligent import processing failed: ${error instanceof Error ? error.message : String(error)}`, 'warn');
        }
        // Invoked exactly once and outside the try block, so a failure raised by
        // the rest of the pipeline is never misreported as an import failure
        // and never causes `next` to be called a second time.
        return next();
    }
    /**
     * Check if we should process this operation.
     *
     * @param {string} operation Operation name.
     * @param {any} params Operation parameters.
     * @returns {boolean} True when handlers are ready, the operation name
     *   contains one of the import operation names, and params carry at least
     *   one of `source`, `data`, `filePath` or `url`.
     */
    shouldProcess(operation, params) {
        // Only process if we have handlers initialized
        if (!this.initialized || this.handlers.size === 0) {
            return false;
        }
        // Check operation type (substring match)
        const validOps = ['import', 'importFile', 'importFromFile', 'importFromURL'];
        if (!validOps.some(op => operation.includes(op))) {
            return false;
        }
        // Must have some data
        if (!params || (!params.source && !params.data && !params.filePath && !params.url)) {
            return false;
        }
        return true;
    }
    /**
     * Extract file data from various param formats.
     *
     * `params.source` is examined first, then `params.data`. A Buffer is used
     * as-is and a non-empty string is converted to a Buffer. A `filePath`
     * alone yields `null`: reading from disk is handled elsewhere.
     *
     * @param {any} params Operation parameters.
     * @returns {{data: Buffer, filename: (string|undefined)}|null} The file
     *   content and optional filename, or `null` when nothing usable is found.
     */
    extractFileData(params) {
        for (const candidate of [params.source, params.data]) {
            if (!candidate) {
                continue;
            }
            if (Buffer.isBuffer(candidate)) {
                return { data: candidate, filename: params.filename };
            }
            if (typeof candidate === 'string') {
                // NOTE(review): the string is treated as file content, not as a path - confirm against callers.
                return { data: Buffer.from(candidate), filename: params.filename };
            }
        }
        return null;
    }
    /**
     * Detect which handler can process this file.
     *
     * @param {Buffer} data File content.
     * @param {string} [filename] Optional filename used as a secondary hint.
     * @returns {object|null} The first handler (in registration order) that
     *   accepts the content or the filename, or `null` when none does.
     */
    detectHandler(data, filename) {
        for (const handler of this.handlers.values()) {
            if (handler.canHandle(data) || (filename && handler.canHandle({ filename }))) {
                return handler;
            }
        }
        return null;
    }
    /**
     * Get handler by format name.
     *
     * @param {string} format Format name; matched case-insensitively.
     * @returns {object|undefined} The handler, or `undefined` if not registered.
     */
    getHandler(format) {
        return this.handlers.get(format.toLowerCase());
    }
    /**
     * Get all registered handlers.
     *
     * @returns {object[]} Handlers in registration order.
     */
    getHandlers() {
        return Array.from(this.handlers.values());
    }
    /**
     * Get supported formats.
     *
     * @returns {string[]} Format keys of the registered handlers.
     */
    getSupportedFormats() {
        return Array.from(this.handlers.keys());
    }
}
|
|
185
|
+
//# sourceMappingURL=IntelligentImportAugmentation.js.map
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Format Handler
|
|
3
|
+
* Abstract class providing common functionality for all format handlers
|
|
4
|
+
*/
|
|
5
|
+
import { FormatHandler, FormatHandlerOptions, ProcessedData } from '../types.js';
|
|
6
|
+
export declare abstract class BaseFormatHandler implements FormatHandler {
    /** Format identifier supplied by each concrete handler. */
    abstract readonly format: string;
    /**
     * Processes file content.
     *
     * @param data File content as a Buffer or string.
     * @param options Handler options.
     * @returns The processed data.
     */
    abstract process(data: Buffer | string, options: FormatHandlerOptions): Promise<ProcessedData>;
    /**
     * Reports whether this handler accepts the given input.
     *
     * @param data File content, or a descriptor carrying a filename and/or extension.
     */
    abstract canHandle(data: Buffer | string | { filename?: string; ext?: string }): boolean;
    /**
     * Detects the file extension from the supported input shapes.
     *
     * @returns The extension, or `null` when it cannot be determined.
     */
    protected detectExtension(data: Buffer | string | { filename?: string; ext?: string }): string | null;
    /** Extracts the extension from a filename. */
    protected getExtension(filename: string): string;
    /**
     * Infers a type name per field by analysing multiple rows.
     *
     * @param data Rows keyed by field name.
     * @returns Map from field name to inferred type name.
     */
    protected inferFieldTypes(data: Array<Record<string, any>>): Record<string, string>;
    /** Infers the type name of a single value. */
    protected inferType(value: any): string;
    /** Checks whether a string looks like a date. */
    protected isDateString(value: string): boolean;
    /** Sanitizes a field name so it can be used as an object key. */
    protected sanitizeFieldName(name: string): string;
    /**
     * Converts a value to the given type.
     *
     * @param value Raw value.
     * @param type Target type name.
     */
    protected convertValue(value: any, type: string): any;
    /**
     * Builds a metadata object carrying the common fields.
     *
     * @param rowCount Number of rows.
     * @param fields Field names.
     * @param processingTime Processing time.
     * @param extra Optional additional metadata entries.
     */
    protected createMetadata(rowCount: number, fields: string[], processingTime: number, extra?: Record<string, any>): ProcessedData['metadata'];
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Format Handler
|
|
3
|
+
* Abstract class providing common functionality for all format handlers
|
|
4
|
+
*/
|
|
5
|
+
export class BaseFormatHandler {
|
|
6
|
+
/**
|
|
7
|
+
* Detect file extension from various inputs
|
|
8
|
+
*/
|
|
9
|
+
detectExtension(data) {
|
|
10
|
+
if (typeof data === 'object' && 'filename' in data && data.filename) {
|
|
11
|
+
return this.getExtension(data.filename);
|
|
12
|
+
}
|
|
13
|
+
if (typeof data === 'object' && 'ext' in data && data.ext) {
|
|
14
|
+
return data.ext.toLowerCase().replace(/^\./, '');
|
|
15
|
+
}
|
|
16
|
+
return null;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Extract extension from filename
|
|
20
|
+
*/
|
|
21
|
+
getExtension(filename) {
|
|
22
|
+
const match = filename.match(/\.([^.]+)$/);
|
|
23
|
+
return match ? match[1].toLowerCase() : '';
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Infer field types from data
|
|
27
|
+
* Analyzes multiple rows to determine the most appropriate type
|
|
28
|
+
*/
|
|
29
|
+
inferFieldTypes(data) {
|
|
30
|
+
if (data.length === 0)
|
|
31
|
+
return {};
|
|
32
|
+
const types = {};
|
|
33
|
+
const firstRow = data[0];
|
|
34
|
+
const sampleSize = Math.min(10, data.length);
|
|
35
|
+
for (const key of Object.keys(firstRow)) {
|
|
36
|
+
// Check first few rows to get more accurate type
|
|
37
|
+
const sampleTypes = new Set();
|
|
38
|
+
for (let i = 0; i < sampleSize; i++) {
|
|
39
|
+
const value = data[i][key];
|
|
40
|
+
const type = this.inferType(value);
|
|
41
|
+
sampleTypes.add(type);
|
|
42
|
+
}
|
|
43
|
+
// If we see both integer and float, use float
|
|
44
|
+
if (sampleTypes.has('float') || (sampleTypes.has('integer') && sampleTypes.has('float'))) {
|
|
45
|
+
types[key] = 'float';
|
|
46
|
+
}
|
|
47
|
+
else if (sampleTypes.has('integer')) {
|
|
48
|
+
types[key] = 'integer';
|
|
49
|
+
}
|
|
50
|
+
else if (sampleTypes.has('date')) {
|
|
51
|
+
types[key] = 'date';
|
|
52
|
+
}
|
|
53
|
+
else if (sampleTypes.has('boolean')) {
|
|
54
|
+
types[key] = 'boolean';
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
types[key] = 'string';
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
return types;
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Infer type of a single value
|
|
64
|
+
*/
|
|
65
|
+
inferType(value) {
|
|
66
|
+
if (value === null || value === undefined || value === '')
|
|
67
|
+
return 'string';
|
|
68
|
+
if (typeof value === 'number')
|
|
69
|
+
return 'number';
|
|
70
|
+
if (typeof value === 'boolean')
|
|
71
|
+
return 'boolean';
|
|
72
|
+
if (typeof value === 'string') {
|
|
73
|
+
// Check if it's a number
|
|
74
|
+
if (/^-?\d+$/.test(value))
|
|
75
|
+
return 'integer';
|
|
76
|
+
if (/^-?\d+\.\d+$/.test(value))
|
|
77
|
+
return 'float';
|
|
78
|
+
// Check if it's a date
|
|
79
|
+
if (this.isDateString(value))
|
|
80
|
+
return 'date';
|
|
81
|
+
// Check if it's a boolean
|
|
82
|
+
if (/^(true|false|yes|no|y|n)$/i.test(value))
|
|
83
|
+
return 'boolean';
|
|
84
|
+
}
|
|
85
|
+
return 'string';
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Check if string looks like a date
|
|
89
|
+
*/
|
|
90
|
+
isDateString(value) {
|
|
91
|
+
// ISO 8601
|
|
92
|
+
if (/^\d{4}-\d{2}-\d{2}/.test(value))
|
|
93
|
+
return true;
|
|
94
|
+
// Common date formats
|
|
95
|
+
if (/^\d{1,2}\/\d{1,2}\/\d{2,4}$/.test(value))
|
|
96
|
+
return true;
|
|
97
|
+
if (/^\d{1,2}-\d{1,2}-\d{2,4}$/.test(value))
|
|
98
|
+
return true;
|
|
99
|
+
return false;
|
|
100
|
+
}
|
|
101
|
+
/**
|
|
102
|
+
* Sanitize field names for use as object keys
|
|
103
|
+
*/
|
|
104
|
+
sanitizeFieldName(name) {
|
|
105
|
+
return name
|
|
106
|
+
.trim()
|
|
107
|
+
.replace(/[^a-zA-Z0-9_\s-]/g, '')
|
|
108
|
+
.replace(/\s+/g, '_')
|
|
109
|
+
.replace(/-+/g, '_')
|
|
110
|
+
.replace(/_+/g, '_')
|
|
111
|
+
.replace(/^_|_$/g, '')
|
|
112
|
+
|| 'field';
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Convert value to appropriate type
|
|
116
|
+
*/
|
|
117
|
+
convertValue(value, type) {
|
|
118
|
+
if (value === null || value === undefined || value === '')
|
|
119
|
+
return null;
|
|
120
|
+
switch (type) {
|
|
121
|
+
case 'integer':
|
|
122
|
+
return parseInt(String(value), 10);
|
|
123
|
+
case 'float':
|
|
124
|
+
case 'number':
|
|
125
|
+
return parseFloat(String(value));
|
|
126
|
+
case 'boolean':
|
|
127
|
+
if (typeof value === 'boolean')
|
|
128
|
+
return value;
|
|
129
|
+
const str = String(value).toLowerCase();
|
|
130
|
+
return ['true', 'yes', 'y', '1'].includes(str);
|
|
131
|
+
case 'date':
|
|
132
|
+
return new Date(value);
|
|
133
|
+
default:
|
|
134
|
+
return value;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
/**
|
|
138
|
+
* Create metadata object with common fields
|
|
139
|
+
*/
|
|
140
|
+
createMetadata(rowCount, fields, processingTime, extra = {}) {
|
|
141
|
+
return {
|
|
142
|
+
rowCount,
|
|
143
|
+
fields,
|
|
144
|
+
processingTime,
|
|
145
|
+
...extra
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
//# sourceMappingURL=base.js.map
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSV Format Handler
|
|
3
|
+
* Handles CSV files with:
|
|
4
|
+
* - Automatic encoding detection
|
|
5
|
+
* - Automatic delimiter detection
|
|
6
|
+
* - Streaming for large files
|
|
7
|
+
* - Type inference
|
|
8
|
+
*/
|
|
9
|
+
import { BaseFormatHandler } from './base.js';
|
|
10
|
+
import { FormatHandlerOptions, ProcessedData } from '../types.js';
|
|
11
|
+
export declare class CSVHandler extends BaseFormatHandler {
    /** Format identifier of this handler. */
    readonly format = "csv";
    /**
     * Reports whether this handler accepts the given input.
     *
     * @param data File content, or a descriptor carrying a filename and/or extension.
     */
    canHandle(data: Buffer | string | { filename?: string; ext?: string }): boolean;
    /**
     * Processes CSV content.
     *
     * @param data CSV content as a Buffer or string.
     * @param options Handler options.
     * @returns The processed data.
     */
    process(data: Buffer | string, options: FormatHandlerOptions): Promise<ProcessedData>;
    /** Checks whether text looks like CSV. */
    private looksLikeCSV;
    /** Detects the CSV delimiter. */
    private detectDelimiter;
    /** Detects the encoding safely, with a fallback. */
    private detectEncodingSafe;
    /** Normalizes encoding names to encodings supported by Node.js. */
    private normalizeEncoding;
}
|