@soulcraft/brainy 3.21.0 → 3.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ### [3.22.0](https://github.com/soulcraftlabs/brainy/compare/v3.21.0...v3.22.0) (2025-10-01)
6
+
7
+ - feat: add intelligent import for CSV, Excel, and PDF files (814cbb4)
8
+
9
+
5
10
  ### [3.21.0](https://github.com/soulcraftlabs/brainy/compare/v3.20.5...v3.21.0) (2025-10-01)
6
11
 
7
12
  - feat: add progress tracking, entity caching, and relationship confidence (2f9d512)
package/README.md CHANGED
@@ -457,6 +457,41 @@ npm run download-models # Download Q8 model
457
457
  npm run download-models:q8 # Download Q8 model
458
458
  ```
459
459
 
460
+ ## 🚀 Import Anything - Files, Data, URLs
461
+
462
+ Brainy's universal import intelligently handles **any data format**:
463
+
464
+ ```javascript
465
+ // Import CSV with auto-detection
466
+ await brain.import('customers.csv')
467
+ // ✨ Auto-detects: encoding, delimiter, types, creates entities!
468
+
469
+ // Import Excel workbooks with multi-sheet support
470
+ await brain.import('sales-data.xlsx', {
471
+ excelSheets: ['Q1', 'Q2'] // or 'all' for all sheets
472
+ })
473
+ // ✨ Processes the selected sheets, preserves structure, infers types!
474
+
475
+ // Import PDF documents with table extraction
476
+ await brain.import('research-paper.pdf', {
477
+ pdfExtractTables: true
478
+ })
479
+ // ✨ Extracts text, detects tables, preserves metadata!
480
+
481
+ // Import in-memory data (e.g. objects parsed from JSON/YAML)
482
+ await brain.import([
483
+ { name: 'Alice', role: 'Engineer' },
484
+ { name: 'Bob', role: 'Designer' }
485
+ ])
486
+ // ✨ Automatically creates Person entities with relationships!
487
+
488
+ // Import from URLs (auto-fetched)
489
+ await brain.import('https://api.example.com/data.json')
490
+ // ✨ Auto-detects URL, fetches, parses, processes!
491
+ ```
492
+
493
+ **📖 [Complete Import Guide →](docs/guides/import-anything.md)** | **[Live Example →](examples/import-excel-pdf-csv.ts)**
494
+
460
495
  ## 📚 Core API
461
496
 
462
497
  ### `search()` - Vector Similarity
@@ -513,6 +548,11 @@ await brain.deleteVerb(verbId)
513
548
  // Bulk operations
514
549
  await brain.import(arrayOfData)
515
550
  const exported = await brain.export({format: 'json'})
551
+
552
+ // Import from CSV, Excel, PDF files (auto-detected)
553
+ await brain.import('customers.csv') // CSV with encoding detection
554
+ await brain.import('sales-report.xlsx') // Excel with multi-sheet support
555
+ await brain.import('research.pdf') // PDF with table extraction
516
556
  ```
517
557
 
518
558
  ## 🌐 Distributed System (NEW!)
@@ -13,6 +13,7 @@ import { CacheAugmentation } from './cacheAugmentation.js';
13
13
  import { MetricsAugmentation } from './metricsAugmentation.js';
14
14
  import { MonitoringAugmentation } from './monitoringAugmentation.js';
15
15
  import { UniversalDisplayAugmentation } from './universalDisplayAugmentation.js';
16
+ import { IntelligentImportAugmentation } from './intelligentImport/index.js';
16
17
  /**
17
18
  * Create default augmentations for zero-config operation
18
19
  * Returns an array of augmentations to be registered
@@ -25,6 +26,7 @@ export declare function createDefaultAugmentations(config?: {
25
26
  metrics?: boolean | Record<string, any>;
26
27
  monitoring?: boolean | Record<string, any>;
27
28
  display?: boolean | Record<string, any>;
29
+ intelligentImport?: boolean | Record<string, any>;
28
30
  }): BaseAugmentation[];
29
31
  /**
30
32
  * Get augmentation by name with type safety
@@ -54,4 +56,8 @@ export declare const AugmentationHelpers: {
54
56
  * Get display augmentation
55
57
  */
56
58
  getDisplay(brain: Brainy): UniversalDisplayAugmentation | null;
59
+ /**
60
+ * Get intelligent import augmentation
61
+ */
62
+ getIntelligentImport(brain: Brainy): IntelligentImportAugmentation | null;
57
63
  };
@@ -11,6 +11,7 @@ import { CacheAugmentation } from './cacheAugmentation.js';
11
11
  import { MetricsAugmentation } from './metricsAugmentation.js';
12
12
  import { MonitoringAugmentation } from './monitoringAugmentation.js';
13
13
  import { UniversalDisplayAugmentation } from './universalDisplayAugmentation.js';
14
+ import { IntelligentImportAugmentation } from './intelligentImport/index.js';
14
15
  /**
15
16
  * Create default augmentations for zero-config operation
16
17
  * Returns an array of augmentations to be registered
@@ -20,6 +21,11 @@ import { UniversalDisplayAugmentation } from './universalDisplayAugmentation.js'
20
21
  */
21
22
  export function createDefaultAugmentations(config = {}) {
22
23
  const augmentations = [];
24
+ // Intelligent Import augmentation (CSV, Excel, PDF)
25
+ if (config.intelligentImport !== false) {
26
+ const importConfig = typeof config.intelligentImport === 'object' ? config.intelligentImport : {};
27
+ augmentations.push(new IntelligentImportAugmentation(importConfig));
28
+ }
23
29
  // Cache augmentation (was SearchCache)
24
30
  if (config.cache !== false) {
25
31
  const cacheConfig = typeof config.cache === 'object' ? config.cache : {};
@@ -88,6 +94,12 @@ export const AugmentationHelpers = {
88
94
  */
89
95
  getDisplay(brain) {
90
96
  return getAugmentation(brain, 'display');
97
+ },
98
+ /**
99
+ * Get intelligent import augmentation
100
+ */
101
+ getIntelligentImport(brain) {
102
+ return getAugmentation(brain, 'intelligent-import');
91
103
  }
92
104
  };
93
105
  //# sourceMappingURL=defaultAugmentations.js.map
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Intelligent Import Augmentation
3
+ *
4
+ * Automatically detects and processes CSV, Excel, and PDF files with:
5
+ * - Format detection and routing
6
+ * - Lazy-loaded handlers
7
+ * - Intelligent entity and relationship extraction
8
+ * - Integration with NeuralImport augmentation
9
+ */
10
+ import { BaseAugmentation } from '../brainyAugmentation.js';
11
+ import { FormatHandler, IntelligentImportConfig } from './types.js';
12
+ export declare class IntelligentImportAugmentation extends BaseAugmentation {
13
/** Registration name; AugmentationHelpers.getIntelligentImport looks the augmentation up by this string. */
+ readonly name = "intelligent-import";
14
/** Set to 'before' by the constructor. NOTE(review): presumably means "run ahead of the core operation" - confirm against BaseAugmentation. */
+ readonly timing: "before";
15
/** Declared field access. The constructor lists '_intelligentImport', '_processedFormat' and '_extractedData' as written fields. */
+ readonly metadata: {
16
+ reads: "*";
17
+ writes: string[];
18
+ };
19
/** Operation names registered by the constructor: 'import', 'importFile', 'importFromFile', 'importFromURL' and 'all'. */
+ readonly operations: any[];
20
/** Per the implementation's own comment, chosen to sit before NeuralImport (80) and after validation. */
+ readonly priority = 75;
21
/** Effective settings: defaults (all three formats enabled, 100MB maxFileSize, cache on with a 24h TTL) overlaid with the constructor argument. */
+ protected config: IntelligentImportConfig;
22
/** Map from format key ('csv', 'excel', 'pdf') to handler instance; filled in by onInitialize. */
+ private handlers;
23
/** False until onInitialize has run; shouldProcess declines every operation while it is false. */
+ private initialized;
24
/** @param config Overrides that are spread on top of the built-in defaults. */
+ constructor(config?: Partial<IntelligentImportConfig>);
25
/** Instantiates one handler per enabled format (enableCSV / enableExcel / enablePDF) and marks the augmentation initialized. */
+ protected onInitialize(): Promise<void>;
26
/**
 * Intercepts import-style operations. When params carry a Buffer or string
 * payload that a handler recognises, the handler's output is written onto
 * params (`_intelligentImport`, `_processedFormat`, `_extractedData`,
 * `_metadata`, and `data`/`metadata` when rows were extracted) before next()
 * is invoked. Unsupported payloads, files above maxFileSize and handler
 * errors all fall through to next().
 */
+ execute<T = any>(operation: string, params: any, next: () => Promise<T>): Promise<T>;
27
+ /**
28
+ * Check if we should process this operation
29
+ */
30
+ private shouldProcess;
31
+ /**
32
+ * Extract file data from various param formats
33
+ */
34
+ private extractFileData;
35
+ /**
36
+ * Detect which handler can process this file
37
+ */
38
+ private detectHandler;
39
+ /**
40
+ * Get handler by format name
41
+ */
42
+ getHandler(format: string): FormatHandler | undefined;
43
+ /**
44
+ * Get all registered handlers
45
+ */
46
+ getHandlers(): FormatHandler[];
47
+ /**
48
+ * Get supported formats
49
+ */
50
+ getSupportedFormats(): string[];
51
+ }
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Intelligent Import Augmentation
3
+ *
4
+ * Automatically detects and processes CSV, Excel, and PDF files with:
5
+ * - Format detection and routing
6
+ * - Lazy-loaded handlers
7
+ * - Intelligent entity and relationship extraction
8
+ * - Integration with NeuralImport augmentation
9
+ */
10
+ import { BaseAugmentation } from '../brainyAugmentation.js';
11
+ import { CSVHandler } from './handlers/csvHandler.js';
12
+ import { ExcelHandler } from './handlers/excelHandler.js';
13
+ import { PDFHandler } from './handlers/pdfHandler.js';
14
/**
 * Augmentation that intercepts import operations and, when the payload is a
 * file a registered handler recognises (CSV, Excel, PDF), replaces the raw
 * payload on `params` with the handler's structured output before passing
 * control to the next augmentation.
 */
export class IntelligentImportAugmentation extends BaseAugmentation {
    /**
     * @param config Partial configuration; any key given here overrides the
     *   defaults below (all formats enabled, 100MB size cap, 24h cache TTL).
     */
    constructor(config = {}) {
        super(config);
        this.name = 'intelligent-import';
        this.timing = 'before';
        this.metadata = {
            reads: '*',
            // NOTE(review): execute() also writes `_metadata`, `data` and
            // `metadata` on params; confirm whether they should be declared here.
            writes: ['_intelligentImport', '_processedFormat', '_extractedData']
        };
        this.operations = ['import', 'importFile', 'importFromFile', 'importFromURL', 'all'];
        // Original author's note: before NeuralImport (80), after validation.
        // NOTE(review): confirm the framework's priority ordering direction.
        this.priority = 75;
        this.handlers = new Map();
        this.initialized = false;
        this.config = {
            enableCSV: true,
            enableExcel: true,
            enablePDF: true,
            maxFileSize: 100 * 1024 * 1024, // 100MB default
            enableCache: true,
            cacheTTL: 24 * 60 * 60 * 1000, // 24 hours
            ...config
        };
    }
    /**
     * Register one handler per enabled format and mark the augmentation ready.
     */
    async onInitialize() {
        if (this.config.enableCSV) {
            this.handlers.set('csv', new CSVHandler());
        }
        if (this.config.enableExcel) {
            this.handlers.set('excel', new ExcelHandler());
        }
        if (this.config.enablePDF) {
            this.handlers.set('pdf', new PDFHandler());
        }
        this.initialized = true;
        this.log(`Initialized with ${this.handlers.size} format handlers (CSV: ${this.config.enableCSV}, Excel: ${this.config.enableExcel}, PDF: ${this.config.enablePDF})`);
    }
    /**
     * Pipeline entry point.
     *
     * @param operation Name of the operation being executed.
     * @param params Operation parameters; enriched in place on success.
     * @param next Continuation for the rest of the pipeline.
     * @returns Whatever `next()` resolves to.
     *
     * `next()` is invoked exactly once on every path. Keeping it outside the
     * try block means a failure in the downstream pipeline is never mistaken
     * for an import-processing failure and never triggers a second call.
     */
    async execute(operation, params, next) {
        if (this.shouldProcess(operation, params)) {
            try {
                await this.enrichParams(params);
            }
            catch (error) {
                // Best effort: fall through to the normal import on any error.
                this.log(`Intelligent import processing failed: ${error instanceof Error ? error.message : String(error)}`, 'warn');
            }
        }
        return next();
    }
    /**
     * Run format detection and handler processing, then write the result onto
     * `params`. Returns without touching `params` when there is no usable
     * payload, the payload exceeds `maxFileSize`, or no handler matches.
     * Throws when the handler fails or returns a result without a data array;
     * in both cases `params` has not been modified yet.
     */
    async enrichParams(params) {
        const fileData = this.extractFileData(params);
        if (!fileData) {
            return;
        }
        if (this.config.maxFileSize && fileData.data.length > this.config.maxFileSize) {
            this.log(`File too large (${fileData.data.length} bytes), skipping intelligent import`, 'warn');
            return;
        }
        const handler = this.detectHandler(fileData.data, fileData.filename);
        if (!handler) {
            // Not a supported format, pass through
            return;
        }
        this.log(`Processing ${fileData.filename || 'file'} with ${handler.format} handler`);
        const processed = await handler.process(fileData.data, {
            filename: fileData.filename,
            ext: fileData.ext,
            ...this.config.csvDefaults,
            ...this.config.excelDefaults,
            ...this.config.pdfDefaults,
            ...params.options
        });
        // Validate before mutating so a malformed result cannot leave params
        // half-enriched.
        if (!processed || !Array.isArray(processed.data)) {
            throw new Error(`${handler.format} handler returned no data array`);
        }
        params._intelligentImport = true;
        params._processedFormat = processed.format;
        params._extractedData = processed.data;
        params._metadata = {
            ...params._metadata,
            intelligentImport: processed.metadata
        };
        if (processed.data.length > 0) {
            // Hand the structured rows to later augmentations in place of the raw payload.
            params.data = processed.data;
            params.metadata = params._metadata;
        }
        this.log(`Extracted ${processed.data.length} items from ${processed.format} file`);
    }
    /**
     * Check if we should process this operation: handlers must be initialized,
     * the operation name must contain one of the import operation names, and
     * params must carry a source, data, filePath or url.
     */
    shouldProcess(operation, params) {
        if (!this.initialized || this.handlers.size === 0) {
            return false;
        }
        // Guard against non-string operation identifiers; `.includes` would throw.
        if (typeof operation !== 'string') {
            return false;
        }
        const validOps = ['import', 'importFile', 'importFromFile', 'importFromURL'];
        if (!validOps.some(op => operation.includes(op))) {
            return false;
        }
        if (!params || (!params.source && !params.data && !params.filePath && !params.url)) {
            return false;
        }
        return true;
    }
    /**
     * Extract file data from various param formats.
     *
     * Looks at `params.source` first, then `params.data`; a Buffer is used
     * as-is and a string is converted with `Buffer.from`. `params.filePath`
     * alone yields null because reading files is handled elsewhere.
     *
     * @returns `{ data, filename, ext }`, or null when no Buffer/string payload exists.
     */
    extractFileData(params) {
        const filename = params.filename;
        const ext = this.extensionOf(filename);
        for (const candidate of [params.source, params.data]) {
            if (!candidate) {
                continue;
            }
            if (Buffer.isBuffer(candidate)) {
                return { data: candidate, filename, ext };
            }
            if (typeof candidate === 'string') {
                return { data: Buffer.from(candidate), filename, ext };
            }
        }
        return null;
    }
    /**
     * Lower-cased extension of a filename (without the dot), or undefined when
     * the filename is not a string or has no extension in its last path segment.
     */
    extensionOf(filename) {
        if (typeof filename !== 'string') {
            return undefined;
        }
        const match = /\.([^./\\]+)$/.exec(filename);
        return match ? match[1].toLowerCase() : undefined;
    }
    /**
     * Detect which handler can process this file. Handlers are tried in
     * registration order; each is asked about the raw content first and then,
     * when a filename is available, about `{ filename }`.
     *
     * @returns The first matching handler, or null.
     */
    detectHandler(data, filename) {
        for (const handler of this.handlers.values()) {
            if (handler.canHandle(data) || (filename && handler.canHandle({ filename }))) {
                return handler;
            }
        }
        return null;
    }
    /**
     * Get handler by format name (case-insensitive).
     */
    getHandler(format) {
        return this.handlers.get(format.toLowerCase());
    }
    /**
     * Get all registered handlers
     */
    getHandlers() {
        return Array.from(this.handlers.values());
    }
    /**
     * Get supported formats (the keys handlers were registered under).
     */
    getSupportedFormats() {
        return Array.from(this.handlers.keys());
    }
}
185
+ //# sourceMappingURL=IntelligentImportAugmentation.js.map
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Base Format Handler
3
+ * Abstract class providing common functionality for all format handlers
4
+ */
5
+ import { FormatHandler, FormatHandlerOptions, ProcessedData } from '../types.js';
6
+ export declare abstract class BaseFormatHandler implements FormatHandler {
7
/** Format identifier supplied by each subclass (CSVHandler declares "csv"). */
+ abstract readonly format: string;
8
/** Parses raw file content into ProcessedData; IntelligentImportAugmentation.execute reads `format`, `data` and `metadata` off the result. */
+ abstract process(data: Buffer | string, options: FormatHandlerOptions): Promise<ProcessedData>;
9
/** Reports whether this handler recognises the input; IntelligentImportAugmentation calls it with the raw content and again with `{ filename }`. */
+ abstract canHandle(data: Buffer | string | {
10
+ filename?: string;
11
+ ext?: string;
12
+ }): boolean;
13
+ /**
14
+ * Detect file extension from various inputs
15
+ */
16
+ protected detectExtension(data: Buffer | string | {
17
+ filename?: string;
18
+ ext?: string;
19
+ }): string | null;
20
+ /**
21
+ * Extract extension from filename
22
+ */
23
+ protected getExtension(filename: string): string;
24
+ /**
25
+ * Infer field types from data
26
+ * Analyzes multiple rows to determine the most appropriate type
27
+ */
28
+ protected inferFieldTypes(data: Array<Record<string, any>>): Record<string, string>;
29
+ /**
30
+ * Infer type of a single value
31
+ */
32
+ protected inferType(value: any): string;
33
+ /**
34
+ * Check if string looks like a date
35
+ */
36
+ protected isDateString(value: string): boolean;
37
+ /**
38
+ * Sanitize field names for use as object keys
39
+ */
40
+ protected sanitizeFieldName(name: string): string;
41
+ /**
42
+ * Convert value to appropriate type
43
+ */
44
+ protected convertValue(value: any, type: string): any;
45
+ /**
46
+ * Create metadata object with common fields
47
+ */
48
+ protected createMetadata(rowCount: number, fields: string[], processingTime: number, extra?: Record<string, any>): ProcessedData['metadata'];
49
+ }
@@ -0,0 +1,149 @@
1
+ /**
2
+ * Base Format Handler
3
+ * Abstract class providing common functionality for all format handlers
4
+ */
5
+ export class BaseFormatHandler {
6
+ /**
7
+ * Detect file extension from various inputs
8
+ */
9
+ detectExtension(data) {
10
+ if (typeof data === 'object' && 'filename' in data && data.filename) {
11
+ return this.getExtension(data.filename);
12
+ }
13
+ if (typeof data === 'object' && 'ext' in data && data.ext) {
14
+ return data.ext.toLowerCase().replace(/^\./, '');
15
+ }
16
+ return null;
17
+ }
18
+ /**
19
+ * Extract extension from filename
20
+ */
21
+ getExtension(filename) {
22
+ const match = filename.match(/\.([^.]+)$/);
23
+ return match ? match[1].toLowerCase() : '';
24
+ }
25
+ /**
26
+ * Infer field types from data
27
+ * Analyzes multiple rows to determine the most appropriate type
28
+ */
29
+ inferFieldTypes(data) {
30
+ if (data.length === 0)
31
+ return {};
32
+ const types = {};
33
+ const firstRow = data[0];
34
+ const sampleSize = Math.min(10, data.length);
35
+ for (const key of Object.keys(firstRow)) {
36
+ // Check first few rows to get more accurate type
37
+ const sampleTypes = new Set();
38
+ for (let i = 0; i < sampleSize; i++) {
39
+ const value = data[i][key];
40
+ const type = this.inferType(value);
41
+ sampleTypes.add(type);
42
+ }
43
+ // If we see both integer and float, use float
44
+ if (sampleTypes.has('float') || (sampleTypes.has('integer') && sampleTypes.has('float'))) {
45
+ types[key] = 'float';
46
+ }
47
+ else if (sampleTypes.has('integer')) {
48
+ types[key] = 'integer';
49
+ }
50
+ else if (sampleTypes.has('date')) {
51
+ types[key] = 'date';
52
+ }
53
+ else if (sampleTypes.has('boolean')) {
54
+ types[key] = 'boolean';
55
+ }
56
+ else {
57
+ types[key] = 'string';
58
+ }
59
+ }
60
+ return types;
61
+ }
62
+ /**
63
+ * Infer type of a single value
64
+ */
65
+ inferType(value) {
66
+ if (value === null || value === undefined || value === '')
67
+ return 'string';
68
+ if (typeof value === 'number')
69
+ return 'number';
70
+ if (typeof value === 'boolean')
71
+ return 'boolean';
72
+ if (typeof value === 'string') {
73
+ // Check if it's a number
74
+ if (/^-?\d+$/.test(value))
75
+ return 'integer';
76
+ if (/^-?\d+\.\d+$/.test(value))
77
+ return 'float';
78
+ // Check if it's a date
79
+ if (this.isDateString(value))
80
+ return 'date';
81
+ // Check if it's a boolean
82
+ if (/^(true|false|yes|no|y|n)$/i.test(value))
83
+ return 'boolean';
84
+ }
85
+ return 'string';
86
+ }
87
+ /**
88
+ * Check if string looks like a date
89
+ */
90
+ isDateString(value) {
91
+ // ISO 8601
92
+ if (/^\d{4}-\d{2}-\d{2}/.test(value))
93
+ return true;
94
+ // Common date formats
95
+ if (/^\d{1,2}\/\d{1,2}\/\d{2,4}$/.test(value))
96
+ return true;
97
+ if (/^\d{1,2}-\d{1,2}-\d{2,4}$/.test(value))
98
+ return true;
99
+ return false;
100
+ }
101
+ /**
102
+ * Sanitize field names for use as object keys
103
+ */
104
+ sanitizeFieldName(name) {
105
+ return name
106
+ .trim()
107
+ .replace(/[^a-zA-Z0-9_\s-]/g, '')
108
+ .replace(/\s+/g, '_')
109
+ .replace(/-+/g, '_')
110
+ .replace(/_+/g, '_')
111
+ .replace(/^_|_$/g, '')
112
+ || 'field';
113
+ }
114
+ /**
115
+ * Convert value to appropriate type
116
+ */
117
+ convertValue(value, type) {
118
+ if (value === null || value === undefined || value === '')
119
+ return null;
120
+ switch (type) {
121
+ case 'integer':
122
+ return parseInt(String(value), 10);
123
+ case 'float':
124
+ case 'number':
125
+ return parseFloat(String(value));
126
+ case 'boolean':
127
+ if (typeof value === 'boolean')
128
+ return value;
129
+ const str = String(value).toLowerCase();
130
+ return ['true', 'yes', 'y', '1'].includes(str);
131
+ case 'date':
132
+ return new Date(value);
133
+ default:
134
+ return value;
135
+ }
136
+ }
137
+ /**
138
+ * Create metadata object with common fields
139
+ */
140
+ createMetadata(rowCount, fields, processingTime, extra = {}) {
141
+ return {
142
+ rowCount,
143
+ fields,
144
+ processingTime,
145
+ ...extra
146
+ };
147
+ }
148
+ }
149
+ //# sourceMappingURL=base.js.map
@@ -0,0 +1,34 @@
1
+ /**
2
+ * CSV Format Handler
3
+ * Handles CSV files with:
4
+ * - Automatic encoding detection
5
+ * - Automatic delimiter detection
6
+ * - Streaming for large files
7
+ * - Type inference
8
+ */
9
+ import { BaseFormatHandler } from './base.js';
10
+ import { FormatHandlerOptions, ProcessedData } from '../types.js';
11
+ export declare class CSVHandler extends BaseFormatHandler {
12
/** Format identifier for this handler; IntelligentImportAugmentation includes `handler.format` in its log output. */
+ readonly format = "csv";
13
/** NOTE(review): implementation not visible here; presumably combines an extension check with the looksLikeCSV content check - confirm against csvHandler.js. */
+ canHandle(data: Buffer | string | {
14
+ filename?: string;
15
+ ext?: string;
16
+ }): boolean;
17
/** NOTE(review): per the file header this performs encoding detection, delimiter detection and type inference - confirm against csvHandler.js. */
+ process(data: Buffer | string, options: FormatHandlerOptions): Promise<ProcessedData>;
18
+ /**
19
+ * Check if text looks like CSV
20
+ */
21
+ private looksLikeCSV;
22
+ /**
23
+ * Detect CSV delimiter
24
+ */
25
+ private detectDelimiter;
26
+ /**
27
+ * Detect encoding safely (with fallback)
28
+ */
29
+ private detectEncodingSafe;
30
+ /**
31
+ * Normalize encoding names to Node.js-supported encodings
32
+ */
33
+ private normalizeEncoding;
34
+ }