@soulcraft/brainy 4.4.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/augmentations/intelligentImport/handlers/csvHandler.js +33 -1
- package/dist/augmentations/intelligentImport/handlers/excelHandler.js +48 -2
- package/dist/augmentations/intelligentImport/handlers/pdfHandler.js +37 -0
- package/dist/augmentations/intelligentImport/types.d.ts +33 -0
- package/dist/brainy.d.ts +22 -3
- package/dist/brainy.js +28 -2
- package/dist/cli/commands/core.d.ts +3 -0
- package/dist/cli/commands/core.js +21 -3
- package/dist/cli/commands/import.js +69 -34
- package/dist/importers/SmartCSVImporter.js +35 -1
- package/dist/importers/SmartDOCXImporter.js +12 -0
- package/dist/importers/SmartExcelImporter.js +37 -1
- package/dist/importers/SmartJSONImporter.js +18 -0
- package/dist/importers/SmartMarkdownImporter.js +25 -2
- package/dist/importers/SmartPDFImporter.js +37 -1
- package/dist/importers/SmartYAMLImporter.js +12 -0
- package/dist/types/brainy.types.d.ts +106 -0
- package/dist/utils/import-progress-tracker.d.ts +140 -0
- package/dist/utils/import-progress-tracker.js +444 -0
- package/dist/vfs/PathResolver.js +4 -2
- package/dist/vfs/VirtualFileSystem.js +22 -7
- package/package.json +1 -1
|
@@ -30,13 +30,26 @@ export class CSVHandler extends BaseFormatHandler {
|
|
|
30
30
|
}
|
|
31
31
|
async process(data, options) {
|
|
32
32
|
const startTime = Date.now();
|
|
33
|
+
const progressHooks = options.progressHooks;
|
|
33
34
|
// Convert to buffer if string
|
|
34
35
|
const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data, 'utf-8');
|
|
36
|
+
const totalBytes = buffer.length;
|
|
37
|
+
// v4.5.0: Report total bytes for progress tracking
|
|
38
|
+
if (progressHooks?.onBytesProcessed) {
|
|
39
|
+
progressHooks.onBytesProcessed(0);
|
|
40
|
+
}
|
|
41
|
+
if (progressHooks?.onCurrentItem) {
|
|
42
|
+
progressHooks.onCurrentItem('Detecting CSV encoding and delimiter...');
|
|
43
|
+
}
|
|
35
44
|
// Detect encoding
|
|
36
45
|
const detectedEncoding = options.encoding || this.detectEncodingSafe(buffer);
|
|
37
46
|
const text = buffer.toString(detectedEncoding);
|
|
38
47
|
// Detect delimiter if not specified
|
|
39
48
|
const delimiter = options.csvDelimiter || this.detectDelimiter(text);
|
|
49
|
+
// v4.5.0: Report progress - parsing started
|
|
50
|
+
if (progressHooks?.onCurrentItem) {
|
|
51
|
+
progressHooks.onCurrentItem(`Parsing CSV rows (delimiter: "${delimiter}")...`);
|
|
52
|
+
}
|
|
40
53
|
// Parse CSV
|
|
41
54
|
const hasHeaders = options.csvHeaders !== false;
|
|
42
55
|
const maxRows = options.maxRows;
|
|
@@ -50,19 +63,38 @@ export class CSVHandler extends BaseFormatHandler {
|
|
|
50
63
|
to: maxRows,
|
|
51
64
|
cast: false // We'll do type inference ourselves
|
|
52
65
|
});
|
|
66
|
+
// v4.5.0: Report bytes processed (entire file parsed)
|
|
67
|
+
if (progressHooks?.onBytesProcessed) {
|
|
68
|
+
progressHooks.onBytesProcessed(totalBytes);
|
|
69
|
+
}
|
|
53
70
|
// Convert to array of objects
|
|
54
71
|
const data = Array.isArray(records) ? records : [records];
|
|
72
|
+
// v4.5.0: Report data extraction progress
|
|
73
|
+
if (progressHooks?.onDataExtracted) {
|
|
74
|
+
progressHooks.onDataExtracted(data.length, data.length);
|
|
75
|
+
}
|
|
76
|
+
if (progressHooks?.onCurrentItem) {
|
|
77
|
+
progressHooks.onCurrentItem(`Extracted ${data.length} rows, inferring types...`);
|
|
78
|
+
}
|
|
55
79
|
// Infer types and convert values
|
|
56
80
|
const fields = data.length > 0 ? Object.keys(data[0]) : [];
|
|
57
81
|
const types = this.inferFieldTypes(data);
|
|
58
|
-
const convertedData = data.map(row => {
|
|
82
|
+
const convertedData = data.map((row, index) => {
|
|
59
83
|
const converted = {};
|
|
60
84
|
for (const [key, value] of Object.entries(row)) {
|
|
61
85
|
converted[key] = this.convertValue(value, types[key] || 'string');
|
|
62
86
|
}
|
|
87
|
+
// v4.5.0: Report progress every 1000 rows
|
|
88
|
+
if (progressHooks?.onCurrentItem && index > 0 && index % 1000 === 0) {
|
|
89
|
+
progressHooks.onCurrentItem(`Converting types: ${index}/${data.length} rows...`);
|
|
90
|
+
}
|
|
63
91
|
return converted;
|
|
64
92
|
});
|
|
65
93
|
const processingTime = Date.now() - startTime;
|
|
94
|
+
// v4.5.0: Final progress update
|
|
95
|
+
if (progressHooks?.onCurrentItem) {
|
|
96
|
+
progressHooks.onCurrentItem(`CSV processing complete: ${convertedData.length} rows`);
|
|
97
|
+
}
|
|
66
98
|
return {
|
|
67
99
|
format: this.format,
|
|
68
100
|
data: convertedData,
|
|
@@ -19,8 +19,17 @@ export class ExcelHandler extends BaseFormatHandler {
|
|
|
19
19
|
}
|
|
20
20
|
async process(data, options) {
|
|
21
21
|
const startTime = Date.now();
|
|
22
|
+
const progressHooks = options.progressHooks;
|
|
22
23
|
// Convert to buffer if string (though Excel should always be binary)
|
|
23
24
|
const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data, 'binary');
|
|
25
|
+
const totalBytes = buffer.length;
|
|
26
|
+
// v4.5.0: Report start
|
|
27
|
+
if (progressHooks?.onBytesProcessed) {
|
|
28
|
+
progressHooks.onBytesProcessed(0);
|
|
29
|
+
}
|
|
30
|
+
if (progressHooks?.onCurrentItem) {
|
|
31
|
+
progressHooks.onCurrentItem('Loading Excel workbook...');
|
|
32
|
+
}
|
|
24
33
|
try {
|
|
25
34
|
// Read workbook
|
|
26
35
|
const workbook = XLSX.read(buffer, {
|
|
@@ -31,10 +40,19 @@ export class ExcelHandler extends BaseFormatHandler {
|
|
|
31
40
|
});
|
|
32
41
|
// Determine which sheets to process
|
|
33
42
|
const sheetsToProcess = this.getSheetsToProcess(workbook, options);
|
|
43
|
+
// v4.5.0: Report workbook loaded
|
|
44
|
+
if (progressHooks?.onCurrentItem) {
|
|
45
|
+
progressHooks.onCurrentItem(`Processing ${sheetsToProcess.length} sheets...`);
|
|
46
|
+
}
|
|
34
47
|
// Extract data from sheets
|
|
35
48
|
const allData = [];
|
|
36
49
|
const sheetMetadata = {};
|
|
37
|
-
for (
|
|
50
|
+
for (let sheetIndex = 0; sheetIndex < sheetsToProcess.length; sheetIndex++) {
|
|
51
|
+
const sheetName = sheetsToProcess[sheetIndex];
|
|
52
|
+
// v4.5.0: Report current sheet
|
|
53
|
+
if (progressHooks?.onCurrentItem) {
|
|
54
|
+
progressHooks.onCurrentItem(`Reading sheet: ${sheetName} (${sheetIndex + 1}/${sheetsToProcess.length})`);
|
|
55
|
+
}
|
|
38
56
|
const sheet = workbook.Sheets[sheetName];
|
|
39
57
|
if (!sheet)
|
|
40
58
|
continue;
|
|
@@ -75,12 +93,28 @@ export class ExcelHandler extends BaseFormatHandler {
|
|
|
75
93
|
columnCount: headers.length,
|
|
76
94
|
headers
|
|
77
95
|
};
|
|
96
|
+
// v4.5.0: Estimate bytes processed (sheets are sequential)
|
|
97
|
+
const bytesProcessed = Math.floor(((sheetIndex + 1) / sheetsToProcess.length) * totalBytes);
|
|
98
|
+
if (progressHooks?.onBytesProcessed) {
|
|
99
|
+
progressHooks.onBytesProcessed(bytesProcessed);
|
|
100
|
+
}
|
|
101
|
+
// v4.5.0: Report extraction progress
|
|
102
|
+
if (progressHooks?.onDataExtracted) {
|
|
103
|
+
progressHooks.onDataExtracted(allData.length, undefined); // Total unknown until complete
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
// v4.5.0: Report data extraction complete
|
|
107
|
+
if (progressHooks?.onCurrentItem) {
|
|
108
|
+
progressHooks.onCurrentItem(`Extracted ${allData.length} rows, inferring types...`);
|
|
109
|
+
}
|
|
110
|
+
if (progressHooks?.onDataExtracted) {
|
|
111
|
+
progressHooks.onDataExtracted(allData.length, allData.length);
|
|
78
112
|
}
|
|
79
113
|
// Infer types (excluding _sheet field)
|
|
80
114
|
const fields = allData.length > 0 ? Object.keys(allData[0]).filter(k => k !== '_sheet') : [];
|
|
81
115
|
const types = this.inferFieldTypes(allData);
|
|
82
116
|
// Convert values to appropriate types
|
|
83
|
-
const convertedData = allData.map(row => {
|
|
117
|
+
const convertedData = allData.map((row, index) => {
|
|
84
118
|
const converted = {};
|
|
85
119
|
for (const [key, value] of Object.entries(row)) {
|
|
86
120
|
if (key === '_sheet') {
|
|
@@ -90,9 +124,21 @@ export class ExcelHandler extends BaseFormatHandler {
|
|
|
90
124
|
converted[key] = this.convertValue(value, types[key] || 'string');
|
|
91
125
|
}
|
|
92
126
|
}
|
|
127
|
+
// v4.5.0: Report progress every 1000 rows (avoid spam)
|
|
128
|
+
if (progressHooks?.onCurrentItem && index > 0 && index % 1000 === 0) {
|
|
129
|
+
progressHooks.onCurrentItem(`Converting types: ${index}/${allData.length} rows...`);
|
|
130
|
+
}
|
|
93
131
|
return converted;
|
|
94
132
|
});
|
|
133
|
+
// v4.5.0: Final progress - all bytes processed
|
|
134
|
+
if (progressHooks?.onBytesProcessed) {
|
|
135
|
+
progressHooks.onBytesProcessed(totalBytes);
|
|
136
|
+
}
|
|
95
137
|
const processingTime = Date.now() - startTime;
|
|
138
|
+
// v4.5.0: Report completion
|
|
139
|
+
if (progressHooks?.onCurrentItem) {
|
|
140
|
+
progressHooks.onCurrentItem(`Excel complete: ${sheetsToProcess.length} sheets, ${convertedData.length} rows`);
|
|
141
|
+
}
|
|
96
142
|
return {
|
|
97
143
|
format: this.format,
|
|
98
144
|
data: convertedData,
|
|
@@ -42,8 +42,17 @@ export class PDFHandler extends BaseFormatHandler {
|
|
|
42
42
|
}
|
|
43
43
|
async process(data, options) {
|
|
44
44
|
const startTime = Date.now();
|
|
45
|
+
const progressHooks = options.progressHooks;
|
|
45
46
|
// Convert to buffer
|
|
46
47
|
const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data, 'binary');
|
|
48
|
+
const totalBytes = buffer.length;
|
|
49
|
+
// v4.5.0: Report start
|
|
50
|
+
if (progressHooks?.onBytesProcessed) {
|
|
51
|
+
progressHooks.onBytesProcessed(0);
|
|
52
|
+
}
|
|
53
|
+
if (progressHooks?.onCurrentItem) {
|
|
54
|
+
progressHooks.onCurrentItem('Loading PDF document...');
|
|
55
|
+
}
|
|
47
56
|
try {
|
|
48
57
|
// Load PDF document
|
|
49
58
|
const loadingTask = pdfjsLib.getDocument({
|
|
@@ -55,11 +64,19 @@ export class PDFHandler extends BaseFormatHandler {
|
|
|
55
64
|
// Extract metadata
|
|
56
65
|
const metadata = await pdfDoc.getMetadata();
|
|
57
66
|
const numPages = pdfDoc.numPages;
|
|
67
|
+
// v4.5.0: Report document loaded
|
|
68
|
+
if (progressHooks?.onCurrentItem) {
|
|
69
|
+
progressHooks.onCurrentItem(`Processing ${numPages} pages...`);
|
|
70
|
+
}
|
|
58
71
|
// Extract text and structure from all pages
|
|
59
72
|
const allData = [];
|
|
60
73
|
let totalTextLength = 0;
|
|
61
74
|
let detectedTables = 0;
|
|
62
75
|
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
|
|
76
|
+
// v4.5.0: Report current page
|
|
77
|
+
if (progressHooks?.onCurrentItem) {
|
|
78
|
+
progressHooks.onCurrentItem(`Processing page ${pageNum} of ${numPages}`);
|
|
79
|
+
}
|
|
63
80
|
const page = await pdfDoc.getPage(pageNum);
|
|
64
81
|
const textContent = await page.getTextContent();
|
|
65
82
|
// Extract text items with positions
|
|
@@ -96,8 +113,28 @@ export class PDFHandler extends BaseFormatHandler {
|
|
|
96
113
|
});
|
|
97
114
|
}
|
|
98
115
|
}
|
|
116
|
+
// v4.5.0: Estimate bytes processed (pages are sequential)
|
|
117
|
+
const bytesProcessed = Math.floor((pageNum / numPages) * totalBytes);
|
|
118
|
+
if (progressHooks?.onBytesProcessed) {
|
|
119
|
+
progressHooks.onBytesProcessed(bytesProcessed);
|
|
120
|
+
}
|
|
121
|
+
// v4.5.0: Report extraction progress
|
|
122
|
+
if (progressHooks?.onDataExtracted) {
|
|
123
|
+
progressHooks.onDataExtracted(allData.length, undefined); // Total unknown until complete
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
// v4.5.0: Final progress - all bytes processed
|
|
127
|
+
if (progressHooks?.onBytesProcessed) {
|
|
128
|
+
progressHooks.onBytesProcessed(totalBytes);
|
|
129
|
+
}
|
|
130
|
+
if (progressHooks?.onDataExtracted) {
|
|
131
|
+
progressHooks.onDataExtracted(allData.length, allData.length);
|
|
99
132
|
}
|
|
100
133
|
const processingTime = Date.now() - startTime;
|
|
134
|
+
// v4.5.0: Report completion
|
|
135
|
+
if (progressHooks?.onCurrentItem) {
|
|
136
|
+
progressHooks.onCurrentItem(`PDF complete: ${numPages} pages, ${allData.length} items extracted`);
|
|
137
|
+
}
|
|
101
138
|
// Get all unique fields (excluding metadata fields)
|
|
102
139
|
const fields = allData.length > 0
|
|
103
140
|
? Object.keys(allData[0]).filter(k => !k.startsWith('_'))
|
|
@@ -2,6 +2,29 @@
|
|
|
2
2
|
* Types for Intelligent Import Augmentation
|
|
3
3
|
* Handles Excel, PDF, and CSV import with intelligent extraction
|
|
4
4
|
*/
|
|
5
|
+
/**
|
|
6
|
+
* Progress hooks for format handlers
|
|
7
|
+
*
|
|
8
|
+
* Handlers call these hooks to report progress during processing.
|
|
9
|
+
* This enables real-time progress tracking for any file format.
|
|
10
|
+
*/
|
|
11
|
+
export interface FormatHandlerProgressHooks {
|
|
12
|
+
/**
|
|
13
|
+
* Report bytes processed
|
|
14
|
+
* Call this as you read/parse the file
|
|
15
|
+
*/
|
|
16
|
+
onBytesProcessed?: (bytes: number) => void;
|
|
17
|
+
/**
|
|
18
|
+
* Set current processing context
|
|
19
|
+
* Examples: "Processing page 5", "Reading sheet: Q2 Sales"
|
|
20
|
+
*/
|
|
21
|
+
onCurrentItem?: (item: string) => void;
|
|
22
|
+
/**
|
|
23
|
+
* Report structured data extraction progress
|
|
24
|
+
* Examples: "Extracted 100 rows", "Parsed 50 paragraphs"
|
|
25
|
+
*/
|
|
26
|
+
onDataExtracted?: (count: number, total?: number) => void;
|
|
27
|
+
}
|
|
5
28
|
export interface FormatHandler {
|
|
6
29
|
/**
|
|
7
30
|
* Format name (e.g., 'csv', 'xlsx', 'pdf')
|
|
@@ -47,6 +70,16 @@ export interface FormatHandlerOptions {
|
|
|
47
70
|
maxRows?: number;
|
|
48
71
|
/** Whether to stream large files */
|
|
49
72
|
streaming?: boolean;
|
|
73
|
+
/**
|
|
74
|
+
* Progress hooks (v4.5.0)
|
|
75
|
+
* Handlers call these to report progress during processing
|
|
76
|
+
*/
|
|
77
|
+
progressHooks?: FormatHandlerProgressHooks;
|
|
78
|
+
/**
|
|
79
|
+
* Total file size in bytes (v4.5.0)
|
|
80
|
+
* Used for progress percentage calculation
|
|
81
|
+
*/
|
|
82
|
+
totalBytes?: number;
|
|
50
83
|
}
|
|
51
84
|
export interface ProcessedData {
|
|
52
85
|
/** Format that was processed */
|
package/dist/brainy.d.ts
CHANGED
|
@@ -800,9 +800,27 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
800
800
|
* groupBy: 'type', // Organize by entity type
|
|
801
801
|
* preserveSource: true, // Keep original file
|
|
802
802
|
*
|
|
803
|
-
* // Progress tracking
|
|
804
|
-
* onProgress: (p) =>
|
|
803
|
+
* // Progress tracking (v4.5.0 - STANDARDIZED FOR ALL 7 FORMATS!)
|
|
804
|
+
* onProgress: (p) => {
|
|
805
|
+
* console.log(`[${p.stage}] ${p.message}`)
|
|
806
|
+
* console.log(`Entities: ${p.entities || 0}, Rels: ${p.relationships || 0}`)
|
|
807
|
+
* if (p.throughput) console.log(`Rate: ${p.throughput.toFixed(1)}/sec`)
|
|
808
|
+
* }
|
|
805
809
|
* })
|
|
810
|
+
* // THIS SAME HANDLER WORKS FOR CSV, PDF, Excel, JSON, Markdown, YAML, DOCX!
|
|
811
|
+
* ```
|
|
812
|
+
*
|
|
813
|
+
* @example Universal Progress Handler (v4.5.0)
|
|
814
|
+
* ```typescript
|
|
815
|
+
* // ONE handler for ALL 7 formats - no format-specific code needed!
|
|
816
|
+
* const universalProgress = (p) => {
|
|
817
|
+
* updateUI(p.stage, p.message, p.entities, p.relationships)
|
|
818
|
+
* }
|
|
819
|
+
*
|
|
820
|
+
* await brain.import(csvBuffer, { onProgress: universalProgress })
|
|
821
|
+
* await brain.import(pdfBuffer, { onProgress: universalProgress })
|
|
822
|
+
* await brain.import(excelBuffer, { onProgress: universalProgress })
|
|
823
|
+
* // Works for JSON, Markdown, YAML, DOCX too!
|
|
806
824
|
* ```
|
|
807
825
|
*
|
|
808
826
|
* @example Performance Tuning (Large Files)
|
|
@@ -827,6 +845,7 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
827
845
|
*
|
|
828
846
|
* @see {@link https://brainy.dev/docs/api/import API Documentation}
|
|
829
847
|
* @see {@link https://brainy.dev/docs/guides/migrating-to-v4 Migration Guide}
|
|
848
|
+
* @see {@link https://brainy.dev/docs/guides/standard-import-progress Standard Progress API (v4.5.0)}
|
|
830
849
|
*
|
|
831
850
|
* @remarks
|
|
832
851
|
* **⚠️ Breaking Changes from v3.x:**
|
|
@@ -857,7 +876,7 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
857
876
|
* - Reduced confusion (removed redundant options)
|
|
858
877
|
*/
|
|
859
878
|
import(source: Buffer | string | object, options?: {
|
|
860
|
-
format?: 'excel' | 'pdf' | 'csv' | 'json' | 'markdown';
|
|
879
|
+
format?: 'excel' | 'pdf' | 'csv' | 'json' | 'markdown' | 'yaml' | 'docx';
|
|
861
880
|
vfsPath?: string;
|
|
862
881
|
groupBy?: 'type' | 'sheet' | 'flat' | 'custom';
|
|
863
882
|
customGrouping?: (entity: any) => string;
|
package/dist/brainy.js
CHANGED
|
@@ -846,6 +846,13 @@ export class Brainy {
|
|
|
846
846
|
if (params.service) {
|
|
847
847
|
filter.service = params.service;
|
|
848
848
|
}
|
|
849
|
+
// v4.5.1: Exclude VFS relationships by default (same pattern as brain.find())
|
|
850
|
+
// VFS relationships have metadata.isVFS = true
|
|
851
|
+
// Only include VFS relationships if explicitly requested
|
|
852
|
+
if (params.includeVFS !== true) {
|
|
853
|
+
filter.metadata = filter.metadata || {};
|
|
854
|
+
filter.metadata.isVFS = { notEquals: true };
|
|
855
|
+
}
|
|
849
856
|
// Fetch from storage with pagination at storage layer (efficient!)
|
|
850
857
|
const result = await this.storage.getVerbs({
|
|
851
858
|
pagination: {
|
|
@@ -1757,9 +1764,27 @@ export class Brainy {
|
|
|
1757
1764
|
* groupBy: 'type', // Organize by entity type
|
|
1758
1765
|
* preserveSource: true, // Keep original file
|
|
1759
1766
|
*
|
|
1760
|
-
* // Progress tracking
|
|
1761
|
-
* onProgress: (p) =>
|
|
1767
|
+
* // Progress tracking (v4.5.0 - STANDARDIZED FOR ALL 7 FORMATS!)
|
|
1768
|
+
* onProgress: (p) => {
|
|
1769
|
+
* console.log(`[${p.stage}] ${p.message}`)
|
|
1770
|
+
* console.log(`Entities: ${p.entities || 0}, Rels: ${p.relationships || 0}`)
|
|
1771
|
+
* if (p.throughput) console.log(`Rate: ${p.throughput.toFixed(1)}/sec`)
|
|
1772
|
+
* }
|
|
1762
1773
|
* })
|
|
1774
|
+
* // THIS SAME HANDLER WORKS FOR CSV, PDF, Excel, JSON, Markdown, YAML, DOCX!
|
|
1775
|
+
* ```
|
|
1776
|
+
*
|
|
1777
|
+
* @example Universal Progress Handler (v4.5.0)
|
|
1778
|
+
* ```typescript
|
|
1779
|
+
* // ONE handler for ALL 7 formats - no format-specific code needed!
|
|
1780
|
+
* const universalProgress = (p) => {
|
|
1781
|
+
* updateUI(p.stage, p.message, p.entities, p.relationships)
|
|
1782
|
+
* }
|
|
1783
|
+
*
|
|
1784
|
+
* await brain.import(csvBuffer, { onProgress: universalProgress })
|
|
1785
|
+
* await brain.import(pdfBuffer, { onProgress: universalProgress })
|
|
1786
|
+
* await brain.import(excelBuffer, { onProgress: universalProgress })
|
|
1787
|
+
* // Works for JSON, Markdown, YAML, DOCX too!
|
|
1763
1788
|
* ```
|
|
1764
1789
|
*
|
|
1765
1790
|
* @example Performance Tuning (Large Files)
|
|
@@ -1784,6 +1809,7 @@ export class Brainy {
|
|
|
1784
1809
|
*
|
|
1785
1810
|
* @see {@link https://brainy.dev/docs/api/import API Documentation}
|
|
1786
1811
|
* @see {@link https://brainy.dev/docs/guides/migrating-to-v4 Migration Guide}
|
|
1812
|
+
* @see {@link https://brainy.dev/docs/guides/standard-import-progress Standard Progress API (v4.5.0)}
|
|
1787
1813
|
*
|
|
1788
1814
|
* @remarks
|
|
1789
1815
|
* **⚠️ Breaking Changes from v3.x:**
|
|
@@ -12,6 +12,8 @@ interface AddOptions extends CoreOptions {
|
|
|
12
12
|
id?: string;
|
|
13
13
|
metadata?: string;
|
|
14
14
|
type?: string;
|
|
15
|
+
confidence?: string;
|
|
16
|
+
weight?: string;
|
|
15
17
|
}
|
|
16
18
|
interface SearchOptions extends CoreOptions {
|
|
17
19
|
limit?: string;
|
|
@@ -25,6 +27,7 @@ interface SearchOptions extends CoreOptions {
|
|
|
25
27
|
via?: string;
|
|
26
28
|
explain?: boolean;
|
|
27
29
|
includeRelations?: boolean;
|
|
30
|
+
includeVfs?: boolean;
|
|
28
31
|
fusion?: string;
|
|
29
32
|
vectorWeight?: string;
|
|
30
33
|
graphWeight?: string;
|
|
@@ -109,23 +109,37 @@ export const coreCommands = {
|
|
|
109
109
|
spinner.text = `Using detected type: ${nounType}`;
|
|
110
110
|
}
|
|
111
111
|
// Add with explicit type
|
|
112
|
-
const
|
|
112
|
+
const addParams = {
|
|
113
113
|
data: text,
|
|
114
114
|
type: nounType,
|
|
115
115
|
metadata
|
|
116
|
-
}
|
|
116
|
+
};
|
|
117
|
+
// v4.3.x: Add confidence and weight if provided
|
|
118
|
+
if (options.confidence) {
|
|
119
|
+
addParams.confidence = parseFloat(options.confidence);
|
|
120
|
+
}
|
|
121
|
+
if (options.weight) {
|
|
122
|
+
addParams.weight = parseFloat(options.weight);
|
|
123
|
+
}
|
|
124
|
+
const result = await brain.add(addParams);
|
|
117
125
|
spinner.succeed('Added successfully');
|
|
118
126
|
if (!options.json) {
|
|
119
127
|
console.log(chalk.green(`✓ Added with ID: ${result}`));
|
|
120
128
|
if (options.type) {
|
|
121
129
|
console.log(chalk.dim(` Type: ${options.type}`));
|
|
122
130
|
}
|
|
131
|
+
if (options.confidence) {
|
|
132
|
+
console.log(chalk.dim(` Confidence: ${options.confidence}`));
|
|
133
|
+
}
|
|
134
|
+
if (options.weight) {
|
|
135
|
+
console.log(chalk.dim(` Weight: ${options.weight}`));
|
|
136
|
+
}
|
|
123
137
|
if (Object.keys(metadata).length > 0) {
|
|
124
138
|
console.log(chalk.dim(` Metadata: ${JSON.stringify(metadata)}`));
|
|
125
139
|
}
|
|
126
140
|
}
|
|
127
141
|
else {
|
|
128
|
-
formatOutput({ id: result, metadata }, options);
|
|
142
|
+
formatOutput({ id: result, metadata, confidence: addParams.confidence, weight: addParams.weight }, options);
|
|
129
143
|
}
|
|
130
144
|
}
|
|
131
145
|
catch (error) {
|
|
@@ -260,6 +274,10 @@ export const coreCommands = {
|
|
|
260
274
|
if (options.includeRelations) {
|
|
261
275
|
searchParams.includeRelations = true;
|
|
262
276
|
}
|
|
277
|
+
// Include VFS files (v4.4.0 - find excludes VFS by default)
|
|
278
|
+
if (options.includeVfs) {
|
|
279
|
+
searchParams.includeVFS = true;
|
|
280
|
+
}
|
|
263
281
|
// Triple Intelligence Fusion - custom weighting
|
|
264
282
|
if (options.fusion || options.vectorWeight || options.graphWeight || options.fieldWeight) {
|
|
265
283
|
searchParams.fusion = {
|
|
@@ -120,19 +120,25 @@ export const importCommands = {
|
|
|
120
120
|
}]);
|
|
121
121
|
options.recursive = answer.recursive;
|
|
122
122
|
}
|
|
123
|
-
spinner = ora('Initializing
|
|
123
|
+
spinner = ora('Initializing import...').start();
|
|
124
124
|
const brain = getBrainy();
|
|
125
|
-
// Load UniversalImportAPI
|
|
126
|
-
const { UniversalImportAPI } = await import('../../api/UniversalImportAPI.js');
|
|
127
|
-
const universalImport = new UniversalImportAPI(brain);
|
|
128
|
-
await universalImport.init();
|
|
129
|
-
spinner.text = 'Processing import...';
|
|
130
125
|
// Handle different source types
|
|
131
126
|
let result;
|
|
132
127
|
if (isURL) {
|
|
133
|
-
// URL import
|
|
128
|
+
// URL import - fetch first
|
|
134
129
|
spinner.text = `Fetching from ${source}...`;
|
|
135
|
-
|
|
130
|
+
const response = await fetch(source);
|
|
131
|
+
const buffer = Buffer.from(await response.arrayBuffer());
|
|
132
|
+
spinner.text = 'Importing...';
|
|
133
|
+
result = await brain.import(buffer, {
|
|
134
|
+
enableNeuralExtraction: options.extractEntities !== false,
|
|
135
|
+
enableRelationshipInference: options.detectRelationships !== false,
|
|
136
|
+
enableConceptExtraction: options.extractConcepts || false,
|
|
137
|
+
confidenceThreshold: options.confidence ? parseFloat(options.confidence) : 0.6,
|
|
138
|
+
onProgress: options.progress ? (p) => {
|
|
139
|
+
spinner.text = `${p.message}${p.entities ? ` (${p.entities} entities)` : ''}`;
|
|
140
|
+
} : undefined
|
|
141
|
+
});
|
|
136
142
|
}
|
|
137
143
|
else if (isDirectory) {
|
|
138
144
|
// Directory import - process each file
|
|
@@ -163,31 +169,42 @@ export const importCommands = {
|
|
|
163
169
|
};
|
|
164
170
|
await collectFiles(source);
|
|
165
171
|
spinner.succeed(`Found ${files.length} files`);
|
|
166
|
-
// Process files
|
|
167
|
-
const batchSize = options.batchSize ? parseInt(options.batchSize) : 100;
|
|
172
|
+
// Process files with progress
|
|
168
173
|
let totalEntities = 0;
|
|
169
174
|
let totalRelationships = 0;
|
|
170
175
|
let filesProcessed = 0;
|
|
171
|
-
for (
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
}
|
|
176
|
-
for (const file of batch) {
|
|
177
|
-
try {
|
|
178
|
-
const fileResult = await universalImport.importFromFile(file);
|
|
179
|
-
totalEntities += fileResult.stats.entitiesCreated;
|
|
180
|
-
totalRelationships += fileResult.stats.relationshipsCreated;
|
|
181
|
-
filesProcessed++;
|
|
176
|
+
for (const file of files) {
|
|
177
|
+
try {
|
|
178
|
+
if (options.progress) {
|
|
179
|
+
spinner = ora(`[${filesProcessed + 1}/${files.length}] Importing ${file}...`).start();
|
|
182
180
|
}
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
181
|
+
const fileResult = await brain.import(file, {
|
|
182
|
+
enableNeuralExtraction: options.extractEntities !== false,
|
|
183
|
+
enableRelationshipInference: options.detectRelationships !== false,
|
|
184
|
+
enableConceptExtraction: options.extractConcepts || false,
|
|
185
|
+
confidenceThreshold: options.confidence ? parseFloat(options.confidence) : 0.6,
|
|
186
|
+
onProgress: options.progress ? (p) => {
|
|
187
|
+
spinner.text = `[${filesProcessed + 1}/${files.length}] ${p.message}`;
|
|
188
|
+
} : undefined
|
|
189
|
+
});
|
|
190
|
+
totalEntities += fileResult.entities.length;
|
|
191
|
+
totalRelationships += fileResult.relationships.length;
|
|
192
|
+
filesProcessed++;
|
|
193
|
+
if (options.progress) {
|
|
194
|
+
spinner.succeed(`[${filesProcessed}/${files.length}] ${file}`);
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
catch (error) {
|
|
198
|
+
if (options.verbose) {
|
|
199
|
+
if (spinner)
|
|
200
|
+
spinner.fail(`Failed: ${file}`);
|
|
201
|
+
console.log(chalk.yellow(`⚠️ ${error.message}`));
|
|
187
202
|
}
|
|
188
203
|
}
|
|
189
204
|
}
|
|
190
205
|
result = {
|
|
206
|
+
entities: [],
|
|
207
|
+
relationships: [],
|
|
191
208
|
stats: {
|
|
192
209
|
filesProcessed,
|
|
193
210
|
entitiesCreated: totalEntities,
|
|
@@ -195,11 +212,23 @@ export const importCommands = {
|
|
|
195
212
|
totalProcessed: filesProcessed
|
|
196
213
|
}
|
|
197
214
|
};
|
|
198
|
-
spinner.succeed(
|
|
215
|
+
spinner = ora().succeed(`Directory import complete: ${filesProcessed} files`);
|
|
199
216
|
}
|
|
200
217
|
else {
|
|
201
|
-
// File import
|
|
202
|
-
result = await
|
|
218
|
+
// File import with progress
|
|
219
|
+
result = await brain.import(source, {
|
|
220
|
+
format: options.format,
|
|
221
|
+
enableNeuralExtraction: options.extractEntities !== false,
|
|
222
|
+
enableRelationshipInference: options.detectRelationships !== false,
|
|
223
|
+
enableConceptExtraction: options.extractConcepts || false,
|
|
224
|
+
confidenceThreshold: options.confidence ? parseFloat(options.confidence) : 0.6,
|
|
225
|
+
onProgress: options.progress ? (p) => {
|
|
226
|
+
spinner.text = `${p.message}${p.entities ? ` (${p.entities} entities, ${p.relationships || 0} relationships)` : ''}`;
|
|
227
|
+
if (p.throughput && p.eta) {
|
|
228
|
+
spinner.text += ` - ${p.throughput.toFixed(1)}/sec, ETA: ${Math.round(p.eta / 1000)}s`;
|
|
229
|
+
}
|
|
230
|
+
} : undefined
|
|
231
|
+
});
|
|
203
232
|
}
|
|
204
233
|
spinner.succeed('Import complete');
|
|
205
234
|
// Post-processing: extract concepts if requested
|
|
@@ -270,15 +299,21 @@ export const importCommands = {
|
|
|
270
299
|
if (!options.json && !options.quiet) {
|
|
271
300
|
console.log(chalk.cyan('\n📊 Import Results:\n'));
|
|
272
301
|
console.log(chalk.bold('Statistics:'));
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
302
|
+
const entitiesCount = result.stats?.entitiesCreated || result.entities?.length || 0;
|
|
303
|
+
const relationshipsCount = result.stats?.relationshipsCreated || result.relationships?.length || 0;
|
|
304
|
+
console.log(` Entities created: ${chalk.green(entitiesCount)}`);
|
|
305
|
+
if (relationshipsCount > 0) {
|
|
306
|
+
console.log(` Relationships created: ${chalk.green(relationshipsCount)}`);
|
|
276
307
|
}
|
|
277
|
-
if (result.stats
|
|
308
|
+
if (result.stats?.filesProcessed) {
|
|
278
309
|
console.log(` Files processed: ${chalk.green(result.stats.filesProcessed)}`);
|
|
279
310
|
}
|
|
280
|
-
|
|
281
|
-
|
|
311
|
+
if (result.stats?.averageConfidence) {
|
|
312
|
+
console.log(` Average confidence: ${chalk.yellow((result.stats.averageConfidence * 100).toFixed(1))}%`);
|
|
313
|
+
}
|
|
314
|
+
if (result.stats?.processingTimeMs) {
|
|
315
|
+
console.log(` Processing time: ${chalk.dim(result.stats.processingTimeMs)}ms`);
|
|
316
|
+
}
|
|
282
317
|
if (options.verbose && result.entities && result.entities.length > 0) {
|
|
283
318
|
console.log(chalk.bold('\n📦 Imported Entities (first 10):'));
|
|
284
319
|
result.entities.slice(0, 10).forEach((entity, i) => {
|
|
@@ -53,10 +53,44 @@ export class SmartCSVImporter {
|
|
|
53
53
|
...options
|
|
54
54
|
};
|
|
55
55
|
// Parse CSV using existing handler
|
|
56
|
+
// v4.5.0: Pass progress hooks to handler for file parsing progress
|
|
56
57
|
const processedData = await this.csvHandler.process(buffer, {
|
|
57
58
|
...options,
|
|
58
59
|
csvDelimiter: opts.csvDelimiter,
|
|
59
|
-
csvHeaders: opts.csvHeaders
|
|
60
|
+
csvHeaders: opts.csvHeaders,
|
|
61
|
+
totalBytes: buffer.length,
|
|
62
|
+
progressHooks: {
|
|
63
|
+
onBytesProcessed: (bytes) => {
|
|
64
|
+
// Handler reports bytes processed during parsing
|
|
65
|
+
opts.onProgress?.({
|
|
66
|
+
processed: 0,
|
|
67
|
+
total: 0,
|
|
68
|
+
entities: 0,
|
|
69
|
+
relationships: 0,
|
|
70
|
+
phase: `Parsing CSV (${Math.round((bytes / buffer.length) * 100)}%)`
|
|
71
|
+
});
|
|
72
|
+
},
|
|
73
|
+
onCurrentItem: (message) => {
|
|
74
|
+
// Handler reports current processing step
|
|
75
|
+
opts.onProgress?.({
|
|
76
|
+
processed: 0,
|
|
77
|
+
total: 0,
|
|
78
|
+
entities: 0,
|
|
79
|
+
relationships: 0,
|
|
80
|
+
phase: message
|
|
81
|
+
});
|
|
82
|
+
},
|
|
83
|
+
onDataExtracted: (count, total) => {
|
|
84
|
+
// Handler reports rows extracted
|
|
85
|
+
opts.onProgress?.({
|
|
86
|
+
processed: 0,
|
|
87
|
+
total: total || count,
|
|
88
|
+
entities: 0,
|
|
89
|
+
relationships: 0,
|
|
90
|
+
phase: `Extracted ${count} rows`
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
}
|
|
60
94
|
});
|
|
61
95
|
const rows = processedData.data;
|
|
62
96
|
if (rows.length === 0) {
|
|
@@ -54,10 +54,22 @@ export class SmartDOCXImporter {
|
|
|
54
54
|
if (!this.mammothLoaded) {
|
|
55
55
|
await this.init();
|
|
56
56
|
}
|
|
57
|
+
// v4.5.0: Report parsing start
|
|
58
|
+
options.onProgress?.({
|
|
59
|
+
processed: 0,
|
|
60
|
+
entities: 0,
|
|
61
|
+
relationships: 0
|
|
62
|
+
});
|
|
57
63
|
// Extract raw text for entity extraction
|
|
58
64
|
const textResult = await mammoth.extractRawText({ buffer });
|
|
59
65
|
// Extract HTML for structure analysis (headings, tables)
|
|
60
66
|
const htmlResult = await mammoth.convertToHtml({ buffer });
|
|
67
|
+
// v4.5.0: Report parsing complete
|
|
68
|
+
options.onProgress?.({
|
|
69
|
+
processed: 0,
|
|
70
|
+
entities: 0,
|
|
71
|
+
relationships: 0
|
|
72
|
+
});
|
|
61
73
|
// Process the document
|
|
62
74
|
const result = await this.extractFromContent(textResult.value, htmlResult.value, options);
|
|
63
75
|
result.processingTime = Date.now() - startTime;
|