@soulcraft/brainy 3.27.1 → 3.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/dist/brainy.d.ts +50 -0
- package/dist/brainy.js +36 -0
- package/dist/import/EntityDeduplicator.d.ts +84 -0
- package/dist/import/EntityDeduplicator.js +255 -0
- package/dist/import/FormatDetector.d.ts +65 -0
- package/dist/import/FormatDetector.js +263 -0
- package/dist/import/ImportCoordinator.d.ts +160 -0
- package/dist/import/ImportCoordinator.js +498 -0
- package/dist/import/ImportHistory.d.ts +92 -0
- package/dist/import/ImportHistory.js +183 -0
- package/dist/import/index.d.ts +16 -0
- package/dist/import/index.js +14 -0
- package/dist/importers/SmartCSVImporter.d.ts +136 -0
- package/dist/importers/SmartCSVImporter.js +308 -0
- package/dist/importers/SmartExcelImporter.d.ts +131 -0
- package/dist/importers/SmartExcelImporter.js +302 -0
- package/dist/importers/SmartImportOrchestrator.d.ts +125 -0
- package/dist/importers/SmartImportOrchestrator.js +531 -0
- package/dist/importers/SmartJSONImporter.d.ts +135 -0
- package/dist/importers/SmartJSONImporter.js +325 -0
- package/dist/importers/SmartMarkdownImporter.d.ts +159 -0
- package/dist/importers/SmartMarkdownImporter.js +369 -0
- package/dist/importers/SmartPDFImporter.d.ts +154 -0
- package/dist/importers/SmartPDFImporter.js +337 -0
- package/dist/importers/VFSStructureGenerator.d.ts +82 -0
- package/dist/importers/VFSStructureGenerator.js +260 -0
- package/dist/importers/index.d.ts +28 -0
- package/dist/importers/index.js +29 -0
- package/package.json +1 -1
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart Excel Importer
|
|
3
|
+
*
|
|
4
|
+
* Extracts entities and relationships from Excel files using:
|
|
5
|
+
* - NeuralEntityExtractor for entity extraction
|
|
6
|
+
* - NaturalLanguageProcessor for relationship inference
|
|
7
|
+
* - brain.extractConcepts() for tagging
|
|
8
|
+
*
|
|
9
|
+
* NO MOCKS - Production-ready implementation
|
|
10
|
+
*/
|
|
11
|
+
import { Brainy } from '../brainy.js';
|
|
12
|
+
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
13
|
+
import type { FormatHandlerOptions } from '../augmentations/intelligentImport/types.js';
|
|
14
|
+
/**
 * Options accepted by SmartExcelImporter.extract(), layered on top of the
 * base FormatHandlerOptions. Every field is optional; the defaults quoted
 * below are the ones the importer applies when a field is omitted.
 */
export interface SmartExcelOptions extends FormatHandlerOptions {
    /** Run the neural entity extractor over each row's definition text (default: true) */
    enableNeuralExtraction?: boolean;
    /** Infer relationships from the definition text and the "related" column (default: true) */
    enableRelationshipInference?: boolean;
    /** Call brain.extractConcepts() on each definition to collect concept tags (default: true) */
    enableConceptExtraction?: boolean;
    /**
     * Confidence threshold for entities (0-1, default: 0.6).
     * The importer passes 0.8 x this value to the extractor for related entities.
     */
    confidenceThreshold?: number;
    /**
     * Column name patterns to detect. Each pattern is compiled as a
     * case-insensitive regular expression and tested against the column
     * names of the first row; the first matching column is used.
     * Term column pattern (default: 'term|name|title|concept').
     */
    termColumn?: string;
    /** Definition column pattern (default: 'definition|description|desc|details') */
    definitionColumn?: string;
    /** Type column pattern (default: 'type|category|kind') */
    typeColumn?: string;
    /** Related-terms column pattern (default: 'related|see also|links') */
    relatedColumn?: string;
    /** Progress callback, invoked once after each row has been processed */
    onProgress?: (stats: {
        /** Number of rows processed so far (1-based) */
        processed: number;
        /** Total number of rows in the parsed sheet data */
        total: number;
        /** Entity count reported by the importer at this point */
        entities: number;
        /** Relationship count reported by the importer at this point */
        relationships: number;
    }) => void;
}
|
|
36
|
+
/**
 * Everything the importer extracted from a single spreadsheet row:
 * the row's main entity, entities found in its definition text,
 * inferred relationships, and optional concept tags.
 */
export interface ExtractedRow {
    /** Main entity from this row */
    entity: {
        /** Generated ID of the form `ent_<normalized name>_<timestamp>` */
        id: string;
        /** Value of the term column, or `Entity_<row index>` when the cell is empty */
        name: string;
        /** Mapped from the type column when present; otherwise taken from the first related entity, else Thing */
        type: NounType;
        /** Value of the definition column ('' when absent) */
        description: string;
        /** Confidence score; the importer assigns 0.95 to main entities */
        confidence: number;
        /** Importer-populated metadata: source, row, originalData, concepts, extractedAt */
        metadata: Record<string, any>;
    };
    /** Additional entities extracted from definition */
    relatedEntities: Array<{
        /** Entity text as returned by the extractor */
        name: string;
        /** Entity type as returned by the extractor */
        type: NounType;
        /** Extractor-reported confidence */
        confidence: number;
    }>;
    /** Inferred relationships */
    relationships: Array<{
        /** ID of the row's main entity */
        from: string;
        /** Name (not ID) of the target entity; left for the consumer to resolve */
        to: string;
        /** Relationship type, pattern-matched from the definition or RelatedTo */
        type: VerbType;
        /** Extractor confidence for inferred links; 0.9 for links listed explicitly in the related column */
        confidence: number;
        /** Human-readable note on where the relationship came from */
        evidence: string;
    }>;
    /** Extracted concepts */
    concepts?: string[];
}
|
|
63
|
+
/**
 * Aggregate result returned by SmartExcelImporter.extract().
 */
export interface SmartExcelResult {
    /** Total rows processed */
    rowsProcessed: number;
    /** Entities extracted (includes main + related): one per row plus each row's related entities */
    entitiesExtracted: number;
    /** Relationships inferred, summed over all rows */
    relationshipsInferred: number;
    /** All extracted data */
    rows: ExtractedRow[];
    /** Entity ID mapping (lower-cased term name -> generated entity ID) */
    entityMap: Map<string, string>;
    /** Processing time in ms */
    processingTime: number;
    /** Extraction statistics; the importer records main entities only */
    stats: {
        /** Count of main entities keyed by the string form of their type */
        byType: Record<string, number>;
        /** Count of main entities by confidence bucket */
        byConfidence: {
            /** confidence > 0.8 */
            high: number;
            /** 0.6 <= confidence <= 0.8 */
            medium: number;
            /** confidence < 0.6 */
            low: number;
        };
    };
}
|
|
86
|
+
/**
|
|
87
|
+
* SmartExcelImporter - Extracts structured knowledge from Excel files
|
|
88
|
+
*/
|
|
89
|
+
export declare class SmartExcelImporter {
    /** Brainy instance; used directly for concept extraction */
    private brain;
    /** Neural entity extractor applied to definition text */
    private extractor;
    /** Natural language processor; initialized by init() */
    private nlp;
    /** Handler that parses the Excel buffer into row objects */
    private excelHandler;
    /**
     * @param brain Brainy instance shared with the extractor and NLP processor
     */
    constructor(brain: Brainy);
    /**
     * Initialize the importer (awaits initialization of the NLP processor).
     * NOTE(review): presumably must be awaited before extract() - confirm with callers.
     */
    init(): Promise<void>;
    /**
     * Extract entities and relationships from Excel file
     *
     * @param buffer Raw Excel file contents, passed to the Excel handler
     * @param options Extraction options; omitted fields use the importer defaults
     * @returns Per-row extraction data plus aggregate counts; an empty result
     *   when the handler yields no rows
     */
    extract(buffer: Buffer, options?: SmartExcelOptions): Promise<SmartExcelResult>;
    /**
     * Detect column names from first row by testing each configured
     * pattern (case-insensitive regex) against the row's keys
     */
    private detectColumns;
    /**
     * Get value from row using column name; returns the trimmed string
     * form, or '' when the column or the cell is missing
     */
    private getColumnValue;
    /**
     * Map type string to NounType; unrecognized strings map to Thing
     */
    private mapTypeString;
    /**
     * Infer relationship type from context using text patterns,
     * defaulting to RelatedTo
     */
    private inferRelationship;
    /**
     * Generate entity ID from name (normalized name plus a timestamp suffix)
     */
    private generateEntityId;
    /**
     * Update statistics (per-type count and confidence bucket)
     */
    private updateStats;
    /**
     * Create empty result
     */
    private emptyResult;
}
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart Excel Importer
|
|
3
|
+
*
|
|
4
|
+
* Extracts entities and relationships from Excel files using:
|
|
5
|
+
* - NeuralEntityExtractor for entity extraction
|
|
6
|
+
* - NaturalLanguageProcessor for relationship inference
|
|
7
|
+
* - brain.extractConcepts() for tagging
|
|
8
|
+
*
|
|
9
|
+
* NO MOCKS - Production-ready implementation
|
|
10
|
+
*/
|
|
11
|
+
import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
|
|
12
|
+
import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
|
|
13
|
+
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
14
|
+
import { ExcelHandler } from '../augmentations/intelligentImport/handlers/excelHandler.js';
|
|
15
|
+
/**
|
|
16
|
+
* SmartExcelImporter - Extracts structured knowledge from Excel files
|
|
17
|
+
*/
|
|
18
|
+
export class SmartExcelImporter {
|
|
19
|
+
constructor(brain) {
|
|
20
|
+
this.brain = brain;
|
|
21
|
+
this.extractor = new NeuralEntityExtractor(brain);
|
|
22
|
+
this.nlp = new NaturalLanguageProcessor(brain);
|
|
23
|
+
this.excelHandler = new ExcelHandler();
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Initialize the importer
|
|
27
|
+
*/
|
|
28
|
+
async init() {
|
|
29
|
+
await this.nlp.init();
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Extract entities and relationships from Excel file
|
|
33
|
+
*/
|
|
34
|
+
async extract(buffer, options = {}) {
|
|
35
|
+
const startTime = Date.now();
|
|
36
|
+
// Set defaults
|
|
37
|
+
const opts = {
|
|
38
|
+
enableNeuralExtraction: true,
|
|
39
|
+
enableRelationshipInference: true,
|
|
40
|
+
enableConceptExtraction: true,
|
|
41
|
+
confidenceThreshold: 0.6,
|
|
42
|
+
termColumn: 'term|name|title|concept',
|
|
43
|
+
definitionColumn: 'definition|description|desc|details',
|
|
44
|
+
typeColumn: 'type|category|kind',
|
|
45
|
+
relatedColumn: 'related|see also|links',
|
|
46
|
+
onProgress: () => { },
|
|
47
|
+
...options
|
|
48
|
+
};
|
|
49
|
+
// Parse Excel using existing handler
|
|
50
|
+
const processedData = await this.excelHandler.process(buffer, options);
|
|
51
|
+
const rows = processedData.data;
|
|
52
|
+
if (rows.length === 0) {
|
|
53
|
+
return this.emptyResult(startTime);
|
|
54
|
+
}
|
|
55
|
+
// Detect column names
|
|
56
|
+
const columns = this.detectColumns(rows[0], opts);
|
|
57
|
+
// Process each row
|
|
58
|
+
const extractedRows = [];
|
|
59
|
+
const entityMap = new Map();
|
|
60
|
+
const stats = {
|
|
61
|
+
byType: {},
|
|
62
|
+
byConfidence: { high: 0, medium: 0, low: 0 }
|
|
63
|
+
};
|
|
64
|
+
for (let i = 0; i < rows.length; i++) {
|
|
65
|
+
const row = rows[i];
|
|
66
|
+
// Extract data from row
|
|
67
|
+
const term = this.getColumnValue(row, columns.term) || `Entity_${i}`;
|
|
68
|
+
const definition = this.getColumnValue(row, columns.definition) || '';
|
|
69
|
+
const type = this.getColumnValue(row, columns.type);
|
|
70
|
+
const relatedTerms = this.getColumnValue(row, columns.related);
|
|
71
|
+
// Extract entities from definition
|
|
72
|
+
let relatedEntities = [];
|
|
73
|
+
if (opts.enableNeuralExtraction && definition) {
|
|
74
|
+
relatedEntities = await this.extractor.extract(definition, {
|
|
75
|
+
confidence: opts.confidenceThreshold * 0.8, // Lower threshold for related entities
|
|
76
|
+
neuralMatching: true,
|
|
77
|
+
cache: { enabled: true }
|
|
78
|
+
});
|
|
79
|
+
// Filter out the main term from related entities
|
|
80
|
+
relatedEntities = relatedEntities.filter(e => e.text.toLowerCase() !== term.toLowerCase());
|
|
81
|
+
}
|
|
82
|
+
// Determine main entity type
|
|
83
|
+
const mainEntityType = type ?
|
|
84
|
+
this.mapTypeString(type) :
|
|
85
|
+
(relatedEntities.length > 0 ? relatedEntities[0].type : NounType.Thing);
|
|
86
|
+
// Generate entity ID
|
|
87
|
+
const entityId = this.generateEntityId(term);
|
|
88
|
+
entityMap.set(term.toLowerCase(), entityId);
|
|
89
|
+
// Extract concepts
|
|
90
|
+
let concepts = [];
|
|
91
|
+
if (opts.enableConceptExtraction && definition) {
|
|
92
|
+
try {
|
|
93
|
+
concepts = await this.brain.extractConcepts(definition, { limit: 10 });
|
|
94
|
+
}
|
|
95
|
+
catch (error) {
|
|
96
|
+
// Concept extraction is optional
|
|
97
|
+
concepts = [];
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
// Create main entity
|
|
101
|
+
const mainEntity = {
|
|
102
|
+
id: entityId,
|
|
103
|
+
name: term,
|
|
104
|
+
type: mainEntityType,
|
|
105
|
+
description: definition,
|
|
106
|
+
confidence: 0.95, // Main entity from row has high confidence
|
|
107
|
+
metadata: {
|
|
108
|
+
source: 'excel',
|
|
109
|
+
row: i + 1,
|
|
110
|
+
originalData: row,
|
|
111
|
+
concepts,
|
|
112
|
+
extractedAt: Date.now()
|
|
113
|
+
}
|
|
114
|
+
};
|
|
115
|
+
// Track statistics
|
|
116
|
+
this.updateStats(stats, mainEntityType, mainEntity.confidence);
|
|
117
|
+
// Infer relationships
|
|
118
|
+
const relationships = [];
|
|
119
|
+
if (opts.enableRelationshipInference) {
|
|
120
|
+
// Extract relationships from definition text
|
|
121
|
+
for (const relEntity of relatedEntities) {
|
|
122
|
+
const verbType = await this.inferRelationship(term, relEntity.text, definition);
|
|
123
|
+
relationships.push({
|
|
124
|
+
from: entityId,
|
|
125
|
+
to: relEntity.text, // Use entity name directly, will be resolved later
|
|
126
|
+
type: verbType,
|
|
127
|
+
confidence: relEntity.confidence,
|
|
128
|
+
evidence: `Extracted from: "${definition.substring(0, 100)}..."`
|
|
129
|
+
});
|
|
130
|
+
}
|
|
131
|
+
// Parse explicit "Related Terms" column
|
|
132
|
+
if (relatedTerms) {
|
|
133
|
+
const terms = relatedTerms.split(/[,;]/).map(t => t.trim()).filter(Boolean);
|
|
134
|
+
for (const relTerm of terms) {
|
|
135
|
+
// Ensure we don't create self-relationships
|
|
136
|
+
if (relTerm.toLowerCase() !== term.toLowerCase()) {
|
|
137
|
+
relationships.push({
|
|
138
|
+
from: entityId,
|
|
139
|
+
to: relTerm, // Use term name directly
|
|
140
|
+
type: VerbType.RelatedTo,
|
|
141
|
+
confidence: 0.9, // Explicit relationships have high confidence
|
|
142
|
+
evidence: `Explicitly listed in "Related" column`
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
// Add extracted row
|
|
149
|
+
extractedRows.push({
|
|
150
|
+
entity: mainEntity,
|
|
151
|
+
relatedEntities: relatedEntities.map(e => ({
|
|
152
|
+
name: e.text,
|
|
153
|
+
type: e.type,
|
|
154
|
+
confidence: e.confidence
|
|
155
|
+
})),
|
|
156
|
+
relationships,
|
|
157
|
+
concepts
|
|
158
|
+
});
|
|
159
|
+
// Report progress
|
|
160
|
+
opts.onProgress({
|
|
161
|
+
processed: i + 1,
|
|
162
|
+
total: rows.length,
|
|
163
|
+
entities: extractedRows.length + relatedEntities.length,
|
|
164
|
+
relationships: relationships.length
|
|
165
|
+
});
|
|
166
|
+
}
|
|
167
|
+
return {
|
|
168
|
+
rowsProcessed: rows.length,
|
|
169
|
+
entitiesExtracted: extractedRows.reduce((sum, row) => sum + 1 + row.relatedEntities.length, 0),
|
|
170
|
+
relationshipsInferred: extractedRows.reduce((sum, row) => sum + row.relationships.length, 0),
|
|
171
|
+
rows: extractedRows,
|
|
172
|
+
entityMap,
|
|
173
|
+
processingTime: Date.now() - startTime,
|
|
174
|
+
stats
|
|
175
|
+
};
|
|
176
|
+
}
|
|
177
|
+
/**
|
|
178
|
+
* Detect column names from first row
|
|
179
|
+
*/
|
|
180
|
+
detectColumns(firstRow, options) {
|
|
181
|
+
const columnNames = Object.keys(firstRow);
|
|
182
|
+
const matchColumn = (pattern) => {
|
|
183
|
+
const regex = new RegExp(pattern, 'i');
|
|
184
|
+
return columnNames.find(col => regex.test(col)) || null;
|
|
185
|
+
};
|
|
186
|
+
return {
|
|
187
|
+
term: matchColumn(options.termColumn || 'term|name'),
|
|
188
|
+
definition: matchColumn(options.definitionColumn || 'definition|description'),
|
|
189
|
+
type: matchColumn(options.typeColumn || 'type|category'),
|
|
190
|
+
related: matchColumn(options.relatedColumn || 'related|see also')
|
|
191
|
+
};
|
|
192
|
+
}
|
|
193
|
+
/**
|
|
194
|
+
* Get value from row using column name
|
|
195
|
+
*/
|
|
196
|
+
getColumnValue(row, columnName) {
|
|
197
|
+
if (!columnName)
|
|
198
|
+
return '';
|
|
199
|
+
const value = row[columnName];
|
|
200
|
+
if (value === null || value === undefined)
|
|
201
|
+
return '';
|
|
202
|
+
return String(value).trim();
|
|
203
|
+
}
|
|
204
|
+
/**
|
|
205
|
+
* Map type string to NounType
|
|
206
|
+
*/
|
|
207
|
+
mapTypeString(typeString) {
|
|
208
|
+
const normalized = typeString.toLowerCase().trim();
|
|
209
|
+
const mapping = {
|
|
210
|
+
'person': NounType.Person,
|
|
211
|
+
'character': NounType.Person,
|
|
212
|
+
'people': NounType.Person,
|
|
213
|
+
'place': NounType.Location,
|
|
214
|
+
'location': NounType.Location,
|
|
215
|
+
'geography': NounType.Location,
|
|
216
|
+
'organization': NounType.Organization,
|
|
217
|
+
'org': NounType.Organization,
|
|
218
|
+
'company': NounType.Organization,
|
|
219
|
+
'concept': NounType.Concept,
|
|
220
|
+
'idea': NounType.Concept,
|
|
221
|
+
'theory': NounType.Concept,
|
|
222
|
+
'event': NounType.Event,
|
|
223
|
+
'occurrence': NounType.Event,
|
|
224
|
+
'product': NounType.Product,
|
|
225
|
+
'item': NounType.Product,
|
|
226
|
+
'thing': NounType.Thing,
|
|
227
|
+
'document': NounType.Document,
|
|
228
|
+
'file': NounType.File,
|
|
229
|
+
'project': NounType.Project
|
|
230
|
+
};
|
|
231
|
+
return mapping[normalized] || NounType.Thing;
|
|
232
|
+
}
|
|
233
|
+
/**
|
|
234
|
+
* Infer relationship type from context
|
|
235
|
+
*/
|
|
236
|
+
async inferRelationship(fromTerm, toTerm, context) {
|
|
237
|
+
const lowerContext = context.toLowerCase();
|
|
238
|
+
// Pattern-based relationship detection
|
|
239
|
+
const patterns = [
|
|
240
|
+
[new RegExp(`${toTerm}.*of.*${fromTerm}`, 'i'), VerbType.PartOf],
|
|
241
|
+
[new RegExp(`${fromTerm}.*contains.*${toTerm}`, 'i'), VerbType.Contains],
|
|
242
|
+
[new RegExp(`located in.*${toTerm}`, 'i'), VerbType.LocatedAt],
|
|
243
|
+
[new RegExp(`ruled by.*${toTerm}`, 'i'), VerbType.Owns],
|
|
244
|
+
[new RegExp(`capital.*${toTerm}`, 'i'), VerbType.Contains],
|
|
245
|
+
[new RegExp(`created by.*${toTerm}`, 'i'), VerbType.CreatedBy],
|
|
246
|
+
[new RegExp(`authored by.*${toTerm}`, 'i'), VerbType.CreatedBy],
|
|
247
|
+
[new RegExp(`part of.*${toTerm}`, 'i'), VerbType.PartOf],
|
|
248
|
+
[new RegExp(`related to.*${toTerm}`, 'i'), VerbType.RelatedTo]
|
|
249
|
+
];
|
|
250
|
+
for (const [pattern, verbType] of patterns) {
|
|
251
|
+
if (pattern.test(lowerContext)) {
|
|
252
|
+
return verbType;
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
// Default to RelatedTo
|
|
256
|
+
return VerbType.RelatedTo;
|
|
257
|
+
}
|
|
258
|
+
/**
|
|
259
|
+
* Generate consistent entity ID from name
|
|
260
|
+
*/
|
|
261
|
+
generateEntityId(name) {
|
|
262
|
+
// Create deterministic ID based on normalized name
|
|
263
|
+
const normalized = name.toLowerCase().trim().replace(/\s+/g, '_');
|
|
264
|
+
return `ent_${normalized}_${Date.now()}`;
|
|
265
|
+
}
|
|
266
|
+
/**
|
|
267
|
+
* Update statistics
|
|
268
|
+
*/
|
|
269
|
+
updateStats(stats, type, confidence) {
|
|
270
|
+
// Track by type
|
|
271
|
+
const typeName = String(type);
|
|
272
|
+
stats.byType[typeName] = (stats.byType[typeName] || 0) + 1;
|
|
273
|
+
// Track by confidence
|
|
274
|
+
if (confidence > 0.8) {
|
|
275
|
+
stats.byConfidence.high++;
|
|
276
|
+
}
|
|
277
|
+
else if (confidence >= 0.6) {
|
|
278
|
+
stats.byConfidence.medium++;
|
|
279
|
+
}
|
|
280
|
+
else {
|
|
281
|
+
stats.byConfidence.low++;
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
/**
|
|
285
|
+
* Create empty result
|
|
286
|
+
*/
|
|
287
|
+
emptyResult(startTime) {
|
|
288
|
+
return {
|
|
289
|
+
rowsProcessed: 0,
|
|
290
|
+
entitiesExtracted: 0,
|
|
291
|
+
relationshipsInferred: 0,
|
|
292
|
+
rows: [],
|
|
293
|
+
entityMap: new Map(),
|
|
294
|
+
processingTime: Date.now() - startTime,
|
|
295
|
+
stats: {
|
|
296
|
+
byType: {},
|
|
297
|
+
byConfidence: { high: 0, medium: 0, low: 0 }
|
|
298
|
+
}
|
|
299
|
+
};
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
//# sourceMappingURL=SmartExcelImporter.js.map
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart Import Orchestrator
|
|
3
|
+
*
|
|
4
|
+
* Coordinates the entire smart import pipeline:
|
|
5
|
+
* 1. Extract entities/relationships using SmartExcelImporter
|
|
6
|
+
* 2. Create entities and relationships in Brainy
|
|
7
|
+
* 3. Organize into VFS structure using VFSStructureGenerator
|
|
8
|
+
*
|
|
9
|
+
* NO MOCKS - Production-ready implementation
|
|
10
|
+
*/
|
|
11
|
+
import { Brainy } from '../brainy.js';
|
|
12
|
+
import { SmartExcelOptions, SmartExcelResult } from './SmartExcelImporter.js';
|
|
13
|
+
import { SmartPDFOptions } from './SmartPDFImporter.js';
|
|
14
|
+
import { SmartCSVOptions } from './SmartCSVImporter.js';
|
|
15
|
+
import { SmartJSONOptions } from './SmartJSONImporter.js';
|
|
16
|
+
import { SmartMarkdownOptions } from './SmartMarkdownImporter.js';
|
|
17
|
+
/**
 * Options for the SmartImportOrchestrator import methods: all extraction
 * options from SmartExcelOptions plus switches for the later pipeline stages.
 */
export interface SmartImportOptions extends SmartExcelOptions {
    /** Create VFS structure */
    createVFSStructure?: boolean;
    /** VFS root path */
    vfsRootPath?: string;
    /** VFS grouping strategy */
    vfsGroupBy?: 'type' | 'sheet' | 'flat' | 'custom';
    /** Create entities in Brainy */
    createEntities?: boolean;
    /** Create relationships in Brainy */
    createRelationships?: boolean;
    /** Source filename */
    filename?: string;
}
|
|
31
|
+
/**
 * Progress report passed to the `onProgress` callback of the
 * SmartImportOrchestrator import methods.
 */
export interface SmartImportProgress {
    /** Pipeline phase the report refers to */
    phase: 'parsing' | 'extracting' | 'creating' | 'organizing' | 'complete';
    /** Human-readable status message */
    message: string;
    /** Items processed so far (NOTE(review): unit presumably depends on the phase - confirm) */
    processed: number;
    /** Total items expected */
    total: number;
    /** Entity count at the time of the report */
    entities: number;
    /** Relationship count at the time of the report */
    relationships: number;
}
|
|
39
|
+
/**
 * Result returned by every SmartImportOrchestrator import method.
 */
export interface SmartImportResult {
    /** Overall outcome flag (NOTE(review): relation to `errors` is not visible here - confirm) */
    success: boolean;
    /** Extraction results (Excel-shaped for every source format) */
    extraction: SmartExcelResult;
    /** Created entity IDs */
    entityIds: string[];
    /** Created relationship IDs */
    relationshipIds: string[];
    /** VFS structure created */
    vfsStructure?: {
        /** Root path of the generated structure */
        rootPath: string;
        /** Directories created */
        directories: string[];
        /** Number of files created */
        files: number;
    };
    /** Overall statistics */
    stats: {
        /** Rows processed during extraction */
        rowsProcessed: number;
        /** Entities created */
        entitiesCreated: number;
        /** Relationships created */
        relationshipsCreated: number;
        /** VFS files created */
        filesCreated: number;
        /** Total elapsed time (NOTE(review): presumably milliseconds, like SmartExcelResult.processingTime - confirm) */
        totalTime: number;
    };
    /** Any errors encountered */
    errors: string[];
}
|
|
64
|
+
/**
|
|
65
|
+
* SmartImportOrchestrator - Main entry point for smart imports
|
|
66
|
+
*/
|
|
67
|
+
export declare class SmartImportOrchestrator {
    /** Brainy instance supplied to the constructor */
    private brain;
    /** Format-specific importers, one per supported source format */
    private excelImporter;
    private pdfImporter;
    private csvImporter;
    private jsonImporter;
    private markdownImporter;
    /** Generator used for the VFS organization step */
    private vfsGenerator;
    /**
     * @param brain Brainy instance the orchestrator operates on
     */
    constructor(brain: Brainy);
    /**
     * Initialize the orchestrator
     * NOTE(review): presumably must be awaited before any import call - confirm.
     */
    init(): Promise<void>;
    /**
     * Import Excel file with full pipeline
     *
     * @param buffer Raw Excel file contents
     * @param options Extraction and pipeline options
     * @param onProgress Optional callback receiving phase-level progress reports
     */
    importExcel(buffer: Buffer, options?: SmartImportOptions, onProgress?: (progress: SmartImportProgress) => void): Promise<SmartImportResult>;
    /**
     * Import PDF file with full pipeline
     *
     * @param buffer Raw PDF file contents
     * @param options Pipeline options combined with PDF-specific options
     * @param onProgress Optional callback receiving phase-level progress reports
     */
    importPDF(buffer: Buffer, options?: SmartImportOptions & SmartPDFOptions, onProgress?: (progress: SmartImportProgress) => void): Promise<SmartImportResult>;
    /**
     * Import CSV file with full pipeline
     *
     * @param buffer Raw CSV file contents
     * @param options Pipeline options combined with CSV-specific options
     * @param onProgress Optional callback receiving phase-level progress reports
     */
    importCSV(buffer: Buffer, options?: SmartImportOptions & SmartCSVOptions, onProgress?: (progress: SmartImportProgress) => void): Promise<SmartImportResult>;
    /**
     * Import JSON data with full pipeline
     *
     * @param data JSON input (NOTE(review): typed `any`; whether a string or a parsed value is expected is not visible here - confirm)
     * @param options Pipeline options combined with JSON-specific options
     * @param onProgress Optional callback receiving phase-level progress reports
     */
    importJSON(data: any, options?: SmartImportOptions & SmartJSONOptions, onProgress?: (progress: SmartImportProgress) => void): Promise<SmartImportResult>;
    /**
     * Import Markdown content with full pipeline
     *
     * @param markdown Markdown source text
     * @param options Pipeline options combined with Markdown-specific options
     * @param onProgress Optional callback receiving phase-level progress reports
     */
    importMarkdown(markdown: string, options?: SmartImportOptions & SmartMarkdownOptions, onProgress?: (progress: SmartImportProgress) => void): Promise<SmartImportResult>;
    /**
     * Helper: Create entities and relationships from extraction result
     */
    private createEntitiesAndRelationships;
    /**
     * Helper: Convert PDF result to Excel-like format
     */
    private convertPDFToExcelFormat;
    /**
     * Helper: Convert JSON result to Excel-like format
     */
    private convertJSONToExcelFormat;
    /**
     * Helper: Convert Markdown result to Excel-like format
     */
    private convertMarkdownToExcelFormat;
    /**
     * Get import statistics
     *
     * @param vfsRootPath VFS root path the statistics are gathered for
     * @returns Graph entity/relationship counts, VFS file count, and the
     *   last import time when one is available
     */
    getImportStatistics(vfsRootPath: string): Promise<{
        entitiesInGraph: number;
        relationshipsInGraph: number;
        filesInVFS: number;
        lastImport?: Date;
    }>;
}
|