@soulcraft/brainy 3.27.1 → 3.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/brainy.d.ts +50 -0
- package/dist/brainy.js +54 -2
- package/dist/config/storageAutoConfig.d.ts +2 -1
- package/dist/config/storageAutoConfig.js +5 -4
- package/dist/import/EntityDeduplicator.d.ts +84 -0
- package/dist/import/EntityDeduplicator.js +255 -0
- package/dist/import/FormatDetector.d.ts +65 -0
- package/dist/import/FormatDetector.js +263 -0
- package/dist/import/ImportCoordinator.d.ts +160 -0
- package/dist/import/ImportCoordinator.js +498 -0
- package/dist/import/ImportHistory.d.ts +92 -0
- package/dist/import/ImportHistory.js +183 -0
- package/dist/import/index.d.ts +16 -0
- package/dist/import/index.js +14 -0
- package/dist/importers/SmartCSVImporter.d.ts +136 -0
- package/dist/importers/SmartCSVImporter.js +308 -0
- package/dist/importers/SmartExcelImporter.d.ts +131 -0
- package/dist/importers/SmartExcelImporter.js +302 -0
- package/dist/importers/SmartImportOrchestrator.d.ts +125 -0
- package/dist/importers/SmartImportOrchestrator.js +531 -0
- package/dist/importers/SmartJSONImporter.d.ts +135 -0
- package/dist/importers/SmartJSONImporter.js +325 -0
- package/dist/importers/SmartMarkdownImporter.d.ts +159 -0
- package/dist/importers/SmartMarkdownImporter.js +369 -0
- package/dist/importers/SmartPDFImporter.d.ts +154 -0
- package/dist/importers/SmartPDFImporter.js +337 -0
- package/dist/importers/VFSStructureGenerator.d.ts +82 -0
- package/dist/importers/VFSStructureGenerator.js +260 -0
- package/dist/importers/index.d.ts +28 -0
- package/dist/importers/index.js +29 -0
- package/package.json +1 -1
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart Import Orchestrator
|
|
3
|
+
*
|
|
4
|
+
* Coordinates the entire smart import pipeline:
|
|
5
|
+
* 1. Extract entities/relationships using SmartExcelImporter
|
|
6
|
+
* 2. Create entities and relationships in Brainy
|
|
7
|
+
* 3. Organize into VFS structure using VFSStructureGenerator
|
|
8
|
+
*
|
|
9
|
+
* NO MOCKS - Production-ready implementation
|
|
10
|
+
*/
|
|
11
|
+
import { VirtualFileSystem } from '../vfs/VirtualFileSystem.js';
|
|
12
|
+
import { NounType } from '../types/graphTypes.js';
|
|
13
|
+
import { SmartExcelImporter } from './SmartExcelImporter.js';
|
|
14
|
+
import { SmartPDFImporter } from './SmartPDFImporter.js';
|
|
15
|
+
import { SmartCSVImporter } from './SmartCSVImporter.js';
|
|
16
|
+
import { SmartJSONImporter } from './SmartJSONImporter.js';
|
|
17
|
+
import { SmartMarkdownImporter } from './SmartMarkdownImporter.js';
|
|
18
|
+
import { VFSStructureGenerator } from './VFSStructureGenerator.js';
|
|
19
|
+
/**
|
|
20
|
+
* SmartImportOrchestrator - Main entry point for smart imports
|
|
21
|
+
*/
|
|
22
|
+
export class SmartImportOrchestrator {
|
|
23
|
+
constructor(brain) {
|
|
24
|
+
this.brain = brain;
|
|
25
|
+
this.excelImporter = new SmartExcelImporter(brain);
|
|
26
|
+
this.pdfImporter = new SmartPDFImporter(brain);
|
|
27
|
+
this.csvImporter = new SmartCSVImporter(brain);
|
|
28
|
+
this.jsonImporter = new SmartJSONImporter(brain);
|
|
29
|
+
this.markdownImporter = new SmartMarkdownImporter(brain);
|
|
30
|
+
this.vfsGenerator = new VFSStructureGenerator(brain);
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Initialize the orchestrator
|
|
34
|
+
*/
|
|
35
|
+
async init() {
|
|
36
|
+
await this.excelImporter.init();
|
|
37
|
+
await this.pdfImporter.init();
|
|
38
|
+
await this.csvImporter.init();
|
|
39
|
+
await this.jsonImporter.init();
|
|
40
|
+
await this.markdownImporter.init();
|
|
41
|
+
await this.vfsGenerator.init();
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Import Excel file with full pipeline
|
|
45
|
+
*/
|
|
46
|
+
async importExcel(buffer, options = {}, onProgress) {
|
|
47
|
+
const startTime = Date.now();
|
|
48
|
+
const result = {
|
|
49
|
+
success: false,
|
|
50
|
+
extraction: null,
|
|
51
|
+
entityIds: [],
|
|
52
|
+
relationshipIds: [],
|
|
53
|
+
stats: {
|
|
54
|
+
rowsProcessed: 0,
|
|
55
|
+
entitiesCreated: 0,
|
|
56
|
+
relationshipsCreated: 0,
|
|
57
|
+
filesCreated: 0,
|
|
58
|
+
totalTime: 0
|
|
59
|
+
},
|
|
60
|
+
errors: []
|
|
61
|
+
};
|
|
62
|
+
try {
|
|
63
|
+
// Phase 1: Extract entities and relationships
|
|
64
|
+
onProgress?.({
|
|
65
|
+
phase: 'extracting',
|
|
66
|
+
message: 'Extracting entities and relationships...',
|
|
67
|
+
processed: 0,
|
|
68
|
+
total: 0,
|
|
69
|
+
entities: 0,
|
|
70
|
+
relationships: 0
|
|
71
|
+
});
|
|
72
|
+
result.extraction = await this.excelImporter.extract(buffer, {
|
|
73
|
+
...options,
|
|
74
|
+
onProgress: (stats) => {
|
|
75
|
+
onProgress?.({
|
|
76
|
+
phase: 'extracting',
|
|
77
|
+
message: `Processing row ${stats.processed}/${stats.total}...`,
|
|
78
|
+
processed: stats.processed,
|
|
79
|
+
total: stats.total,
|
|
80
|
+
entities: stats.entities,
|
|
81
|
+
relationships: stats.relationships
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
});
|
|
85
|
+
result.stats.rowsProcessed = result.extraction.rowsProcessed;
|
|
86
|
+
// Phase 2: Create entities in Brainy
|
|
87
|
+
if (options.createEntities !== false) {
|
|
88
|
+
onProgress?.({
|
|
89
|
+
phase: 'creating',
|
|
90
|
+
message: 'Creating entities in knowledge graph...',
|
|
91
|
+
processed: 0,
|
|
92
|
+
total: result.extraction.rows.length,
|
|
93
|
+
entities: 0,
|
|
94
|
+
relationships: 0
|
|
95
|
+
});
|
|
96
|
+
for (let i = 0; i < result.extraction.rows.length; i++) {
|
|
97
|
+
const extracted = result.extraction.rows[i];
|
|
98
|
+
try {
|
|
99
|
+
// Create main entity
|
|
100
|
+
const entityId = await this.brain.add({
|
|
101
|
+
data: extracted.entity.description,
|
|
102
|
+
type: extracted.entity.type,
|
|
103
|
+
metadata: {
|
|
104
|
+
...extracted.entity.metadata,
|
|
105
|
+
name: extracted.entity.name,
|
|
106
|
+
confidence: extracted.entity.confidence,
|
|
107
|
+
importedFrom: 'smart-import'
|
|
108
|
+
}
|
|
109
|
+
});
|
|
110
|
+
result.entityIds.push(entityId);
|
|
111
|
+
result.stats.entitiesCreated++;
|
|
112
|
+
// Update entity ID in extraction result
|
|
113
|
+
extracted.entity.id = entityId;
|
|
114
|
+
onProgress?.({
|
|
115
|
+
phase: 'creating',
|
|
116
|
+
message: `Created entity: ${extracted.entity.name}`,
|
|
117
|
+
processed: i + 1,
|
|
118
|
+
total: result.extraction.rows.length,
|
|
119
|
+
entities: result.entityIds.length,
|
|
120
|
+
relationships: result.relationshipIds.length
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
catch (error) {
|
|
124
|
+
result.errors.push(`Failed to create entity ${extracted.entity.name}: ${error.message}`);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
// Phase 3: Create relationships
|
|
129
|
+
if (options.createRelationships !== false && options.createEntities !== false) {
|
|
130
|
+
onProgress?.({
|
|
131
|
+
phase: 'creating',
|
|
132
|
+
message: 'Creating relationships...',
|
|
133
|
+
processed: 0,
|
|
134
|
+
total: result.extraction.rows.length,
|
|
135
|
+
entities: result.entityIds.length,
|
|
136
|
+
relationships: 0
|
|
137
|
+
});
|
|
138
|
+
// Build entity name -> ID map
|
|
139
|
+
const entityMap = new Map();
|
|
140
|
+
for (const extracted of result.extraction.rows) {
|
|
141
|
+
entityMap.set(extracted.entity.name.toLowerCase(), extracted.entity.id);
|
|
142
|
+
}
|
|
143
|
+
// Create relationships
|
|
144
|
+
for (const extracted of result.extraction.rows) {
|
|
145
|
+
for (const rel of extracted.relationships) {
|
|
146
|
+
try {
|
|
147
|
+
// Find target entity ID
|
|
148
|
+
let toEntityId;
|
|
149
|
+
// Try to find by name in our extracted entities
|
|
150
|
+
for (const otherExtracted of result.extraction.rows) {
|
|
151
|
+
if (rel.to.toLowerCase().includes(otherExtracted.entity.name.toLowerCase()) ||
|
|
152
|
+
otherExtracted.entity.name.toLowerCase().includes(rel.to.toLowerCase())) {
|
|
153
|
+
toEntityId = otherExtracted.entity.id;
|
|
154
|
+
break;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
// If not found, create a placeholder entity
|
|
158
|
+
if (!toEntityId) {
|
|
159
|
+
toEntityId = await this.brain.add({
|
|
160
|
+
data: rel.to,
|
|
161
|
+
type: NounType.Thing,
|
|
162
|
+
metadata: {
|
|
163
|
+
name: rel.to,
|
|
164
|
+
placeholder: true,
|
|
165
|
+
extractedFrom: extracted.entity.name
|
|
166
|
+
}
|
|
167
|
+
});
|
|
168
|
+
result.entityIds.push(toEntityId);
|
|
169
|
+
}
|
|
170
|
+
// Create relationship
|
|
171
|
+
const relId = await this.brain.relate({
|
|
172
|
+
from: extracted.entity.id,
|
|
173
|
+
to: toEntityId,
|
|
174
|
+
type: rel.type,
|
|
175
|
+
metadata: {
|
|
176
|
+
confidence: rel.confidence,
|
|
177
|
+
evidence: rel.evidence
|
|
178
|
+
}
|
|
179
|
+
});
|
|
180
|
+
result.relationshipIds.push(relId);
|
|
181
|
+
result.stats.relationshipsCreated++;
|
|
182
|
+
}
|
|
183
|
+
catch (error) {
|
|
184
|
+
result.errors.push(`Failed to create relationship: ${error.message}`);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
// Phase 4: Create VFS structure
|
|
190
|
+
if (options.createVFSStructure !== false) {
|
|
191
|
+
onProgress?.({
|
|
192
|
+
phase: 'organizing',
|
|
193
|
+
message: 'Organizing into file structure...',
|
|
194
|
+
processed: 0,
|
|
195
|
+
total: result.extraction.rows.length,
|
|
196
|
+
entities: result.entityIds.length,
|
|
197
|
+
relationships: result.relationshipIds.length
|
|
198
|
+
});
|
|
199
|
+
const vfsOptions = {
|
|
200
|
+
rootPath: options.vfsRootPath || '/imports/' + (options.filename || 'import'),
|
|
201
|
+
groupBy: options.vfsGroupBy || 'type',
|
|
202
|
+
preserveSource: true,
|
|
203
|
+
sourceBuffer: buffer,
|
|
204
|
+
sourceFilename: options.filename || 'import.xlsx',
|
|
205
|
+
createRelationshipFile: true,
|
|
206
|
+
createMetadataFile: true
|
|
207
|
+
};
|
|
208
|
+
const vfsResult = await this.vfsGenerator.generate(result.extraction, vfsOptions);
|
|
209
|
+
result.vfsStructure = {
|
|
210
|
+
rootPath: vfsResult.rootPath,
|
|
211
|
+
directories: vfsResult.directories,
|
|
212
|
+
files: vfsResult.files.length
|
|
213
|
+
};
|
|
214
|
+
result.stats.filesCreated = vfsResult.files.length;
|
|
215
|
+
}
|
|
216
|
+
// Complete
|
|
217
|
+
result.success = result.errors.length === 0;
|
|
218
|
+
result.stats.totalTime = Date.now() - startTime;
|
|
219
|
+
onProgress?.({
|
|
220
|
+
phase: 'complete',
|
|
221
|
+
message: `Import complete: ${result.stats.entitiesCreated} entities, ${result.stats.relationshipsCreated} relationships`,
|
|
222
|
+
processed: result.extraction.rows.length,
|
|
223
|
+
total: result.extraction.rows.length,
|
|
224
|
+
entities: result.stats.entitiesCreated,
|
|
225
|
+
relationships: result.stats.relationshipsCreated
|
|
226
|
+
});
|
|
227
|
+
}
|
|
228
|
+
catch (error) {
|
|
229
|
+
result.errors.push(`Import failed: ${error.message}`);
|
|
230
|
+
result.success = false;
|
|
231
|
+
}
|
|
232
|
+
return result;
|
|
233
|
+
}
|
|
234
|
+
/**
|
|
235
|
+
* Import PDF file with full pipeline
|
|
236
|
+
*/
|
|
237
|
+
async importPDF(buffer, options = {}, onProgress) {
|
|
238
|
+
const startTime = Date.now();
|
|
239
|
+
const result = {
|
|
240
|
+
success: false,
|
|
241
|
+
extraction: null,
|
|
242
|
+
entityIds: [],
|
|
243
|
+
relationshipIds: [],
|
|
244
|
+
stats: {
|
|
245
|
+
rowsProcessed: 0,
|
|
246
|
+
entitiesCreated: 0,
|
|
247
|
+
relationshipsCreated: 0,
|
|
248
|
+
filesCreated: 0,
|
|
249
|
+
totalTime: 0
|
|
250
|
+
},
|
|
251
|
+
errors: []
|
|
252
|
+
};
|
|
253
|
+
try {
|
|
254
|
+
// Phase 1: Extract from PDF
|
|
255
|
+
onProgress?.({ phase: 'extracting', message: 'Extracting from PDF...', processed: 0, total: 0, entities: 0, relationships: 0 });
|
|
256
|
+
const pdfResult = await this.pdfImporter.extract(buffer, options);
|
|
257
|
+
// Convert PDF result to Excel-like format for processing
|
|
258
|
+
result.extraction = this.convertPDFToExcelFormat(pdfResult);
|
|
259
|
+
result.stats.rowsProcessed = pdfResult.sectionsProcessed;
|
|
260
|
+
// Phase 2 & 3: Create entities and relationships
|
|
261
|
+
await this.createEntitiesAndRelationships(result, options, onProgress);
|
|
262
|
+
// Phase 4: Create VFS structure
|
|
263
|
+
if (options.createVFSStructure !== false) {
|
|
264
|
+
const vfsOptions = {
|
|
265
|
+
rootPath: options.vfsRootPath || '/imports/' + (options.filename || 'import'),
|
|
266
|
+
groupBy: options.vfsGroupBy || 'type',
|
|
267
|
+
preserveSource: true,
|
|
268
|
+
sourceBuffer: buffer,
|
|
269
|
+
sourceFilename: options.filename || 'import.pdf',
|
|
270
|
+
createRelationshipFile: true,
|
|
271
|
+
createMetadataFile: true
|
|
272
|
+
};
|
|
273
|
+
const vfsResult = await this.vfsGenerator.generate(result.extraction, vfsOptions);
|
|
274
|
+
result.vfsStructure = { rootPath: vfsResult.rootPath, directories: vfsResult.directories, files: vfsResult.files.length };
|
|
275
|
+
result.stats.filesCreated = vfsResult.files.length;
|
|
276
|
+
}
|
|
277
|
+
result.success = result.errors.length === 0;
|
|
278
|
+
result.stats.totalTime = Date.now() - startTime;
|
|
279
|
+
onProgress?.({ phase: 'complete', message: `Import complete: ${result.stats.entitiesCreated} entities, ${result.stats.relationshipsCreated} relationships`, processed: result.stats.rowsProcessed, total: result.stats.rowsProcessed, entities: result.stats.entitiesCreated, relationships: result.stats.relationshipsCreated });
|
|
280
|
+
}
|
|
281
|
+
catch (error) {
|
|
282
|
+
result.errors.push(`PDF import failed: ${error.message}`);
|
|
283
|
+
result.success = false;
|
|
284
|
+
}
|
|
285
|
+
return result;
|
|
286
|
+
}
|
|
287
|
+
/**
|
|
288
|
+
* Import CSV file with full pipeline
|
|
289
|
+
*/
|
|
290
|
+
async importCSV(buffer, options = {}, onProgress) {
|
|
291
|
+
// CSV is very similar to Excel, can reuse importExcel logic
|
|
292
|
+
return this.importExcel(buffer, options, onProgress);
|
|
293
|
+
}
|
|
294
|
+
/**
|
|
295
|
+
* Import JSON data with full pipeline
|
|
296
|
+
*/
|
|
297
|
+
async importJSON(data, options = {}, onProgress) {
|
|
298
|
+
const startTime = Date.now();
|
|
299
|
+
const result = {
|
|
300
|
+
success: false,
|
|
301
|
+
extraction: null,
|
|
302
|
+
entityIds: [],
|
|
303
|
+
relationshipIds: [],
|
|
304
|
+
stats: {
|
|
305
|
+
rowsProcessed: 0,
|
|
306
|
+
entitiesCreated: 0,
|
|
307
|
+
relationshipsCreated: 0,
|
|
308
|
+
filesCreated: 0,
|
|
309
|
+
totalTime: 0
|
|
310
|
+
},
|
|
311
|
+
errors: []
|
|
312
|
+
};
|
|
313
|
+
try {
|
|
314
|
+
onProgress?.({ phase: 'extracting', message: 'Extracting from JSON...', processed: 0, total: 0, entities: 0, relationships: 0 });
|
|
315
|
+
const jsonResult = await this.jsonImporter.extract(data, options);
|
|
316
|
+
result.extraction = this.convertJSONToExcelFormat(jsonResult);
|
|
317
|
+
result.stats.rowsProcessed = jsonResult.nodesProcessed;
|
|
318
|
+
await this.createEntitiesAndRelationships(result, options, onProgress);
|
|
319
|
+
if (options.createVFSStructure !== false) {
|
|
320
|
+
const sourceBuffer = Buffer.from(typeof data === 'string' ? data : JSON.stringify(data, null, 2));
|
|
321
|
+
const vfsOptions = {
|
|
322
|
+
rootPath: options.vfsRootPath || '/imports/' + (options.filename || 'import'),
|
|
323
|
+
groupBy: options.vfsGroupBy || 'type',
|
|
324
|
+
preserveSource: true,
|
|
325
|
+
sourceBuffer,
|
|
326
|
+
sourceFilename: options.filename || 'import.json',
|
|
327
|
+
createRelationshipFile: true,
|
|
328
|
+
createMetadataFile: true
|
|
329
|
+
};
|
|
330
|
+
const vfsResult = await this.vfsGenerator.generate(result.extraction, vfsOptions);
|
|
331
|
+
result.vfsStructure = { rootPath: vfsResult.rootPath, directories: vfsResult.directories, files: vfsResult.files.length };
|
|
332
|
+
result.stats.filesCreated = vfsResult.files.length;
|
|
333
|
+
}
|
|
334
|
+
result.success = result.errors.length === 0;
|
|
335
|
+
result.stats.totalTime = Date.now() - startTime;
|
|
336
|
+
onProgress?.({ phase: 'complete', message: `Import complete: ${result.stats.entitiesCreated} entities, ${result.stats.relationshipsCreated} relationships`, processed: result.stats.rowsProcessed, total: result.stats.rowsProcessed, entities: result.stats.entitiesCreated, relationships: result.stats.relationshipsCreated });
|
|
337
|
+
}
|
|
338
|
+
catch (error) {
|
|
339
|
+
result.errors.push(`JSON import failed: ${error.message}`);
|
|
340
|
+
result.success = false;
|
|
341
|
+
}
|
|
342
|
+
return result;
|
|
343
|
+
}
|
|
344
|
+
/**
|
|
345
|
+
* Import Markdown content with full pipeline
|
|
346
|
+
*/
|
|
347
|
+
async importMarkdown(markdown, options = {}, onProgress) {
|
|
348
|
+
const startTime = Date.now();
|
|
349
|
+
const result = {
|
|
350
|
+
success: false,
|
|
351
|
+
extraction: null,
|
|
352
|
+
entityIds: [],
|
|
353
|
+
relationshipIds: [],
|
|
354
|
+
stats: {
|
|
355
|
+
rowsProcessed: 0,
|
|
356
|
+
entitiesCreated: 0,
|
|
357
|
+
relationshipsCreated: 0,
|
|
358
|
+
filesCreated: 0,
|
|
359
|
+
totalTime: 0
|
|
360
|
+
},
|
|
361
|
+
errors: []
|
|
362
|
+
};
|
|
363
|
+
try {
|
|
364
|
+
onProgress?.({ phase: 'extracting', message: 'Extracting from Markdown...', processed: 0, total: 0, entities: 0, relationships: 0 });
|
|
365
|
+
const mdResult = await this.markdownImporter.extract(markdown, options);
|
|
366
|
+
result.extraction = this.convertMarkdownToExcelFormat(mdResult);
|
|
367
|
+
result.stats.rowsProcessed = mdResult.sectionsProcessed;
|
|
368
|
+
await this.createEntitiesAndRelationships(result, options, onProgress);
|
|
369
|
+
if (options.createVFSStructure !== false) {
|
|
370
|
+
const sourceBuffer = Buffer.from(markdown, 'utf-8');
|
|
371
|
+
const vfsOptions = {
|
|
372
|
+
rootPath: options.vfsRootPath || '/imports/' + (options.filename || 'import'),
|
|
373
|
+
groupBy: options.vfsGroupBy || 'type',
|
|
374
|
+
preserveSource: true,
|
|
375
|
+
sourceBuffer,
|
|
376
|
+
sourceFilename: options.filename || 'import.md',
|
|
377
|
+
createRelationshipFile: true,
|
|
378
|
+
createMetadataFile: true
|
|
379
|
+
};
|
|
380
|
+
const vfsResult = await this.vfsGenerator.generate(result.extraction, vfsOptions);
|
|
381
|
+
result.vfsStructure = { rootPath: vfsResult.rootPath, directories: vfsResult.directories, files: vfsResult.files.length };
|
|
382
|
+
result.stats.filesCreated = vfsResult.files.length;
|
|
383
|
+
}
|
|
384
|
+
result.success = result.errors.length === 0;
|
|
385
|
+
result.stats.totalTime = Date.now() - startTime;
|
|
386
|
+
onProgress?.({ phase: 'complete', message: `Import complete: ${result.stats.entitiesCreated} entities, ${result.stats.relationshipsCreated} relationships`, processed: result.stats.rowsProcessed, total: result.stats.rowsProcessed, entities: result.stats.entitiesCreated, relationships: result.stats.relationshipsCreated });
|
|
387
|
+
}
|
|
388
|
+
catch (error) {
|
|
389
|
+
result.errors.push(`Markdown import failed: ${error.message}`);
|
|
390
|
+
result.success = false;
|
|
391
|
+
}
|
|
392
|
+
return result;
|
|
393
|
+
}
|
|
394
|
+
/**
|
|
395
|
+
* Helper: Create entities and relationships from extraction result
|
|
396
|
+
*/
|
|
397
|
+
async createEntitiesAndRelationships(result, options, onProgress) {
|
|
398
|
+
if (options.createEntities !== false) {
|
|
399
|
+
onProgress?.({ phase: 'creating', message: 'Creating entities in knowledge graph...', processed: 0, total: result.extraction.rows.length, entities: 0, relationships: 0 });
|
|
400
|
+
for (let i = 0; i < result.extraction.rows.length; i++) {
|
|
401
|
+
const extracted = result.extraction.rows[i];
|
|
402
|
+
try {
|
|
403
|
+
const entityId = await this.brain.add({
|
|
404
|
+
data: extracted.entity.description,
|
|
405
|
+
type: extracted.entity.type,
|
|
406
|
+
metadata: { ...extracted.entity.metadata, name: extracted.entity.name, confidence: extracted.entity.confidence, importedFrom: 'smart-import' }
|
|
407
|
+
});
|
|
408
|
+
result.entityIds.push(entityId);
|
|
409
|
+
result.stats.entitiesCreated++;
|
|
410
|
+
extracted.entity.id = entityId;
|
|
411
|
+
}
|
|
412
|
+
catch (error) {
|
|
413
|
+
result.errors.push(`Failed to create entity ${extracted.entity.name}: ${error.message}`);
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
if (options.createRelationships !== false && options.createEntities !== false) {
|
|
418
|
+
onProgress?.({ phase: 'creating', message: 'Creating relationships...', processed: 0, total: result.extraction.rows.length, entities: result.entityIds.length, relationships: 0 });
|
|
419
|
+
for (const extracted of result.extraction.rows) {
|
|
420
|
+
for (const rel of extracted.relationships) {
|
|
421
|
+
try {
|
|
422
|
+
let toEntityId;
|
|
423
|
+
for (const otherExtracted of result.extraction.rows) {
|
|
424
|
+
if (rel.to.toLowerCase().includes(otherExtracted.entity.name.toLowerCase()) || otherExtracted.entity.name.toLowerCase().includes(rel.to.toLowerCase())) {
|
|
425
|
+
toEntityId = otherExtracted.entity.id;
|
|
426
|
+
break;
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
if (!toEntityId) {
|
|
430
|
+
toEntityId = await this.brain.add({ data: rel.to, type: NounType.Thing, metadata: { name: rel.to, placeholder: true, extractedFrom: extracted.entity.name } });
|
|
431
|
+
result.entityIds.push(toEntityId);
|
|
432
|
+
}
|
|
433
|
+
const relId = await this.brain.relate({ from: extracted.entity.id, to: toEntityId, type: rel.type, metadata: { confidence: rel.confidence, evidence: rel.evidence } });
|
|
434
|
+
result.relationshipIds.push(relId);
|
|
435
|
+
result.stats.relationshipsCreated++;
|
|
436
|
+
}
|
|
437
|
+
catch (error) {
|
|
438
|
+
result.errors.push(`Failed to create relationship: ${error.message}`);
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
/**
|
|
445
|
+
* Helper: Convert PDF result to Excel-like format
|
|
446
|
+
*/
|
|
447
|
+
convertPDFToExcelFormat(pdfResult) {
|
|
448
|
+
const rows = pdfResult.sections.flatMap(section => section.entities.map(entity => ({
|
|
449
|
+
entity,
|
|
450
|
+
relatedEntities: [],
|
|
451
|
+
relationships: section.relationships.filter(r => r.from === entity.id),
|
|
452
|
+
concepts: section.concepts
|
|
453
|
+
})));
|
|
454
|
+
return {
|
|
455
|
+
rowsProcessed: pdfResult.sectionsProcessed,
|
|
456
|
+
entitiesExtracted: pdfResult.entitiesExtracted,
|
|
457
|
+
relationshipsInferred: pdfResult.relationshipsInferred,
|
|
458
|
+
rows,
|
|
459
|
+
entityMap: pdfResult.entityMap,
|
|
460
|
+
processingTime: pdfResult.processingTime,
|
|
461
|
+
stats: pdfResult.stats
|
|
462
|
+
};
|
|
463
|
+
}
|
|
464
|
+
/**
|
|
465
|
+
* Helper: Convert JSON result to Excel-like format
|
|
466
|
+
*/
|
|
467
|
+
convertJSONToExcelFormat(jsonResult) {
|
|
468
|
+
const rows = jsonResult.entities.map(entity => ({
|
|
469
|
+
entity,
|
|
470
|
+
relatedEntities: [],
|
|
471
|
+
relationships: jsonResult.relationships.filter(r => r.from === entity.id),
|
|
472
|
+
concepts: entity.metadata.concepts || []
|
|
473
|
+
}));
|
|
474
|
+
return {
|
|
475
|
+
rowsProcessed: jsonResult.nodesProcessed,
|
|
476
|
+
entitiesExtracted: jsonResult.entitiesExtracted,
|
|
477
|
+
relationshipsInferred: jsonResult.relationshipsInferred,
|
|
478
|
+
rows,
|
|
479
|
+
entityMap: jsonResult.entityMap,
|
|
480
|
+
processingTime: jsonResult.processingTime,
|
|
481
|
+
stats: jsonResult.stats
|
|
482
|
+
};
|
|
483
|
+
}
|
|
484
|
+
/**
|
|
485
|
+
* Helper: Convert Markdown result to Excel-like format
|
|
486
|
+
*/
|
|
487
|
+
convertMarkdownToExcelFormat(mdResult) {
|
|
488
|
+
const rows = mdResult.sections.flatMap(section => section.entities.map(entity => ({
|
|
489
|
+
entity,
|
|
490
|
+
relatedEntities: [],
|
|
491
|
+
relationships: section.relationships.filter(r => r.from === entity.id),
|
|
492
|
+
concepts: section.concepts
|
|
493
|
+
})));
|
|
494
|
+
return {
|
|
495
|
+
rowsProcessed: mdResult.sectionsProcessed,
|
|
496
|
+
entitiesExtracted: mdResult.entitiesExtracted,
|
|
497
|
+
relationshipsInferred: mdResult.relationshipsInferred,
|
|
498
|
+
rows,
|
|
499
|
+
entityMap: mdResult.entityMap,
|
|
500
|
+
processingTime: mdResult.processingTime,
|
|
501
|
+
stats: mdResult.stats
|
|
502
|
+
};
|
|
503
|
+
}
|
|
504
|
+
/**
|
|
505
|
+
* Get import statistics
|
|
506
|
+
*/
|
|
507
|
+
async getImportStatistics(vfsRootPath) {
|
|
508
|
+
// Read metadata file
|
|
509
|
+
const vfs = new VirtualFileSystem(this.brain);
|
|
510
|
+
await vfs.init();
|
|
511
|
+
const metadataPath = `${vfsRootPath}/_metadata.json`;
|
|
512
|
+
try {
|
|
513
|
+
const metadataBuffer = await vfs.readFile(metadataPath);
|
|
514
|
+
const metadata = JSON.parse(metadataBuffer.toString('utf-8'));
|
|
515
|
+
return {
|
|
516
|
+
entitiesInGraph: metadata.import.stats.entitiesExtracted,
|
|
517
|
+
relationshipsInGraph: metadata.import.stats.relationshipsInferred,
|
|
518
|
+
filesInVFS: metadata.structure.fileCount,
|
|
519
|
+
lastImport: new Date(metadata.import.timestamp)
|
|
520
|
+
};
|
|
521
|
+
}
|
|
522
|
+
catch (error) {
|
|
523
|
+
return {
|
|
524
|
+
entitiesInGraph: 0,
|
|
525
|
+
relationshipsInGraph: 0,
|
|
526
|
+
filesInVFS: 0
|
|
527
|
+
};
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
}
|
|
531
|
+
//# sourceMappingURL=SmartImportOrchestrator.js.map
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart JSON Importer
|
|
3
|
+
*
|
|
4
|
+
* Extracts entities and relationships from JSON files using:
|
|
5
|
+
* - Recursive traversal of nested structures
|
|
6
|
+
* - NeuralEntityExtractor for entity extraction from text values
|
|
7
|
+
* - NaturalLanguageProcessor for relationship inference
|
|
8
|
+
* - Hierarchical relationship creation (parent-child, contains, etc.)
|
|
9
|
+
*
|
|
10
|
+
* NO MOCKS - Production-ready implementation
|
|
11
|
+
*/
|
|
12
|
+
import { Brainy } from '../brainy.js';
|
|
13
|
+
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
14
|
+
export interface SmartJSONOptions {
|
|
15
|
+
/** Enable neural entity extraction from string values */
|
|
16
|
+
enableNeuralExtraction?: boolean;
|
|
17
|
+
/** Enable hierarchical relationship creation */
|
|
18
|
+
enableHierarchicalRelationships?: boolean;
|
|
19
|
+
/** Enable concept extraction for tagging */
|
|
20
|
+
enableConceptExtraction?: boolean;
|
|
21
|
+
/** Confidence threshold for entities (0-1) */
|
|
22
|
+
confidenceThreshold?: number;
|
|
23
|
+
/** Maximum depth to traverse */
|
|
24
|
+
maxDepth?: number;
|
|
25
|
+
/** Minimum string length to process for entity extraction */
|
|
26
|
+
minStringLength?: number;
|
|
27
|
+
/** Keys that indicate entity names */
|
|
28
|
+
nameKeys?: string[];
|
|
29
|
+
/** Keys that indicate entity descriptions */
|
|
30
|
+
descriptionKeys?: string[];
|
|
31
|
+
/** Keys that indicate entity types */
|
|
32
|
+
typeKeys?: string[];
|
|
33
|
+
/** Progress callback */
|
|
34
|
+
onProgress?: (stats: {
|
|
35
|
+
processed: number;
|
|
36
|
+
entities: number;
|
|
37
|
+
relationships: number;
|
|
38
|
+
}) => void;
|
|
39
|
+
}
|
|
40
|
+
export interface ExtractedJSONEntity {
|
|
41
|
+
/** Entity ID */
|
|
42
|
+
id: string;
|
|
43
|
+
/** Entity name */
|
|
44
|
+
name: string;
|
|
45
|
+
/** Entity type */
|
|
46
|
+
type: NounType;
|
|
47
|
+
/** Entity description/value */
|
|
48
|
+
description: string;
|
|
49
|
+
/** Confidence score */
|
|
50
|
+
confidence: number;
|
|
51
|
+
/** JSON path to this entity */
|
|
52
|
+
path: string;
|
|
53
|
+
/** Parent path in JSON hierarchy */
|
|
54
|
+
parentPath: string | null;
|
|
55
|
+
/** Metadata */
|
|
56
|
+
metadata: Record<string, any>;
|
|
57
|
+
}
|
|
58
|
+
export interface ExtractedJSONRelationship {
|
|
59
|
+
from: string;
|
|
60
|
+
to: string;
|
|
61
|
+
type: VerbType;
|
|
62
|
+
confidence: number;
|
|
63
|
+
evidence: string;
|
|
64
|
+
}
|
|
65
|
+
export interface SmartJSONResult {
|
|
66
|
+
/** Total nodes processed */
|
|
67
|
+
nodesProcessed: number;
|
|
68
|
+
/** Entities extracted */
|
|
69
|
+
entitiesExtracted: number;
|
|
70
|
+
/** Relationships inferred */
|
|
71
|
+
relationshipsInferred: number;
|
|
72
|
+
/** All extracted entities */
|
|
73
|
+
entities: ExtractedJSONEntity[];
|
|
74
|
+
/** All relationships */
|
|
75
|
+
relationships: ExtractedJSONRelationship[];
|
|
76
|
+
/** Entity ID mapping (path -> ID) */
|
|
77
|
+
entityMap: Map<string, string>;
|
|
78
|
+
/** Processing time in ms */
|
|
79
|
+
processingTime: number;
|
|
80
|
+
/** Extraction statistics */
|
|
81
|
+
stats: {
|
|
82
|
+
byType: Record<string, number>;
|
|
83
|
+
byDepth: Record<number, number>;
|
|
84
|
+
byConfidence: {
|
|
85
|
+
high: number;
|
|
86
|
+
medium: number;
|
|
87
|
+
low: number;
|
|
88
|
+
};
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* SmartJSONImporter - Extracts structured knowledge from JSON files
|
|
93
|
+
*/
|
|
94
|
+
export declare class SmartJSONImporter {
|
|
95
|
+
private brain;
|
|
96
|
+
private extractor;
|
|
97
|
+
private nlp;
|
|
98
|
+
constructor(brain: Brainy);
|
|
99
|
+
/**
|
|
100
|
+
* Initialize the importer
|
|
101
|
+
*/
|
|
102
|
+
init(): Promise<void>;
|
|
103
|
+
/**
|
|
104
|
+
* Extract entities and relationships from JSON data
|
|
105
|
+
*/
|
|
106
|
+
extract(data: any, options?: SmartJSONOptions): Promise<SmartJSONResult>;
|
|
107
|
+
/**
|
|
108
|
+
* Recursively traverse JSON structure
|
|
109
|
+
*/
|
|
110
|
+
private traverseJSON;
|
|
111
|
+
/**
|
|
112
|
+
* Extract entity from JSON object
|
|
113
|
+
*/
|
|
114
|
+
private extractEntityFromObject;
|
|
115
|
+
/**
|
|
116
|
+
* Find value in object by key patterns
|
|
117
|
+
*/
|
|
118
|
+
private findValue;
|
|
119
|
+
/**
|
|
120
|
+
* Infer type from JSON structure
|
|
121
|
+
*/
|
|
122
|
+
private inferTypeFromStructure;
|
|
123
|
+
/**
|
|
124
|
+
* Map type string to NounType
|
|
125
|
+
*/
|
|
126
|
+
private mapTypeString;
|
|
127
|
+
/**
|
|
128
|
+
* Generate consistent entity ID
|
|
129
|
+
*/
|
|
130
|
+
private generateEntityId;
|
|
131
|
+
/**
|
|
132
|
+
* Update statistics
|
|
133
|
+
*/
|
|
134
|
+
private updateStats;
|
|
135
|
+
}
|