@soulcraft/brainy 3.27.1 → 3.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/brainy.d.ts +50 -0
  3. package/dist/brainy.js +54 -2
  4. package/dist/config/storageAutoConfig.d.ts +2 -1
  5. package/dist/config/storageAutoConfig.js +5 -4
  6. package/dist/import/EntityDeduplicator.d.ts +84 -0
  7. package/dist/import/EntityDeduplicator.js +255 -0
  8. package/dist/import/FormatDetector.d.ts +65 -0
  9. package/dist/import/FormatDetector.js +263 -0
  10. package/dist/import/ImportCoordinator.d.ts +160 -0
  11. package/dist/import/ImportCoordinator.js +498 -0
  12. package/dist/import/ImportHistory.d.ts +92 -0
  13. package/dist/import/ImportHistory.js +183 -0
  14. package/dist/import/index.d.ts +16 -0
  15. package/dist/import/index.js +14 -0
  16. package/dist/importers/SmartCSVImporter.d.ts +136 -0
  17. package/dist/importers/SmartCSVImporter.js +308 -0
  18. package/dist/importers/SmartExcelImporter.d.ts +131 -0
  19. package/dist/importers/SmartExcelImporter.js +302 -0
  20. package/dist/importers/SmartImportOrchestrator.d.ts +125 -0
  21. package/dist/importers/SmartImportOrchestrator.js +531 -0
  22. package/dist/importers/SmartJSONImporter.d.ts +135 -0
  23. package/dist/importers/SmartJSONImporter.js +325 -0
  24. package/dist/importers/SmartMarkdownImporter.d.ts +159 -0
  25. package/dist/importers/SmartMarkdownImporter.js +369 -0
  26. package/dist/importers/SmartPDFImporter.d.ts +154 -0
  27. package/dist/importers/SmartPDFImporter.js +337 -0
  28. package/dist/importers/VFSStructureGenerator.d.ts +82 -0
  29. package/dist/importers/VFSStructureGenerator.js +260 -0
  30. package/dist/importers/index.d.ts +28 -0
  31. package/dist/importers/index.js +29 -0
  32. package/package.json +1 -1
@@ -0,0 +1,498 @@
1
+ /**
2
+ * Import Coordinator
3
+ *
4
+ * Unified import orchestrator that:
5
+ * - Auto-detects file formats
6
+ * - Routes to appropriate handlers
7
+ * - Coordinates dual storage (VFS + Graph)
8
+ * - Provides simple, unified API
9
+ *
10
+ * NO MOCKS - Production-ready implementation
11
+ */
12
+ import { FormatDetector } from './FormatDetector.js';
13
+ import { EntityDeduplicator } from './EntityDeduplicator.js';
14
+ import { ImportHistory } from './ImportHistory.js';
15
+ import { SmartExcelImporter } from '../importers/SmartExcelImporter.js';
16
+ import { SmartPDFImporter } from '../importers/SmartPDFImporter.js';
17
+ import { SmartCSVImporter } from '../importers/SmartCSVImporter.js';
18
+ import { SmartJSONImporter } from '../importers/SmartJSONImporter.js';
19
+ import { SmartMarkdownImporter } from '../importers/SmartMarkdownImporter.js';
20
+ import { VFSStructureGenerator } from '../importers/VFSStructureGenerator.js';
21
+ import { NounType } from '../types/graphTypes.js';
22
+ import { v4 as uuidv4 } from '../universal/uuid.js';
23
+ import * as fs from 'fs';
24
+ import * as path from 'path';
25
+ /**
26
+ * ImportCoordinator - Main entry point for all imports
27
+ */
28
+ export class ImportCoordinator {
29
+ constructor(brain) {
30
+ this.brain = brain;
31
+ this.detector = new FormatDetector();
32
+ this.deduplicator = new EntityDeduplicator(brain);
33
+ this.history = new ImportHistory(brain);
34
+ this.excelImporter = new SmartExcelImporter(brain);
35
+ this.pdfImporter = new SmartPDFImporter(brain);
36
+ this.csvImporter = new SmartCSVImporter(brain);
37
+ this.jsonImporter = new SmartJSONImporter(brain);
38
+ this.markdownImporter = new SmartMarkdownImporter(brain);
39
+ this.vfsGenerator = new VFSStructureGenerator(brain);
40
+ }
41
+ /**
42
+ * Initialize all importers
43
+ */
44
+ async init() {
45
+ await this.excelImporter.init();
46
+ await this.pdfImporter.init();
47
+ await this.csvImporter.init();
48
+ await this.jsonImporter.init();
49
+ await this.markdownImporter.init();
50
+ await this.vfsGenerator.init();
51
+ await this.history.init();
52
+ }
53
+ /**
54
+ * Get import history
55
+ */
56
+ getHistory() {
57
+ return this.history;
58
+ }
59
+ /**
60
+ * Import from any source with auto-detection
61
+ */
62
+ async import(source, options = {}) {
63
+ const startTime = Date.now();
64
+ const importId = uuidv4();
65
+ // Normalize source
66
+ const normalizedSource = this.normalizeSource(source, options.format);
67
+ // Report detection stage
68
+ options.onProgress?.({
69
+ stage: 'detecting',
70
+ message: 'Detecting format...'
71
+ });
72
+ // Detect format
73
+ const detection = options.format
74
+ ? { format: options.format, confidence: 1.0, evidence: ['Explicitly specified'] }
75
+ : this.detectFormat(normalizedSource);
76
+ if (!detection) {
77
+ throw new Error('Unable to detect file format. Please specify format explicitly.');
78
+ }
79
+ // Report extraction stage
80
+ options.onProgress?.({
81
+ stage: 'extracting',
82
+ message: `Extracting entities from ${detection.format}...`
83
+ });
84
+ // Extract entities and relationships
85
+ const extractionResult = await this.extract(normalizedSource, detection.format, options);
86
+ // Set defaults
87
+ const opts = {
88
+ vfsPath: options.vfsPath || `/imports/${Date.now()}`,
89
+ groupBy: options.groupBy || 'type',
90
+ createEntities: options.createEntities !== false,
91
+ createRelationships: options.createRelationships !== false,
92
+ preserveSource: options.preserveSource !== false,
93
+ enableDeduplication: options.enableDeduplication !== false,
94
+ deduplicationThreshold: options.deduplicationThreshold || 0.85,
95
+ ...options
96
+ };
97
+ // Report VFS storage stage
98
+ options.onProgress?.({
99
+ stage: 'storing-vfs',
100
+ message: 'Creating VFS structure...'
101
+ });
102
+ // Normalize extraction result to unified format
103
+ const normalizedResult = this.normalizeExtractionResult(extractionResult, detection.format);
104
+ // Create VFS structure
105
+ const vfsResult = await this.vfsGenerator.generate(normalizedResult, {
106
+ rootPath: opts.vfsPath,
107
+ groupBy: opts.groupBy,
108
+ customGrouping: opts.customGrouping,
109
+ preserveSource: opts.preserveSource,
110
+ sourceBuffer: normalizedSource.type === 'buffer' ? normalizedSource.data : undefined,
111
+ sourceFilename: normalizedSource.filename || `import.${detection.format}`,
112
+ createRelationshipFile: true,
113
+ createMetadataFile: true
114
+ });
115
+ // Report graph storage stage
116
+ options.onProgress?.({
117
+ stage: 'storing-graph',
118
+ message: 'Creating knowledge graph...'
119
+ });
120
+ // Create entities and relationships in graph
121
+ const graphResult = await this.createGraphEntities(normalizedResult, vfsResult, opts);
122
+ // Report complete
123
+ options.onProgress?.({
124
+ stage: 'complete',
125
+ message: 'Import complete',
126
+ entities: graphResult.entities.length,
127
+ relationships: graphResult.relationships.length
128
+ });
129
+ const result = {
130
+ importId,
131
+ format: detection.format,
132
+ formatConfidence: detection.confidence,
133
+ vfs: {
134
+ rootPath: vfsResult.rootPath,
135
+ directories: vfsResult.directories,
136
+ files: vfsResult.files
137
+ },
138
+ entities: graphResult.entities,
139
+ relationships: graphResult.relationships,
140
+ stats: {
141
+ entitiesExtracted: extractionResult.entitiesExtracted,
142
+ relationshipsInferred: extractionResult.relationshipsInferred,
143
+ vfsFilesCreated: vfsResult.files.length,
144
+ graphNodesCreated: graphResult.entities.length,
145
+ graphEdgesCreated: graphResult.relationships.length,
146
+ entitiesMerged: graphResult.merged || 0,
147
+ entitiesNew: graphResult.newEntities || 0,
148
+ processingTime: Date.now() - startTime
149
+ }
150
+ };
151
+ // Record in history if enabled
152
+ if (options.enableHistory !== false) {
153
+ await this.history.recordImport(importId, {
154
+ type: normalizedSource.type === 'path' ? 'file' : normalizedSource.type,
155
+ filename: normalizedSource.filename,
156
+ format: detection.format
157
+ }, result);
158
+ }
159
+ return result;
160
+ }
161
+ /**
162
+ * Normalize source to ImportSource
163
+ */
164
+ normalizeSource(source, formatHint) {
165
+ // Buffer
166
+ if (Buffer.isBuffer(source)) {
167
+ return {
168
+ type: 'buffer',
169
+ data: source
170
+ };
171
+ }
172
+ // String - could be path or content
173
+ if (typeof source === 'string') {
174
+ // Check if it's a file path
175
+ if (this.isFilePath(source)) {
176
+ const buffer = fs.readFileSync(source);
177
+ return {
178
+ type: 'path',
179
+ data: buffer,
180
+ filename: path.basename(source)
181
+ };
182
+ }
183
+ // Otherwise treat as content
184
+ return {
185
+ type: 'string',
186
+ data: source
187
+ };
188
+ }
189
+ // Object
190
+ if (typeof source === 'object' && source !== null) {
191
+ return {
192
+ type: 'object',
193
+ data: source
194
+ };
195
+ }
196
+ throw new Error('Invalid source type. Expected Buffer, string, or object.');
197
+ }
198
+ /**
199
+ * Check if string is a file path
200
+ */
201
+ isFilePath(str) {
202
+ // Check if file exists
203
+ try {
204
+ return fs.existsSync(str) && fs.statSync(str).isFile();
205
+ }
206
+ catch {
207
+ return false;
208
+ }
209
+ }
210
+ /**
211
+ * Detect format from source
212
+ */
213
+ detectFormat(source) {
214
+ switch (source.type) {
215
+ case 'buffer':
216
+ case 'path':
217
+ const buffer = source.data;
218
+ let result = this.detector.detectFromBuffer(buffer);
219
+ // Try filename hint if buffer detection fails
220
+ if (!result && source.filename) {
221
+ result = this.detector.detectFromPath(source.filename);
222
+ }
223
+ return result;
224
+ case 'string':
225
+ return this.detector.detectFromString(source.data);
226
+ case 'object':
227
+ return this.detector.detectFromObject(source.data);
228
+ }
229
+ }
230
+ /**
231
+ * Extract entities using format-specific importer
232
+ */
233
+ async extract(source, format, options) {
234
+ const extractOptions = {
235
+ enableNeuralExtraction: options.enableNeuralExtraction !== false,
236
+ enableRelationshipInference: options.enableRelationshipInference !== false,
237
+ enableConceptExtraction: options.enableConceptExtraction !== false,
238
+ confidenceThreshold: options.confidenceThreshold || 0.6,
239
+ onProgress: (stats) => {
240
+ options.onProgress?.({
241
+ stage: 'extracting',
242
+ message: `Extracting entities from ${format}...`,
243
+ processed: stats.processed,
244
+ total: stats.total,
245
+ entities: stats.entities,
246
+ relationships: stats.relationships
247
+ });
248
+ }
249
+ };
250
+ switch (format) {
251
+ case 'excel':
252
+ const buffer = source.type === 'buffer' || source.type === 'path'
253
+ ? source.data
254
+ : Buffer.from(JSON.stringify(source.data));
255
+ return await this.excelImporter.extract(buffer, extractOptions);
256
+ case 'pdf':
257
+ const pdfBuffer = source.data;
258
+ return await this.pdfImporter.extract(pdfBuffer, extractOptions);
259
+ case 'csv':
260
+ const csvBuffer = source.type === 'buffer' || source.type === 'path'
261
+ ? source.data
262
+ : Buffer.from(source.data);
263
+ return await this.csvImporter.extract(csvBuffer, extractOptions);
264
+ case 'json':
265
+ const jsonData = source.type === 'object'
266
+ ? source.data
267
+ : source.type === 'string'
268
+ ? source.data
269
+ : source.data.toString('utf8');
270
+ return await this.jsonImporter.extract(jsonData, extractOptions);
271
+ case 'markdown':
272
+ const mdContent = source.type === 'string'
273
+ ? source.data
274
+ : source.data.toString('utf8');
275
+ return await this.markdownImporter.extract(mdContent, extractOptions);
276
+ default:
277
+ throw new Error(`Unsupported format: ${format}`);
278
+ }
279
+ }
280
+ /**
281
+ * Create entities and relationships in knowledge graph
282
+ */
283
+ async createGraphEntities(extractionResult, vfsResult, options) {
284
+ const entities = [];
285
+ const relationships = [];
286
+ let mergedCount = 0;
287
+ let newCount = 0;
288
+ if (!options.createEntities) {
289
+ return { entities, relationships, merged: 0, newEntities: 0 };
290
+ }
291
+ // Extract rows/sections/entities from result (unified across formats)
292
+ const rows = extractionResult.rows || extractionResult.sections || extractionResult.entities || [];
293
+ // Create entities in graph
294
+ for (const row of rows) {
295
+ const entity = row.entity || row;
296
+ // Find corresponding VFS file
297
+ const vfsFile = vfsResult.files.find((f) => f.entityId === entity.id);
298
+ // Create or merge entity
299
+ try {
300
+ const importSource = vfsResult.rootPath;
301
+ let entityId;
302
+ let wasMerged = false;
303
+ if (options.enableDeduplication) {
304
+ // Use deduplicator to check for existing entities
305
+ const mergeResult = await this.deduplicator.createOrMerge({
306
+ id: entity.id,
307
+ name: entity.name,
308
+ type: entity.type,
309
+ description: entity.description || entity.name,
310
+ confidence: entity.confidence,
311
+ metadata: {
312
+ ...entity.metadata,
313
+ vfsPath: vfsFile?.path,
314
+ importedFrom: 'import-coordinator'
315
+ }
316
+ }, importSource, {
317
+ similarityThreshold: options.deduplicationThreshold || 0.85,
318
+ strictTypeMatching: true,
319
+ enableFuzzyMatching: true
320
+ });
321
+ entityId = mergeResult.mergedEntityId;
322
+ wasMerged = mergeResult.wasMerged;
323
+ if (wasMerged) {
324
+ mergedCount++;
325
+ }
326
+ else {
327
+ newCount++;
328
+ }
329
+ }
330
+ else {
331
+ // Direct creation without deduplication
332
+ entityId = await this.brain.add({
333
+ data: entity.description || entity.name,
334
+ type: entity.type,
335
+ metadata: {
336
+ ...entity.metadata,
337
+ name: entity.name,
338
+ confidence: entity.confidence,
339
+ vfsPath: vfsFile?.path,
340
+ importedAt: Date.now(),
341
+ importedFrom: 'import-coordinator',
342
+ imports: [importSource]
343
+ }
344
+ });
345
+ newCount++;
346
+ }
347
+ // Update entity ID in extraction result
348
+ entity.id = entityId;
349
+ entities.push({
350
+ id: entityId,
351
+ name: entity.name,
352
+ type: entity.type,
353
+ vfsPath: vfsFile?.path
354
+ });
355
+ // Create relationships if enabled
356
+ if (options.createRelationships && row.relationships) {
357
+ for (const rel of row.relationships) {
358
+ try {
359
+ // Find or create target entity
360
+ let targetEntityId;
361
+ // Check if target already exists in our entities list
362
+ const existingTarget = entities.find(e => e.name.toLowerCase() === rel.to.toLowerCase());
363
+ if (existingTarget) {
364
+ targetEntityId = existingTarget.id;
365
+ }
366
+ else {
367
+ // Try to find in other extracted entities
368
+ for (const otherRow of rows) {
369
+ const otherEntity = otherRow.entity || otherRow;
370
+ if (rel.to.toLowerCase().includes(otherEntity.name.toLowerCase()) ||
371
+ otherEntity.name.toLowerCase().includes(rel.to.toLowerCase())) {
372
+ targetEntityId = otherEntity.id;
373
+ break;
374
+ }
375
+ }
376
+ // If still not found, create placeholder entity
377
+ if (!targetEntityId) {
378
+ targetEntityId = await this.brain.add({
379
+ data: rel.to,
380
+ type: NounType.Thing,
381
+ metadata: {
382
+ name: rel.to,
383
+ placeholder: true,
384
+ inferredFrom: entity.name,
385
+ importedAt: Date.now()
386
+ }
387
+ });
388
+ entities.push({
389
+ id: targetEntityId,
390
+ name: rel.to,
391
+ type: NounType.Thing
392
+ });
393
+ }
394
+ }
395
+ // Create relationship using brain.relate()
396
+ const relId = await this.brain.relate({
397
+ from: entityId,
398
+ to: targetEntityId,
399
+ type: rel.type,
400
+ metadata: {
401
+ confidence: rel.confidence,
402
+ evidence: rel.evidence,
403
+ importedAt: Date.now()
404
+ }
405
+ });
406
+ relationships.push({
407
+ id: relId,
408
+ from: entityId,
409
+ to: targetEntityId,
410
+ type: rel.type
411
+ });
412
+ }
413
+ catch (error) {
414
+ // Skip relationship creation errors (entity might not exist, etc.)
415
+ continue;
416
+ }
417
+ }
418
+ }
419
+ }
420
+ catch (error) {
421
+ // Skip entity creation errors (might already exist, etc.)
422
+ continue;
423
+ }
424
+ }
425
+ return {
426
+ entities,
427
+ relationships,
428
+ merged: mergedCount,
429
+ newEntities: newCount
430
+ };
431
+ }
432
+ /**
433
+ * Normalize extraction result to unified format (Excel-like structure)
434
+ */
435
+ normalizeExtractionResult(result, format) {
436
+ // Excel and CSV already have the right format
437
+ if (format === 'excel' || format === 'csv') {
438
+ return result;
439
+ }
440
+ // PDF: sections -> rows
441
+ if (format === 'pdf') {
442
+ const rows = result.sections.flatMap((section) => section.entities.map((entity) => ({
443
+ entity,
444
+ relatedEntities: [],
445
+ relationships: section.relationships.filter((r) => r.from === entity.id),
446
+ concepts: section.concepts || []
447
+ })));
448
+ return {
449
+ rowsProcessed: result.sectionsProcessed,
450
+ entitiesExtracted: result.entitiesExtracted,
451
+ relationshipsInferred: result.relationshipsInferred,
452
+ rows,
453
+ entityMap: result.entityMap,
454
+ processingTime: result.processingTime,
455
+ stats: result.stats
456
+ };
457
+ }
458
+ // JSON: entities -> rows
459
+ if (format === 'json') {
460
+ const rows = result.entities.map((entity) => ({
461
+ entity,
462
+ relatedEntities: [],
463
+ relationships: result.relationships.filter((r) => r.from === entity.id),
464
+ concepts: entity.metadata?.concepts || []
465
+ }));
466
+ return {
467
+ rowsProcessed: result.nodesProcessed,
468
+ entitiesExtracted: result.entitiesExtracted,
469
+ relationshipsInferred: result.relationshipsInferred,
470
+ rows,
471
+ entityMap: result.entityMap,
472
+ processingTime: result.processingTime,
473
+ stats: result.stats
474
+ };
475
+ }
476
+ // Markdown: sections -> rows
477
+ if (format === 'markdown') {
478
+ const rows = result.sections.flatMap((section) => section.entities.map((entity) => ({
479
+ entity,
480
+ relatedEntities: [],
481
+ relationships: section.relationships.filter((r) => r.from === entity.id),
482
+ concepts: section.concepts || []
483
+ })));
484
+ return {
485
+ rowsProcessed: result.sectionsProcessed,
486
+ entitiesExtracted: result.entitiesExtracted,
487
+ relationshipsInferred: result.relationshipsInferred,
488
+ rows,
489
+ entityMap: result.entityMap,
490
+ processingTime: result.processingTime,
491
+ stats: result.stats
492
+ };
493
+ }
494
+ // Fallback: return as-is
495
+ return result;
496
+ }
497
+ }
498
+ //# sourceMappingURL=ImportCoordinator.js.map
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Import History & Rollback (Phase 4)
3
+ *
4
+ * Tracks all imports with:
5
+ * - Complete metadata and provenance
6
+ * - Entity and relationship tracking
7
+ * - Rollback capability
8
+ * - Import statistics
9
+ *
10
+ * NO MOCKS - Production-ready implementation
11
+ */
12
+ import { Brainy } from '../brainy.js';
13
+ import type { ImportResult } from './ImportCoordinator.js';
14
/**
 * One recorded import: where the data came from, what the import returned
 * and which graph/VFS artifacts belong to it.
 */
export interface ImportHistoryEntry {
    /** Identifier that is unique per import. */
    importId: string;
    /** When the import was recorded (numeric timestamp). */
    timestamp: number;
    /** Description of the imported source. */
    source: {
        /** Kind of source that was imported. */
        type: 'file' | 'buffer' | 'object' | 'string';
        /** Name of the source file, when there is one. */
        filename?: string;
        /** Format the source was imported as. */
        format: string;
    };
    /** The result returned by the import. */
    result: ImportResult;
    /** Entities created by this import. */
    entities: Array<string>;
    /** Relationships created by this import. */
    relationships: Array<string>;
    /** VFS paths created by this import. */
    vfsPaths: Array<string>;
    /** Outcome of the import. */
    status: 'success' | 'partial' | 'failed';
    /** Error messages, if any were collected. */
    errors?: Array<string>;
}
38
/** Outcome of rolling back an import. */
export interface RollbackResult {
    /** Whether the rollback was successful. */
    success: boolean;
    /** Number of entities deleted. */
    entitiesDeleted: number;
    /** Number of relationships deleted. */
    relationshipsDeleted: number;
    /** Number of VFS files deleted. */
    vfsFilesDeleted: number;
    /** Errors encountered during the rollback. */
    errors: Array<string>;
}
50
/**
 * ImportHistory - keeps a record of imports and can roll them back.
 */
export declare class ImportHistory {
    private brain;
    private history;
    private historyFile;
    /**
     * @param brain Brainy instance the history works against.
     * @param historyFile Optional history file location.
     */
    constructor(brain: Brainy, historyFile?: string);
    /**
     * Initialize history (load from VFS if exists).
     */
    init(): Promise<void>;
    /**
     * Record an import.
     *
     * @param importId Unique import ID.
     * @param source Source information for the import.
     * @param result Result returned by the import.
     */
    recordImport(importId: string, source: ImportHistoryEntry['source'], result: ImportResult): Promise<void>;
    /**
     * Get import history.
     *
     * @returns The recorded history entries.
     */
    getHistory(): Array<ImportHistoryEntry>;
    /**
     * Get specific import.
     *
     * @param importId ID of the import to look up.
     * @returns The matching entry, or `null`.
     */
    getImport(importId: string): ImportHistoryEntry | null;
    /**
     * Rollback an import (delete all entities, relationships, VFS files).
     *
     * @param importId ID of the import to roll back.
     * @returns Counts of what was deleted plus any errors encountered.
     */
    rollback(importId: string): Promise<RollbackResult>;
    /**
     * Get import statistics.
     *
     * @returns Totals plus per-format and per-status counters.
     */
    getStatistics(): {
        totalImports: number;
        totalEntities: number;
        totalRelationships: number;
        byFormat: Record<string, number>;
        byStatus: Record<string, number>;
    };
    /**
     * Persist history to VFS.
     */
    private persist;
}