@soulcraft/brainy 3.27.1 → 3.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/dist/brainy.d.ts +50 -0
- package/dist/brainy.js +36 -0
- package/dist/import/EntityDeduplicator.d.ts +84 -0
- package/dist/import/EntityDeduplicator.js +255 -0
- package/dist/import/FormatDetector.d.ts +65 -0
- package/dist/import/FormatDetector.js +263 -0
- package/dist/import/ImportCoordinator.d.ts +160 -0
- package/dist/import/ImportCoordinator.js +498 -0
- package/dist/import/ImportHistory.d.ts +92 -0
- package/dist/import/ImportHistory.js +183 -0
- package/dist/import/index.d.ts +16 -0
- package/dist/import/index.js +14 -0
- package/dist/importers/SmartCSVImporter.d.ts +136 -0
- package/dist/importers/SmartCSVImporter.js +308 -0
- package/dist/importers/SmartExcelImporter.d.ts +131 -0
- package/dist/importers/SmartExcelImporter.js +302 -0
- package/dist/importers/SmartImportOrchestrator.d.ts +125 -0
- package/dist/importers/SmartImportOrchestrator.js +531 -0
- package/dist/importers/SmartJSONImporter.d.ts +135 -0
- package/dist/importers/SmartJSONImporter.js +325 -0
- package/dist/importers/SmartMarkdownImporter.d.ts +159 -0
- package/dist/importers/SmartMarkdownImporter.js +369 -0
- package/dist/importers/SmartPDFImporter.d.ts +154 -0
- package/dist/importers/SmartPDFImporter.js +337 -0
- package/dist/importers/VFSStructureGenerator.d.ts +82 -0
- package/dist/importers/VFSStructureGenerator.js +260 -0
- package/dist/importers/index.d.ts +28 -0
- package/dist/importers/index.js +29 -0
- package/package.json +1 -1
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Import History & Rollback (Phase 4)
|
|
3
|
+
*
|
|
4
|
+
* Tracks all imports with:
|
|
5
|
+
* - Complete metadata and provenance
|
|
6
|
+
* - Entity and relationship tracking
|
|
7
|
+
* - Rollback capability
|
|
8
|
+
* - Import statistics
|
|
9
|
+
*
|
|
10
|
+
* NO MOCKS - Production-ready implementation
|
|
11
|
+
*/
|
|
12
|
+
/**
|
|
13
|
+
* ImportHistory - Track and manage import history with rollback
|
|
14
|
+
*/
|
|
15
|
+
export class ImportHistory {
    /**
     * @param {object} brain - Brainy instance. This class calls `brain.vfs()`,
     *   `brain.unrelate(id)` and `brain.delete(id)` on it.
     * @param {string} [historyFile='/.brainy/import_history.json'] - VFS path of
     *   the JSON file the history is loaded from and persisted to.
     */
    constructor(brain, historyFile = '/.brainy/import_history.json') {
        this.brain = brain;
        // importId -> history entry (entry shape is built in recordImport).
        this.history = new Map();
        this.historyFile = historyFile;
    }
    /**
     * Initialize history (load from VFS if it exists).
     *
     * Never throws: if the VFS or the history file cannot be read, or the file
     * does not contain a JSON object, the in-memory history starts empty.
     * An unparseable file is reported with `console.warn` because the next
     * `persist()` call will overwrite it.
     *
     * @returns {Promise<void>}
     */
    async init() {
        let content;
        try {
            const vfs = this.brain.vfs();
            await vfs.init();
            // Try to load existing history
            content = await vfs.readFile(this.historyFile);
        }
        catch (error) {
            // No existing history or VFS not available, start fresh
            this.history = new Map();
            return;
        }
        try {
            const data = JSON.parse(content.toString('utf-8'));
            if (data === null || typeof data !== 'object' || Array.isArray(data)) {
                throw new Error('expected a JSON object keyed by import ID');
            }
            this.history = new Map(Object.entries(data));
        }
        catch (error) {
            // The file exists but is corrupt: say so instead of silently dropping it.
            console.warn('Ignoring unreadable import history:', error instanceof Error ? error.message : String(error));
            this.history = new Map();
        }
    }
    /**
     * Record an import and persist the updated history.
     *
     * @param {string} importId - Key under which the entry is stored; an
     *   existing entry with the same ID is replaced.
     * @param {object} source - Stored as-is; `getStatistics()` reads `source.format`.
     * @param {object} result - Import result. Reads `result.entities[].id`,
     *   `result.relationships[].id`, `result.vfs.files[].path` and
     *   `result.stats.entitiesExtracted`; the whole object is kept on the entry.
     * @returns {Promise<void>}
     */
    async recordImport(importId, source, result) {
        const entry = {
            importId,
            timestamp: Date.now(),
            source,
            result,
            entities: result.entities.map(e => e.id),
            relationships: result.relationships.map(r => r.id),
            vfsPaths: result.vfs.files.map(f => f.path),
            status: result.stats.entitiesExtracted > 0 ? 'success' : 'partial'
        };
        this.history.set(importId, entry);
        // Persist to VFS
        await this.persist();
    }
    /**
     * Get import history.
     *
     * @returns {object[]} All entries, newest first (sorted by `timestamp`).
     */
    getHistory() {
        return Array.from(this.history.values()).sort((a, b) => b.timestamp - a.timestamp);
    }
    /**
     * Get a specific import.
     *
     * @param {string} importId
     * @returns {object|null} The entry, or `null` when the ID is unknown.
     */
    getImport(importId) {
        return this.history.get(importId) || null;
    }
    /**
     * Rollback an import (delete all entities, relationships, VFS files).
     *
     * Deletion is best-effort: each failure is collected in `errors` and the
     * remaining items are still attempted. The history entry is removed only
     * when nothing is left to undo. After a partial failure the entry is kept,
     * trimmed to the items that could not be deleted, so that calling
     * `rollback()` again retries exactly those.
     *
     * @param {string} importId - ID previously passed to `recordImport()`.
     * @returns {Promise<{success: boolean, entitiesDeleted: number,
     *   relationshipsDeleted: number, vfsFilesDeleted: number, errors: string[]}>}
     *   `success` is true only when no error was recorded.
     * @throws {Error} When `importId` is not in the history.
     */
    async rollback(importId) {
        const entry = this.history.get(importId);
        if (!entry) {
            throw new Error(`Import ${importId} not found in history`);
        }
        const messageOf = (error) => (error instanceof Error ? error.message : String(error));
        const result = {
            success: true,
            entitiesDeleted: 0,
            relationshipsDeleted: 0,
            vfsFilesDeleted: 0,
            errors: []
        };
        // Items whose deletion failed; they stay on the entry for a later retry.
        const pending = { relationships: [], entities: [], vfsPaths: [] };
        // Delete relationships first
        for (const relId of entry.relationships) {
            try {
                await this.brain.unrelate(relId);
                result.relationshipsDeleted++;
            }
            catch (error) {
                pending.relationships.push(relId);
                result.errors.push(`Failed to delete relationship ${relId}: ${messageOf(error)}`);
            }
        }
        // Delete entities
        for (const entityId of entry.entities) {
            try {
                await this.brain.delete(entityId);
                result.entitiesDeleted++;
            }
            catch (error) {
                pending.entities.push(entityId);
                result.errors.push(`Failed to delete entity ${entityId}: ${messageOf(error)}`);
            }
        }
        // Delete VFS files
        let vfs = null;
        try {
            vfs = this.brain.vfs();
            await vfs.init();
        }
        catch (error) {
            // VFS unavailable: none of the recorded files could be removed.
            vfs = null;
            pending.vfsPaths.push(...entry.vfsPaths);
            result.errors.push(`VFS cleanup failed: ${messageOf(error)}`);
        }
        if (vfs) {
            for (const vfsPath of entry.vfsPaths) {
                try {
                    await vfs.unlink(vfsPath);
                    result.vfsFilesDeleted++;
                }
                catch (error) {
                    // File might not exist or VFS unavailable
                    pending.vfsPaths.push(vfsPath);
                    result.errors.push(`Failed to delete VFS file ${vfsPath}: ${messageOf(error)}`);
                }
            }
            // Try to delete VFS root directory if empty
            try {
                const rootPath = entry.result.vfs.rootPath;
                const contents = await vfs.readdir(rootPath);
                if (contents.length === 0) {
                    await vfs.rmdir(rootPath);
                }
            }
            catch (error) {
                // Ignore errors for directory cleanup
            }
        }
        const pendingCount = pending.relationships.length + pending.entities.length + pending.vfsPaths.length;
        if (pendingCount === 0) {
            // Everything recorded for this import is gone: forget the entry.
            this.history.delete(importId);
        }
        else {
            // Keep track of what is still left so the rollback can be retried.
            this.history.set(importId, {
                ...entry,
                entities: pending.entities,
                relationships: pending.relationships,
                vfsPaths: pending.vfsPaths
            });
        }
        // Persist updated history
        await this.persist();
        result.success = result.errors.length === 0;
        return result;
    }
    /**
     * Get import statistics, computed over the entries currently in history.
     *
     * @returns {{totalImports: number, totalEntities: number,
     *   totalRelationships: number, byFormat: Object<string, number>,
     *   byStatus: Object<string, number>}} Counts keyed by `source.format`
     *   and by entry `status`.
     */
    getStatistics() {
        const history = Array.from(this.history.values());
        return {
            totalImports: history.length,
            totalEntities: history.reduce((sum, h) => sum + h.entities.length, 0),
            totalRelationships: history.reduce((sum, h) => sum + h.relationships.length, 0),
            byFormat: history.reduce((acc, h) => {
                acc[h.source.format] = (acc[h.source.format] || 0) + 1;
                return acc;
            }, {}),
            byStatus: history.reduce((acc, h) => {
                acc[h.status] = (acc[h.status] || 0) + 1;
                return acc;
            }, {})
        };
    }
    /**
     * Persist history to VFS as pretty-printed JSON (object keyed by import ID).
     *
     * Never throws: a failure is logged with `console.warn` and the history
     * stays in memory only.
     *
     * @returns {Promise<void>}
     */
    async persist() {
        try {
            const vfs = this.brain.vfs();
            await vfs.init();
            // Ensure directory exists. `dir` is empty when the file has no parent
            // directory component (e.g. '/history.json'); there is nothing to create then.
            const dir = this.historyFile.substring(0, this.historyFile.lastIndexOf('/'));
            if (dir) {
                try {
                    await vfs.mkdir(dir, { recursive: true });
                }
                catch (error) {
                    // Directory might exist
                }
            }
            // Convert Map to object for JSON
            const data = Object.fromEntries(this.history);
            await vfs.writeFile(this.historyFile, JSON.stringify(data, null, 2));
        }
        catch (error) {
            // VFS might not be available, continue without persistence
            console.warn('Failed to persist import history:', error instanceof Error ? error.message : String(error));
        }
    }
}
|
|
183
|
+
//# sourceMappingURL=ImportHistory.js.map
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified Import System
|
|
3
|
+
*
|
|
4
|
+
* Single entry point for importing any file format into Brainy with:
|
|
5
|
+
* - Auto-detection of formats
|
|
6
|
+
* - Dual storage (VFS + Knowledge Graph)
|
|
7
|
+
* - Shared entities across imports (deduplication)
|
|
8
|
+
* - Simple, powerful API
|
|
9
|
+
*/
|
|
10
|
+
export { ImportCoordinator } from './ImportCoordinator.js';
|
|
11
|
+
export { FormatDetector, SupportedFormat, DetectionResult } from './FormatDetector.js';
|
|
12
|
+
export { EntityDeduplicator } from './EntityDeduplicator.js';
|
|
13
|
+
export { ImportHistory } from './ImportHistory.js';
|
|
14
|
+
export type { ImportSource, ImportOptions, ImportProgress, ImportResult } from './ImportCoordinator.js';
|
|
15
|
+
export type { EntityCandidate, DuplicateMatch, EntityDeduplicationOptions, MergeResult } from './EntityDeduplicator.js';
|
|
16
|
+
export type { ImportHistoryEntry, RollbackResult } from './ImportHistory.js';
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified Import System
|
|
3
|
+
*
|
|
4
|
+
* Single entry point for importing any file format into Brainy with:
|
|
5
|
+
* - Auto-detection of formats
|
|
6
|
+
* - Dual storage (VFS + Knowledge Graph)
|
|
7
|
+
* - Shared entities across imports (deduplication)
|
|
8
|
+
* - Simple, powerful API
|
|
9
|
+
*/
|
|
10
|
+
export { ImportCoordinator } from './ImportCoordinator.js';
|
|
11
|
+
export { FormatDetector } from './FormatDetector.js';
|
|
12
|
+
export { EntityDeduplicator } from './EntityDeduplicator.js';
|
|
13
|
+
export { ImportHistory } from './ImportHistory.js';
|
|
14
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart CSV Importer
|
|
3
|
+
*
|
|
4
|
+
* Extracts entities and relationships from CSV files using:
|
|
5
|
+
* - NeuralEntityExtractor for entity extraction
|
|
6
|
+
* - NaturalLanguageProcessor for relationship inference
|
|
7
|
+
* - brain.extractConcepts() for tagging
|
|
8
|
+
*
|
|
9
|
+
* Very similar to SmartExcelImporter but handles CSV-specific features
|
|
10
|
+
*
|
|
11
|
+
* NO MOCKS - Production-ready implementation
|
|
12
|
+
*/
|
|
13
|
+
import { Brainy } from '../brainy.js';
|
|
14
|
+
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
15
|
+
import type { FormatHandlerOptions } from '../augmentations/intelligentImport/types.js';
|
|
16
|
+
export interface SmartCSVOptions extends FormatHandlerOptions {
    /** Run the neural entity extractor over each row's definition text (the importer defaults this to true). */
    enableNeuralExtraction?: boolean;
    /** Build relationships from extracted entities and the "related" column (the importer defaults this to true). */
    enableRelationshipInference?: boolean;
    /** Call brain.extractConcepts() on each row's definition text (the importer defaults this to true). */
    enableConceptExtraction?: boolean;
    /** Entity confidence threshold in the range 0-1 (the importer defaults this to 0.6). */
    confidenceThreshold?: number;
    /**
     * Column detection patterns. Each value is compiled as a case-insensitive
     * regular expression and tested against the header names of the first row,
     * so alternatives can be written as 'term|name|title'.
     */
    termColumn?: string;
    definitionColumn?: string;
    typeColumn?: string;
    relatedColumn?: string;
    /** CSV parsing options, forwarded to the CSV handler. */
    csvDelimiter?: string;
    csvHeaders?: boolean;
    /** Invoked once after every processed row. */
    onProgress?: (stats: {
        processed: number;
        total: number;
        entities: number;
        relationships: number;
    }) => void;
}
|
|
41
|
+
export interface ExtractedRow {
    /** The primary entity built from this row's term/definition/type columns. */
    entity: {
        id: string;
        name: string;
        type: NounType;
        description: string;
        confidence: number;
        metadata: Record<string, any>;
    };
    /** Further entities the extractor found inside the definition text. */
    relatedEntities: {
        name: string;
        type: NounType;
        confidence: number;
    }[];
    /**
     * Relationships inferred for this row. The importer fills `from` with the
     * main entity's generated ID and `to` with the related entity's name.
     */
    relationships: {
        from: string;
        to: string;
        type: VerbType;
        confidence: number;
        evidence: string;
    }[];
    /** Concepts extracted from the definition text, when concept extraction ran. */
    concepts?: string[];
}
|
|
68
|
+
export interface SmartCSVResult {
    /** Number of CSV rows that were processed. */
    rowsProcessed: number;
    /** Count of main entities plus related entities across all rows. */
    entitiesExtracted: number;
    /** Count of relationships across all rows. */
    relationshipsInferred: number;
    /** Per-row extraction output, in row order. */
    rows: ExtractedRow[];
    /** Lookup from lower-cased term to the generated entity ID. */
    entityMap: Map<string, string>;
    /** Wall-clock duration of the extraction, in milliseconds. */
    processingTime: number;
    /** Aggregate counters collected while extracting. */
    stats: {
        /** Number of entities per type name. */
        byType: Record<string, number>;
        /** Buckets: high is > 0.8, medium is >= 0.6, low is everything else. */
        byConfidence: {
            high: number;
            medium: number;
            low: number;
        };
    };
}
|
|
91
|
+
/**
|
|
92
|
+
* SmartCSVImporter - Extracts structured knowledge from CSV files
|
|
93
|
+
*/
|
|
94
|
+
export declare class SmartCSVImporter {
    private brain;
    private extractor;
    private nlp;
    private csvHandler;
    constructor(brain: Brainy);
    /** Prepares the importer; must be awaited before the importer is used. */
    init(): Promise<void>;
    /** Parses the CSV buffer and returns the entities and relationships found in it. */
    extract(buffer: Buffer, options?: SmartCSVOptions): Promise<SmartCSVResult>;
    /** Picks the term/definition/type/related columns from the first row's keys. */
    private detectColumns;
    /** Reads one cell of a row as a trimmed string. */
    private getColumnValue;
    /** Translates a free-text type label into a NounType. */
    private mapTypeString;
    /** Chooses a VerbType for two terms based on the surrounding text. */
    private inferRelationship;
    /** Builds an entity ID from an entity name. */
    private generateEntityId;
    /** Increments the per-type and per-confidence counters. */
    private updateStats;
    /** Builds the result returned when the CSV has no rows. */
    private emptyResult;
}
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart CSV Importer
|
|
3
|
+
*
|
|
4
|
+
* Extracts entities and relationships from CSV files using:
|
|
5
|
+
* - NeuralEntityExtractor for entity extraction
|
|
6
|
+
* - NaturalLanguageProcessor for relationship inference
|
|
7
|
+
* - brain.extractConcepts() for tagging
|
|
8
|
+
*
|
|
9
|
+
* Very similar to SmartExcelImporter but handles CSV-specific features
|
|
10
|
+
*
|
|
11
|
+
* NO MOCKS - Production-ready implementation
|
|
12
|
+
*/
|
|
13
|
+
import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
|
|
14
|
+
import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
|
|
15
|
+
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
16
|
+
import { CSVHandler } from '../augmentations/intelligentImport/handlers/csvHandler.js';
|
|
17
|
+
/**
|
|
18
|
+
* SmartCSVImporter - Extracts structured knowledge from CSV files
|
|
19
|
+
*/
|
|
20
|
+
export class SmartCSVImporter {
    /**
     * @param {object} brain - Brainy instance. It is handed to the entity
     *   extractor and the NLP processor, and `brain.extractConcepts()` is
     *   called on it directly during extraction.
     */
    constructor(brain) {
        this.brain = brain;
        this.extractor = new NeuralEntityExtractor(brain);
        this.nlp = new NaturalLanguageProcessor(brain);
        this.csvHandler = new CSVHandler();
    }
    /**
     * Initialize the importer (initializes the NLP processor).
     *
     * @returns {Promise<void>}
     */
    async init() {
        await this.nlp.init();
    }
    /**
     * Extract entities and relationships from a CSV file.
     *
     * Each row yields one main entity (from the term/definition/type columns),
     * optionally related entities extracted from the definition text, inferred
     * relationships, and concepts.
     *
     * @param {Buffer} buffer - Raw CSV content, passed to the CSV handler.
     * @param {object} [options] - See the defaults below. Options that are
     *   present but `undefined` are ignored so they cannot override a default.
     * @returns {Promise<object>} Result with `rowsProcessed`, `entitiesExtracted`,
     *   `relationshipsInferred`, `rows`, `entityMap` (lower-cased term -> ID),
     *   `processingTime` (ms) and `stats`.
     */
    async extract(buffer, options = {}) {
        const startTime = Date.now();
        // An explicitly-undefined value (e.g. `{ onProgress: caller.onProgress }`)
        // would otherwise replace the default through the spread below and make
        // `opts.onProgress(...)` throw or `confidenceThreshold * 0.8` become NaN.
        const provided = Object.fromEntries(Object.entries(options).filter(([, value]) => value !== undefined));
        // Set defaults
        const opts = {
            enableNeuralExtraction: true,
            enableRelationshipInference: true,
            enableConceptExtraction: true,
            confidenceThreshold: 0.6,
            termColumn: 'term|name|title|concept|entity',
            definitionColumn: 'definition|description|desc|details|text',
            typeColumn: 'type|category|kind|class',
            relatedColumn: 'related|see also|links|references',
            csvDelimiter: undefined,
            csvHeaders: true,
            onProgress: () => { },
            ...provided
        };
        // Parse CSV using existing handler
        const processedData = await this.csvHandler.process(buffer, {
            ...options,
            csvDelimiter: opts.csvDelimiter,
            csvHeaders: opts.csvHeaders
        });
        const rows = processedData.data;
        if (rows.length === 0) {
            return this.emptyResult(startTime);
        }
        // Detect column names
        const columns = this.detectColumns(rows[0], opts);
        // Process each row
        const extractedRows = [];
        const entityMap = new Map();
        const stats = {
            byType: {},
            byConfidence: { high: 0, medium: 0, low: 0 }
        };
        // Running totals so progress reports agree with the final result.
        let totalEntities = 0;
        let totalRelationships = 0;
        for (let i = 0; i < rows.length; i++) {
            const row = rows[i];
            // Extract data from row
            const term = this.getColumnValue(row, columns.term) || `Entity_${i}`;
            const definition = this.getColumnValue(row, columns.definition) || '';
            const type = this.getColumnValue(row, columns.type);
            const relatedTerms = this.getColumnValue(row, columns.related);
            // Extract entities from definition
            let relatedEntities = [];
            if (opts.enableNeuralExtraction && definition) {
                relatedEntities = await this.extractor.extract(definition, {
                    confidence: opts.confidenceThreshold * 0.8, // Lower threshold for related entities
                    neuralMatching: true,
                    cache: { enabled: true }
                });
                // Filter out the main term from related entities
                relatedEntities = relatedEntities.filter(e => e.text.toLowerCase() !== term.toLowerCase());
            }
            // Determine main entity type: explicit type column wins, then the
            // type of the first related entity, then the generic Thing.
            const mainEntityType = type ?
                this.mapTypeString(type) :
                (relatedEntities.length > 0 ? relatedEntities[0].type : NounType.Thing);
            // Generate entity ID
            const entityId = this.generateEntityId(term);
            entityMap.set(term.toLowerCase(), entityId);
            // Extract concepts (best-effort: a failure leaves the row without concepts)
            let concepts = [];
            if (opts.enableConceptExtraction && definition) {
                try {
                    concepts = await this.brain.extractConcepts(definition, { limit: 10 });
                }
                catch (error) {
                    concepts = [];
                }
            }
            // Create main entity
            const mainEntity = {
                id: entityId,
                name: term,
                type: mainEntityType,
                description: definition,
                confidence: 0.95, // Main entity from row has high confidence
                metadata: {
                    source: 'csv',
                    row: i + 1,
                    originalData: row,
                    concepts,
                    extractedAt: Date.now()
                }
            };
            // Track statistics (main entities only)
            this.updateStats(stats, mainEntityType, mainEntity.confidence);
            // Infer relationships
            const relationships = [];
            if (opts.enableRelationshipInference) {
                // Extract relationships from definition text
                for (const relEntity of relatedEntities) {
                    const verbType = await this.inferRelationship(term, relEntity.text, definition);
                    relationships.push({
                        from: entityId,
                        to: relEntity.text,
                        type: verbType,
                        confidence: relEntity.confidence,
                        evidence: `Extracted from: "${definition.substring(0, 100)}..."`
                    });
                }
                // Parse explicit "Related" column
                if (relatedTerms) {
                    const terms = relatedTerms.split(/[,;|]/).map(t => t.trim()).filter(Boolean);
                    for (const relTerm of terms) {
                        // Ensure we don't create self-relationships
                        if (relTerm.toLowerCase() !== term.toLowerCase()) {
                            relationships.push({
                                from: entityId,
                                to: relTerm,
                                type: VerbType.RelatedTo,
                                confidence: 0.9,
                                evidence: `Explicitly listed in "Related" column`
                            });
                        }
                    }
                }
            }
            // Add extracted row
            extractedRows.push({
                entity: mainEntity,
                relatedEntities: relatedEntities.map(e => ({
                    name: e.text,
                    type: e.type,
                    confidence: e.confidence
                })),
                relationships,
                concepts
            });
            totalEntities += 1 + relatedEntities.length;
            totalRelationships += relationships.length;
            // Report progress (cumulative counts over all rows processed so far)
            opts.onProgress({
                processed: i + 1,
                total: rows.length,
                entities: totalEntities,
                relationships: totalRelationships
            });
        }
        return {
            rowsProcessed: rows.length,
            entitiesExtracted: totalEntities,
            relationshipsInferred: totalRelationships,
            rows: extractedRows,
            entityMap,
            processingTime: Date.now() - startTime,
            stats
        };
    }
    /**
     * Detect column names from first row.
     *
     * @param {object} firstRow - First parsed row; its keys are the column names.
     * @param {object} options - `termColumn`, `definitionColumn`, `typeColumn`
     *   and `relatedColumn` are compiled as case-insensitive regular expressions.
     * @returns {{term: ?string, definition: ?string, type: ?string, related: ?string}}
     *   The first matching column name for each role, or `null` when none matches.
     */
    detectColumns(firstRow, options) {
        const columnNames = Object.keys(firstRow);
        const matchColumn = (pattern) => {
            const regex = new RegExp(pattern, 'i');
            return columnNames.find(col => regex.test(col)) || null;
        };
        return {
            term: matchColumn(options.termColumn || 'term|name'),
            definition: matchColumn(options.definitionColumn || 'definition|description'),
            type: matchColumn(options.typeColumn || 'type|category'),
            related: matchColumn(options.relatedColumn || 'related|see also')
        };
    }
    /**
     * Get value from row using column name.
     *
     * @param {object} row - Parsed CSV row.
     * @param {?string} columnName - Column to read; falsy means "no such column".
     * @returns {string} The trimmed string value, or '' when the column is
     *   missing or the cell is null/undefined.
     */
    getColumnValue(row, columnName) {
        if (!columnName)
            return '';
        const value = row[columnName];
        if (value === null || value === undefined)
            return '';
        return String(value).trim();
    }
    /**
     * Map type string to NounType.
     *
     * @param {string} typeString - Free-text type label; matched
     *   case-insensitively after trimming.
     * @returns {*} The mapped NounType, or `NounType.Thing` for unknown labels.
     */
    mapTypeString(typeString) {
        const normalized = typeString.toLowerCase().trim();
        const mapping = {
            'person': NounType.Person,
            'character': NounType.Person,
            'people': NounType.Person,
            'place': NounType.Location,
            'location': NounType.Location,
            'geography': NounType.Location,
            'organization': NounType.Organization,
            'org': NounType.Organization,
            'company': NounType.Organization,
            'concept': NounType.Concept,
            'idea': NounType.Concept,
            'theory': NounType.Concept,
            'event': NounType.Event,
            'occurrence': NounType.Event,
            'product': NounType.Product,
            'item': NounType.Product,
            'thing': NounType.Thing,
            'document': NounType.Document,
            'file': NounType.File,
            'project': NounType.Project
        };
        // Own-property check: labels such as "constructor" or "toString" must not
        // resolve to members inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(mapping, normalized)) {
            return mapping[normalized] || NounType.Thing;
        }
        return NounType.Thing;
    }
    /**
     * Infer relationship type from context.
     *
     * Tries a fixed, ordered list of textual patterns against the context and
     * returns the verb type of the first one that matches.
     *
     * @param {string} fromTerm - Name of the main entity.
     * @param {string} toTerm - Name of the related entity.
     * @param {string} context - Text both terms were taken from.
     * @returns {Promise<*>} The matching VerbType, or `VerbType.RelatedTo`.
     */
    async inferRelationship(fromTerm, toTerm, context) {
        // The terms come straight from CSV cells, so they must be matched
        // literally: unescaped, "C++" or "(" would throw a SyntaxError and "."
        // would match any character.
        const escapeForRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const from = escapeForRegExp(fromTerm);
        const to = escapeForRegExp(toTerm);
        const lowerContext = context.toLowerCase();
        // Pattern-based relationship detection
        const patterns = [
            [new RegExp(`${to}.*of.*${from}`, 'i'), VerbType.PartOf],
            [new RegExp(`${from}.*contains.*${to}`, 'i'), VerbType.Contains],
            [new RegExp(`located in.*${to}`, 'i'), VerbType.LocatedAt],
            [new RegExp(`ruled by.*${to}`, 'i'), VerbType.Owns],
            [new RegExp(`capital.*${to}`, 'i'), VerbType.Contains],
            [new RegExp(`created by.*${to}`, 'i'), VerbType.CreatedBy],
            [new RegExp(`authored by.*${to}`, 'i'), VerbType.CreatedBy],
            [new RegExp(`part of.*${to}`, 'i'), VerbType.PartOf],
            [new RegExp(`related to.*${to}`, 'i'), VerbType.RelatedTo]
        ];
        for (const [pattern, verbType] of patterns) {
            if (pattern.test(lowerContext)) {
                return verbType;
            }
        }
        // Default to RelatedTo
        return VerbType.RelatedTo;
    }
    /**
     * Generate an entity ID from a name.
     *
     * The ID is `ent_<lower-cased name with whitespace runs replaced by "_">_<Date.now()>`,
     * so it embeds the current timestamp and differs between calls made at
     * different times.
     *
     * @param {string} name
     * @returns {string}
     */
    generateEntityId(name) {
        const normalized = name.toLowerCase().trim().replace(/\s+/g, '_');
        return `ent_${normalized}_${Date.now()}`;
    }
    /**
     * Update statistics in place.
     *
     * @param {{byType: object, byConfidence: {high: number, medium: number, low: number}}} stats
     * @param {*} type - Entity type; counted under `String(type)`.
     * @param {number} confidence - > 0.8 is high, >= 0.6 is medium, otherwise low.
     */
    updateStats(stats, type, confidence) {
        // Track by type
        const typeName = String(type);
        stats.byType[typeName] = (stats.byType[typeName] || 0) + 1;
        // Track by confidence
        if (confidence > 0.8) {
            stats.byConfidence.high++;
        }
        else if (confidence >= 0.6) {
            stats.byConfidence.medium++;
        }
        else {
            stats.byConfidence.low++;
        }
    }
    /**
     * Create the result returned for a CSV without rows.
     *
     * @param {number} startTime - `Date.now()` value taken when extraction began.
     * @returns {object} A result with zero counts and empty collections.
     */
    emptyResult(startTime) {
        return {
            rowsProcessed: 0,
            entitiesExtracted: 0,
            relationshipsInferred: 0,
            rows: [],
            entityMap: new Map(),
            processingTime: Date.now() - startTime,
            stats: {
                byType: {},
                byConfidence: { high: 0, medium: 0, low: 0 }
            }
        };
    }
}
|
|
308
|
+
//# sourceMappingURL=SmartCSVImporter.js.map
|