@soulcraft/brainy 4.1.4 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/dist/import/FormatDetector.d.ts +6 -1
- package/dist/import/FormatDetector.js +40 -1
- package/dist/import/ImportCoordinator.d.ts +102 -4
- package/dist/import/ImportCoordinator.js +248 -6
- package/dist/import/InstancePool.d.ts +136 -0
- package/dist/import/InstancePool.js +231 -0
- package/dist/importers/SmartCSVImporter.d.ts +2 -1
- package/dist/importers/SmartCSVImporter.js +11 -22
- package/dist/importers/SmartDOCXImporter.d.ts +125 -0
- package/dist/importers/SmartDOCXImporter.js +227 -0
- package/dist/importers/SmartExcelImporter.d.ts +12 -1
- package/dist/importers/SmartExcelImporter.js +40 -25
- package/dist/importers/SmartJSONImporter.d.ts +1 -0
- package/dist/importers/SmartJSONImporter.js +25 -6
- package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
- package/dist/importers/SmartMarkdownImporter.js +11 -16
- package/dist/importers/SmartPDFImporter.d.ts +2 -1
- package/dist/importers/SmartPDFImporter.js +11 -22
- package/dist/importers/SmartYAMLImporter.d.ts +121 -0
- package/dist/importers/SmartYAMLImporter.js +275 -0
- package/dist/importers/VFSStructureGenerator.js +12 -0
- package/dist/neural/SmartExtractor.d.ts +279 -0
- package/dist/neural/SmartExtractor.js +592 -0
- package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
- package/dist/neural/SmartRelationshipExtractor.js +396 -0
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/neural/entityExtractor.d.ts +3 -0
- package/dist/neural/entityExtractor.js +34 -36
- package/dist/neural/presets.d.ts +189 -0
- package/dist/neural/presets.js +365 -0
- package/dist/neural/signals/ContextSignal.d.ts +166 -0
- package/dist/neural/signals/ContextSignal.js +646 -0
- package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
- package/dist/neural/signals/EmbeddingSignal.js +435 -0
- package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
- package/dist/neural/signals/ExactMatchSignal.js +542 -0
- package/dist/neural/signals/PatternSignal.d.ts +159 -0
- package/dist/neural/signals/PatternSignal.js +478 -0
- package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
- package/dist/neural/signals/VerbContextSignal.js +390 -0
- package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
- package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
- package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
- package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
- package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
- package/dist/neural/signals/VerbPatternSignal.js +457 -0
- package/dist/types/graphTypes.d.ts +2 -0
- package/dist/utils/metadataIndex.d.ts +22 -0
- package/dist/utils/metadataIndex.js +76 -0
- package/package.json +4 -1
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart DOCX Importer
|
|
3
|
+
*
|
|
4
|
+
* Extracts entities and relationships from Word documents using:
|
|
5
|
+
* - Mammoth parser for DOCX → HTML/text conversion
|
|
6
|
+
* - Heading extraction for document structure
|
|
7
|
+
* - Table detection (tables are counted as part of the document structure)
|
|
8
|
+
* - NeuralEntityExtractor for entity extraction from paragraphs
|
|
9
|
+
* - SmartRelationshipExtractor for relationship inference
|
|
10
|
+
* - Hierarchical relationship creation based on heading hierarchy
|
|
11
|
+
*
|
|
12
|
+
* v4.2.0: New format handler
|
|
13
|
+
* NO MOCKS - Production-ready implementation
|
|
14
|
+
*/
|
|
15
|
+
import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
|
|
16
|
+
import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
|
|
17
|
+
import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
|
|
18
|
+
import { VerbType } from '../types/graphTypes.js';
|
|
19
|
+
// Dynamic import for mammoth (ESM compatibility)
|
|
20
|
+
let mammoth;
|
|
21
|
+
/**
 * SmartDOCXImporter - Extracts structured knowledge from Word documents.
 *
 * The document is converted twice with mammoth: once to raw text (the source
 * of paragraphs for entity extraction) and once to HTML (the source of the
 * heading / paragraph / table structure). Entities found in a paragraph are
 * tagged with the most recent heading, and entities that share a section are
 * linked through the SmartRelationshipExtractor.
 */
export class SmartDOCXImporter {
    /**
     * @param brain Brainy instance handed to the extractor, NLP and
     *              relationship-extractor helpers.
     */
    constructor(brain) {
        this.mammothLoaded = false;
        this.brain = brain;
        this.extractor = new NeuralEntityExtractor(brain);
        this.nlp = new NaturalLanguageProcessor(brain);
        this.relationshipExtractor = new SmartRelationshipExtractor(brain);
    }
    /**
     * Initialize the importer: initializes the NLP helper and lazily loads
     * the mammoth parser.
     *
     * @throws {Error} when the mammoth module cannot be imported.
     */
    async init() {
        await this.nlp.init();
        // Lazy load mammoth
        if (!this.mammothLoaded) {
            try {
                const loaded = await import('mammoth');
                // A CommonJS package imported from ESM exposes module.exports as
                // `default`; prefer it so the API does not depend on named-export
                // detection, and fall back to the namespace for a native ESM build.
                mammoth = loaded.default ?? loaded;
                this.mammothLoaded = true;
            }
            catch (error) {
                throw new Error(`Failed to load mammoth parser: ${error.message}`, { cause: error });
            }
        }
    }
    /**
     * Extract entities and relationships from a DOCX buffer.
     *
     * @param buffer  DOCX file contents, passed to mammoth as `{ buffer }`.
     * @param options Extraction options (see extractFromContent).
     * @returns The extractFromContent result with `processingTime` (ms) filled in.
     */
    async extract(buffer, options = {}) {
        const startTime = Date.now();
        // Ensure mammoth is loaded
        if (!this.mammothLoaded) {
            await this.init();
        }
        // Raw text feeds entity extraction
        const textResult = await mammoth.extractRawText({ buffer });
        // HTML feeds structure analysis (headings, tables)
        const htmlResult = await mammoth.convertToHtml({ buffer });
        const result = await this.extractFromContent(textResult.value, htmlResult.value, options);
        result.processingTime = Date.now() - startTime;
        return result;
    }
    /**
     * Extract entities and relationships from parsed DOCX content.
     *
     * @param rawText Plain text of the document; paragraphs are separated by blank lines.
     * @param html    HTML rendering of the same document, used only for structure.
     * @param options Optional settings:
     *   - enableNeuralExtraction (default true)
     *   - enableHierarchicalRelationships (default true)
     *   - enableConceptExtraction (default true; recorded but not used here)
     *   - confidenceThreshold (default 0.6; an explicit 0 is honoured)
     *   - minParagraphLength (default 20; an explicit 0 is honoured)
     *   - onProgress({ processed, entities, relationships })
     * @returns Counts, entities, relationships, entityMap, structure and stats.
     *          `processingTime` is 0 here and is set by extract().
     */
    async extractFromContent(rawText, html, options = {}) {
        const opts = {
            enableNeuralExtraction: options.enableNeuralExtraction !== false,
            enableHierarchicalRelationships: options.enableHierarchicalRelationships !== false,
            enableConceptExtraction: options.enableConceptExtraction !== false,
            // `??` rather than `||` so that a caller-supplied 0 is respected
            confidenceThreshold: options.confidenceThreshold ?? 0.6,
            minParagraphLength: options.minParagraphLength ?? 20
        };
        const entities = [];
        const relationships = [];
        const entityMap = new Map();
        const stats = {
            byType: {},
            bySection: {},
            byConfidence: { high: 0, medium: 0, low: 0 }
        };
        // Parse document structure from HTML
        const structure = this.parseStructure(html);
        const headings = structure.headings;
        // Keep every non-empty text block. The length filter is applied per block
        // below instead of up front: headings are usually shorter than
        // minParagraphLength and must still be seen to track the current section.
        const blocks = String(rawText ?? '')
            .split(/\n\n+/)
            .map(block => block.trim())
            .filter(block => block.length > 0);
        let currentSection = 'Introduction';
        let headingIndex = 0;
        // Number of blocks that met minParagraphLength (headings included when long enough)
        let paragraphsProcessed = 0;
        for (const paragraph of blocks) {
            const meetsMinLength = paragraph.length >= opts.minParagraphLength;
            // A heading without text can never be located in the raw text and
            // would otherwise block every heading after it.
            while (headingIndex < headings.length && headings[headingIndex].text === '') {
                headingIndex++;
            }
            const heading = headings[headingIndex];
            if (heading && this.isHeadingParagraph(paragraph, heading.text, meetsMinLength)) {
                currentSection = heading.text;
                headingIndex++;
                // Do not reset the count when a heading title repeats
                stats.bySection[currentSection] = stats.bySection[currentSection] ?? 0;
                if (meetsMinLength) {
                    paragraphsProcessed++;
                }
                continue;
            }
            if (!meetsMinLength) {
                continue;
            }
            const paragraphIndex = paragraphsProcessed++;
            // Extract entities from paragraph
            if (opts.enableNeuralExtraction) {
                const extractedEntities = await this.extractor.extract(paragraph, {
                    confidence: opts.confidenceThreshold
                });
                for (const extracted of extractedEntities) {
                    const entityId = `para${paragraphIndex}:${extracted.text}`;
                    // The same text found twice in one paragraph maps to the same id;
                    // keep one entity so ids stay unique and no self-relationship
                    // is generated later.
                    if (entityMap.has(entityId)) {
                        continue;
                    }
                    const entity = {
                        id: entityId,
                        name: extracted.text,
                        type: extracted.type,
                        description: paragraph,
                        confidence: extracted.confidence,
                        weight: extracted.weight ?? 1.0,
                        section: currentSection,
                        paragraphIndex,
                        metadata: {
                            position: extracted.position,
                            headingContext: currentSection
                        }
                    };
                    entities.push(entity);
                    entityMap.set(entityId, entityId);
                    // Update stats
                    stats.byType[entity.type] = (stats.byType[entity.type] || 0) + 1;
                    stats.bySection[currentSection] = (stats.bySection[currentSection] || 0) + 1;
                    if (entity.confidence > 0.8) {
                        stats.byConfidence.high++;
                    }
                    else if (entity.confidence >= 0.6) {
                        stats.byConfidence.medium++;
                    }
                    else {
                        stats.byConfidence.low++;
                    }
                }
            }
            // Report progress every tenth processed paragraph
            if (options.onProgress && paragraphIndex % 10 === 0) {
                options.onProgress({
                    processed: paragraphIndex,
                    entities: entities.length,
                    relationships: relationships.length
                });
            }
        }
        // Create relationships between entities that share a section
        if (opts.enableHierarchicalRelationships) {
            const entitiesBySection = new Map();
            for (const entity of entities) {
                const section = entity.section || 'Unknown';
                if (!entitiesBySection.has(section)) {
                    entitiesBySection.set(section, []);
                }
                entitiesBySection.get(section).push(entity);
            }
            for (const [section, sectionEntities] of entitiesBySection) {
                for (let i = 0; i < sectionEntities.length - 1; i++) {
                    // Each entity is paired with at most the next two in its section
                    for (let j = i + 1; j < Math.min(i + 3, sectionEntities.length); j++) {
                        const entityA = sectionEntities[i];
                        const entityB = sectionEntities[j];
                        // Combine both descriptions to give the extractor some context
                        const context = `In section "${section}": ${entityA.description.substring(0, 150)}... ${entityB.description.substring(0, 150)}...`;
                        const inferredRelationship = await this.relationshipExtractor.infer(entityA.name, entityB.name, context, {
                            subjectType: entityA.type,
                            objectType: entityB.type
                        });
                        relationships.push({
                            from: entityA.id,
                            to: entityB.id,
                            type: inferredRelationship?.type || VerbType.RelatedTo, // Fallback to RelatedTo for co-occurrence
                            confidence: inferredRelationship?.confidence ?? 0.7,
                            weight: inferredRelationship?.weight ?? 0.8,
                            evidence: inferredRelationship?.evidence || `Both entities appear in section: ${section}`
                        });
                    }
                }
            }
        }
        // Final progress report
        if (options.onProgress) {
            options.onProgress({
                processed: paragraphsProcessed,
                entities: entities.length,
                relationships: relationships.length
            });
        }
        return {
            paragraphsProcessed,
            entitiesExtracted: entities.length,
            relationshipsInferred: relationships.length,
            entities,
            relationships,
            entityMap,
            processingTime: 0, // Will be set by caller
            structure,
            stats
        };
    }
    /**
     * Decide whether a raw-text block is the given heading.
     *
     * An exact match (after collapsing whitespace) always counts. The looser
     * prefix / containment test is only applied when `allowLooseMatch` is true,
     * i.e. for blocks that met the minimum paragraph length; applying it to very
     * short blocks would match almost any heading.
     *
     * @param paragraph       Trimmed block of raw text.
     * @param headingText     Heading text taken from the HTML structure.
     * @param allowLooseMatch Whether the prefix / containment test may be used.
     * @returns true when the block should be treated as that heading.
     */
    isHeadingParagraph(paragraph, headingText, allowLooseMatch) {
        if (!headingText) {
            return false;
        }
        const collapse = (value) => value.replace(/\s+/g, ' ').trim();
        if (collapse(paragraph) === collapse(headingText)) {
            return true;
        }
        if (!allowLooseMatch) {
            return false;
        }
        return paragraph.startsWith(headingText) || headingText.includes(paragraph.substring(0, 50));
    }
    /**
     * Decode the basic named HTML entities and numeric character references so
     * that text taken from HTML can be compared with the raw document text.
     * Unknown entities are left untouched.
     *
     * @param text Text with HTML tags already removed.
     * @returns The text with recognised entities replaced by their characters.
     */
    decodeHtmlEntities(text) {
        const named = new Map([
            ['amp', '&'],
            ['lt', '<'],
            ['gt', '>'],
            ['quot', '"'],
            ['apos', "'"],
            ['nbsp', '\u00a0']
        ]);
        // Single pass, so "&amp;lt;" correctly becomes "&lt;" and not "<"
        return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
            if (body[0] === '#') {
                const isHex = body[1] === 'x' || body[1] === 'X';
                const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
                return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
            }
            return named.get(body.toLowerCase()) ?? entity;
        });
    }
    /**
     * Parse document structure from HTML.
     *
     * @param html HTML rendering of the document.
     * @returns `{ headings, paragraphCount, tableCount }` where each heading is
     *          `{ level, text, index }` in document order.
     */
    parseStructure(html) {
        const source = String(html ?? '');
        const headings = [];
        // h1-h6, tolerating attributes on the tag and line breaks in the content
        const headingRegex = /<h([1-6])\b[^>]*>([\s\S]*?)<\/h\1>/gi;
        let match;
        let index = 0;
        while ((match = headingRegex.exec(source)) !== null) {
            const level = Number.parseInt(match[1], 10);
            // Strip nested tags first, then decode entities, so an escaped "<"
            // in the heading text is not mistaken for markup.
            const text = this.decodeHtmlEntities(match[2].replace(/<[^>]+>/g, '')).trim();
            headings.push({ level, text, index: index++ });
        }
        // Count paragraphs
        const paragraphCount = (source.match(/<p\b[^>]*>/gi) || []).length;
        // Count tables
        const tableCount = (source.match(/<table\b[^>]*>/gi) || []).length;
        return {
            headings,
            paragraphCount,
            tableCount
        };
    }
}
|
|
227
|
+
//# sourceMappingURL=SmartDOCXImporter.js.map
|
|
@@ -88,6 +88,16 @@ export interface SmartExcelResult {
|
|
|
88
88
|
low: number;
|
|
89
89
|
};
|
|
90
90
|
};
|
|
91
|
+
/** Sheet-specific data for VFS extraction (v4.2.0) */
|
|
92
|
+
sheets?: Array<{
|
|
93
|
+
name: string;
|
|
94
|
+
rows: ExtractedRow[];
|
|
95
|
+
stats: {
|
|
96
|
+
rowCount: number;
|
|
97
|
+
entityCount: number;
|
|
98
|
+
relationshipCount: number;
|
|
99
|
+
};
|
|
100
|
+
}>;
|
|
91
101
|
}
|
|
92
102
|
/**
|
|
93
103
|
* SmartExcelImporter - Extracts structured knowledge from Excel files
|
|
@@ -96,6 +106,7 @@ export declare class SmartExcelImporter {
|
|
|
96
106
|
private brain;
|
|
97
107
|
private extractor;
|
|
98
108
|
private nlp;
|
|
109
|
+
private relationshipExtractor;
|
|
99
110
|
private excelHandler;
|
|
100
111
|
constructor(brain: Brainy);
|
|
101
112
|
/**
|
|
@@ -119,7 +130,7 @@ export declare class SmartExcelImporter {
|
|
|
119
130
|
*/
|
|
120
131
|
private mapTypeString;
|
|
121
132
|
/**
|
|
122
|
-
* Infer relationship type from context
|
|
133
|
+
* Infer relationship type from context using SmartRelationshipExtractor
|
|
123
134
|
*/
|
|
124
135
|
private inferRelationship;
|
|
125
136
|
/**
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
*/
|
|
11
11
|
import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
|
|
12
12
|
import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
|
|
13
|
+
import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
|
|
13
14
|
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
14
15
|
import { ExcelHandler } from '../augmentations/intelligentImport/handlers/excelHandler.js';
|
|
15
16
|
/**
|
|
@@ -20,6 +21,7 @@ export class SmartExcelImporter {
|
|
|
20
21
|
this.brain = brain;
|
|
21
22
|
this.extractor = new NeuralEntityExtractor(brain);
|
|
22
23
|
this.nlp = new NaturalLanguageProcessor(brain);
|
|
24
|
+
this.relationshipExtractor = new SmartRelationshipExtractor(brain);
|
|
23
25
|
this.excelHandler = new ExcelHandler();
|
|
24
26
|
}
|
|
25
27
|
/**
|
|
@@ -131,7 +133,9 @@ export class SmartExcelImporter {
|
|
|
131
133
|
if (opts.enableRelationshipInference) {
|
|
132
134
|
// Extract relationships from definition text
|
|
133
135
|
for (const relEntity of relatedEntities) {
|
|
134
|
-
const verbType = await this.inferRelationship(term, relEntity.text, definition
|
|
136
|
+
const verbType = await this.inferRelationship(term, relEntity.text, definition, mainEntityType, // Pass subject type hint
|
|
137
|
+
relEntity.type // Pass object type hint
|
|
138
|
+
);
|
|
135
139
|
relationships.push({
|
|
136
140
|
from: entityId,
|
|
137
141
|
to: relEntity.text,
|
|
@@ -145,10 +149,13 @@ export class SmartExcelImporter {
|
|
|
145
149
|
const terms = relatedTerms.split(/[,;]/).map(t => t.trim()).filter(Boolean);
|
|
146
150
|
for (const relTerm of terms) {
|
|
147
151
|
if (relTerm.toLowerCase() !== term.toLowerCase()) {
|
|
152
|
+
// Use SmartRelationshipExtractor even for explicit relationships
|
|
153
|
+
const verbType = await this.inferRelationship(term, relTerm, `${term} related to ${relTerm}. ${definition}`, // Combine for better context
|
|
154
|
+
mainEntityType);
|
|
148
155
|
relationships.push({
|
|
149
156
|
from: entityId,
|
|
150
157
|
to: relTerm,
|
|
151
|
-
type:
|
|
158
|
+
type: verbType,
|
|
152
159
|
confidence: 0.9,
|
|
153
160
|
evidence: `Explicitly listed in "Related" column`
|
|
154
161
|
});
|
|
@@ -203,6 +210,26 @@ export class SmartExcelImporter {
|
|
|
203
210
|
phase: 'extracting'
|
|
204
211
|
});
|
|
205
212
|
}
|
|
213
|
+
// Group rows by sheet for VFS extraction (v4.2.0)
|
|
214
|
+
const sheetGroups = new Map();
|
|
215
|
+
extractedRows.forEach((extractedRow, index) => {
|
|
216
|
+
const originalRow = rows[index];
|
|
217
|
+
const sheetName = originalRow._sheet || 'Sheet1';
|
|
218
|
+
if (!sheetGroups.has(sheetName)) {
|
|
219
|
+
sheetGroups.set(sheetName, []);
|
|
220
|
+
}
|
|
221
|
+
sheetGroups.get(sheetName).push(extractedRow);
|
|
222
|
+
});
|
|
223
|
+
// Build sheet-specific statistics
|
|
224
|
+
const sheets = Array.from(sheetGroups.entries()).map(([name, sheetRows]) => ({
|
|
225
|
+
name,
|
|
226
|
+
rows: sheetRows,
|
|
227
|
+
stats: {
|
|
228
|
+
rowCount: sheetRows.length,
|
|
229
|
+
entityCount: sheetRows.reduce((sum, row) => sum + 1 + row.relatedEntities.length, 0),
|
|
230
|
+
relationshipCount: sheetRows.reduce((sum, row) => sum + row.relationships.length, 0)
|
|
231
|
+
}
|
|
232
|
+
}));
|
|
206
233
|
return {
|
|
207
234
|
rowsProcessed: rows.length,
|
|
208
235
|
entitiesExtracted: extractedRows.reduce((sum, row) => sum + 1 + row.relatedEntities.length, 0),
|
|
@@ -210,7 +237,8 @@ export class SmartExcelImporter {
|
|
|
210
237
|
rows: extractedRows,
|
|
211
238
|
entityMap,
|
|
212
239
|
processingTime: Date.now() - startTime,
|
|
213
|
-
stats
|
|
240
|
+
stats,
|
|
241
|
+
sheets
|
|
214
242
|
};
|
|
215
243
|
}
|
|
216
244
|
/**
|
|
@@ -270,29 +298,16 @@ export class SmartExcelImporter {
|
|
|
270
298
|
return mapping[normalized] || NounType.Thing;
|
|
271
299
|
}
|
|
272
300
|
/**
|
|
273
|
-
* Infer relationship type from context
|
|
301
|
+
* Infer relationship type from context using SmartRelationshipExtractor
|
|
274
302
|
*/
|
|
275
|
-
async inferRelationship(fromTerm, toTerm, context) {
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
[new RegExp(`capital.*${toTerm}`, 'i'), VerbType.Contains],
|
|
284
|
-
[new RegExp(`created by.*${toTerm}`, 'i'), VerbType.CreatedBy],
|
|
285
|
-
[new RegExp(`authored by.*${toTerm}`, 'i'), VerbType.CreatedBy],
|
|
286
|
-
[new RegExp(`part of.*${toTerm}`, 'i'), VerbType.PartOf],
|
|
287
|
-
[new RegExp(`related to.*${toTerm}`, 'i'), VerbType.RelatedTo]
|
|
288
|
-
];
|
|
289
|
-
for (const [pattern, verbType] of patterns) {
|
|
290
|
-
if (pattern.test(lowerContext)) {
|
|
291
|
-
return verbType;
|
|
292
|
-
}
|
|
293
|
-
}
|
|
294
|
-
// Default to RelatedTo
|
|
295
|
-
return VerbType.RelatedTo;
|
|
303
|
+
async inferRelationship(fromTerm, toTerm, context, fromType, toType) {
|
|
304
|
+
// Use SmartRelationshipExtractor for robust relationship classification
|
|
305
|
+
const result = await this.relationshipExtractor.infer(fromTerm, toTerm, context, {
|
|
306
|
+
subjectType: fromType,
|
|
307
|
+
objectType: toType
|
|
308
|
+
});
|
|
309
|
+
// Return inferred type or fallback to RelatedTo
|
|
310
|
+
return result?.type || VerbType.RelatedTo;
|
|
296
311
|
}
|
|
297
312
|
/**
|
|
298
313
|
* Generate consistent entity ID from name
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
|
|
13
13
|
import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
|
|
14
|
+
import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
|
|
14
15
|
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
15
16
|
/**
|
|
16
17
|
* SmartJSONImporter - Extracts structured knowledge from JSON files
|
|
@@ -20,6 +21,7 @@ export class SmartJSONImporter {
|
|
|
20
21
|
this.brain = brain;
|
|
21
22
|
this.extractor = new NeuralEntityExtractor(brain);
|
|
22
23
|
this.nlp = new NaturalLanguageProcessor(brain);
|
|
24
|
+
this.relationshipExtractor = new SmartRelationshipExtractor(brain);
|
|
23
25
|
}
|
|
24
26
|
/**
|
|
25
27
|
* Initialize the importer
|
|
@@ -120,12 +122,20 @@ export class SmartJSONImporter {
|
|
|
120
122
|
// Create hierarchical relationship if parent exists
|
|
121
123
|
if (options.enableHierarchicalRelationships && parentPath && entityMap.has(parentPath)) {
|
|
122
124
|
const parentId = entityMap.get(parentPath);
|
|
125
|
+
// Extract parent and child names from paths
|
|
126
|
+
const parentName = parentPath.split('.').pop()?.replace(/\[(\d+)\]/, 'item $1') || 'parent';
|
|
127
|
+
const childName = entity.name;
|
|
128
|
+
// Infer relationship type using SmartRelationshipExtractor
|
|
129
|
+
const context = `Hierarchical JSON structure: ${parentName} contains ${childName}. Parent path: ${parentPath}, Child path: ${path}`;
|
|
130
|
+
const inferredRelationship = await this.relationshipExtractor.infer(parentName, childName, context, {
|
|
131
|
+
objectType: entity.type // Pass child entity type as hint
|
|
132
|
+
});
|
|
123
133
|
relationships.push({
|
|
124
134
|
from: parentId,
|
|
125
135
|
to: entity.id,
|
|
126
|
-
type: VerbType.Contains,
|
|
127
|
-
confidence: 0.95,
|
|
128
|
-
evidence: `Hierarchical relationship: ${parentPath} contains ${path}`
|
|
136
|
+
type: inferredRelationship?.type || VerbType.Contains, // Fallback to Contains for hierarchical relationships
|
|
137
|
+
confidence: inferredRelationship?.confidence || 0.95,
|
|
138
|
+
evidence: inferredRelationship?.evidence || `Hierarchical relationship: ${parentPath} contains ${path}`
|
|
129
139
|
});
|
|
130
140
|
}
|
|
131
141
|
}
|
|
@@ -165,12 +175,21 @@ export class SmartJSONImporter {
|
|
|
165
175
|
// Link to parent if exists
|
|
166
176
|
if (options.enableHierarchicalRelationships && parentPath && entityMap.has(parentPath)) {
|
|
167
177
|
const parentId = entityMap.get(parentPath);
|
|
178
|
+
// Extract parent name from path
|
|
179
|
+
const parentName = parentPath.split('.').pop()?.replace(/\[(\d+)\]/, 'item $1') || 'parent';
|
|
180
|
+
const childName = entity.name;
|
|
181
|
+
// Infer relationship type using SmartRelationshipExtractor
|
|
182
|
+
// Context: entity was extracted from string value within parent container
|
|
183
|
+
const context = `Entity "${childName}" found in text value at path ${path} within parent "${parentName}". Full text: "${node.substring(0, 200)}..."`;
|
|
184
|
+
const inferredRelationship = await this.relationshipExtractor.infer(parentName, childName, context, {
|
|
185
|
+
objectType: entity.type // Pass extracted entity type as hint
|
|
186
|
+
});
|
|
168
187
|
relationships.push({
|
|
169
188
|
from: parentId,
|
|
170
189
|
to: entity.id,
|
|
171
|
-
type: VerbType.RelatedTo,
|
|
172
|
-
confidence: extracted.confidence * 0.9,
|
|
173
|
-
evidence: `Found in: ${path}`
|
|
190
|
+
type: inferredRelationship?.type || VerbType.RelatedTo, // Fallback to RelatedTo for text extraction
|
|
191
|
+
confidence: inferredRelationship?.confidence || (extracted.confidence * 0.9),
|
|
192
|
+
evidence: inferredRelationship?.evidence || `Found in: ${path}`
|
|
174
193
|
});
|
|
175
194
|
}
|
|
176
195
|
}
|
|
@@ -107,6 +107,7 @@ export declare class SmartMarkdownImporter {
|
|
|
107
107
|
private brain;
|
|
108
108
|
private extractor;
|
|
109
109
|
private nlp;
|
|
110
|
+
private relationshipExtractor;
|
|
110
111
|
constructor(brain: Brainy);
|
|
111
112
|
/**
|
|
112
113
|
* Initialize the importer
|
|
@@ -145,7 +146,7 @@ export declare class SmartMarkdownImporter {
|
|
|
145
146
|
*/
|
|
146
147
|
private entitiesAreRelated;
|
|
147
148
|
/**
|
|
148
|
-
* Infer relationship type from context
|
|
149
|
+
* Infer relationship type from context using SmartRelationshipExtractor
|
|
149
150
|
*/
|
|
150
151
|
private inferRelationship;
|
|
151
152
|
/**
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
|
|
13
13
|
import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
|
|
14
|
+
import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
|
|
14
15
|
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
15
16
|
/**
|
|
16
17
|
* SmartMarkdownImporter - Extracts structured knowledge from Markdown files
|
|
@@ -20,6 +21,7 @@ export class SmartMarkdownImporter {
|
|
|
20
21
|
this.brain = brain;
|
|
21
22
|
this.extractor = new NeuralEntityExtractor(brain);
|
|
22
23
|
this.nlp = new NaturalLanguageProcessor(brain);
|
|
24
|
+
this.relationshipExtractor = new SmartRelationshipExtractor(brain);
|
|
23
25
|
}
|
|
24
26
|
/**
|
|
25
27
|
* Initialize the importer
|
|
@@ -321,23 +323,16 @@ export class SmartMarkdownImporter {
|
|
|
321
323
|
return Math.abs(index1 - index2) < 300;
|
|
322
324
|
}
|
|
323
325
|
/**
|
|
324
|
-
* Infer relationship type from context
|
|
326
|
+
* Infer relationship type from context using SmartRelationshipExtractor
|
|
325
327
|
*/
|
|
326
|
-
async inferRelationship(fromEntity, toEntity, context) {
|
|
327
|
-
|
|
328
|
-
const
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
];
|
|
335
|
-
for (const [pattern, verbType] of patterns) {
|
|
336
|
-
if (pattern.test(lowerContext)) {
|
|
337
|
-
return verbType;
|
|
338
|
-
}
|
|
339
|
-
}
|
|
340
|
-
return VerbType.RelatedTo;
|
|
328
|
+
async inferRelationship(fromEntity, toEntity, context, fromType, toType) {
|
|
329
|
+
// Use SmartRelationshipExtractor for robust relationship classification
|
|
330
|
+
const result = await this.relationshipExtractor.infer(fromEntity, toEntity, context, {
|
|
331
|
+
subjectType: fromType,
|
|
332
|
+
objectType: toType
|
|
333
|
+
});
|
|
334
|
+
// Return inferred type or fallback to RelatedTo
|
|
335
|
+
return result?.type || VerbType.RelatedTo;
|
|
341
336
|
}
|
|
342
337
|
/**
|
|
343
338
|
* Generate consistent entity ID
|
|
@@ -111,6 +111,7 @@ export declare class SmartPDFImporter {
|
|
|
111
111
|
private brain;
|
|
112
112
|
private extractor;
|
|
113
113
|
private nlp;
|
|
114
|
+
private relationshipExtractor;
|
|
114
115
|
private pdfHandler;
|
|
115
116
|
constructor(brain: Brainy);
|
|
116
117
|
/**
|
|
@@ -142,7 +143,7 @@ export declare class SmartPDFImporter {
|
|
|
142
143
|
*/
|
|
143
144
|
private extractRelationshipContext;
|
|
144
145
|
/**
|
|
145
|
-
* Infer relationship type from context
|
|
146
|
+
* Infer relationship type from context using SmartRelationshipExtractor
|
|
146
147
|
*/
|
|
147
148
|
private inferRelationship;
|
|
148
149
|
/**
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
|
|
13
13
|
import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
|
|
14
|
+
import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
|
|
14
15
|
import { VerbType } from '../types/graphTypes.js';
|
|
15
16
|
import { PDFHandler } from '../augmentations/intelligentImport/handlers/pdfHandler.js';
|
|
16
17
|
/**
|
|
@@ -21,6 +22,7 @@ export class SmartPDFImporter {
|
|
|
21
22
|
this.brain = brain;
|
|
22
23
|
this.extractor = new NeuralEntityExtractor(brain);
|
|
23
24
|
this.nlp = new NaturalLanguageProcessor(brain);
|
|
25
|
+
this.relationshipExtractor = new SmartRelationshipExtractor(brain);
|
|
24
26
|
this.pdfHandler = new PDFHandler();
|
|
25
27
|
}
|
|
26
28
|
/**
|
|
@@ -272,29 +274,16 @@ export class SmartPDFImporter {
|
|
|
272
274
|
return text.substring(start, end + 100).trim();
|
|
273
275
|
}
|
|
274
276
|
/**
|
|
275
|
-
* Infer relationship type from context
|
|
277
|
+
* Infer relationship type from context using SmartRelationshipExtractor
|
|
276
278
|
*/
|
|
277
|
-
async inferRelationship(fromEntity, toEntity, context) {
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
[new RegExp(`${fromEntity}.*created.*${toEntity}`, 'i'), VerbType.Creates],
|
|
286
|
-
[new RegExp(`${fromEntity}.*authored.*${toEntity}`, 'i'), VerbType.CreatedBy],
|
|
287
|
-
[new RegExp(`${fromEntity}.*part of.*${toEntity}`, 'i'), VerbType.PartOf],
|
|
288
|
-
[new RegExp(`${fromEntity}.*related to.*${toEntity}`, 'i'), VerbType.RelatedTo],
|
|
289
|
-
[new RegExp(`${fromEntity}.*and.*${toEntity}`, 'i'), VerbType.RelatedTo]
|
|
290
|
-
];
|
|
291
|
-
for (const [pattern, verbType] of patterns) {
|
|
292
|
-
if (pattern.test(lowerContext)) {
|
|
293
|
-
return verbType;
|
|
294
|
-
}
|
|
295
|
-
}
|
|
296
|
-
// Default to RelatedTo
|
|
297
|
-
return VerbType.RelatedTo;
|
|
279
|
+
async inferRelationship(fromEntity, toEntity, context, fromType, toType) {
|
|
280
|
+
// Use SmartRelationshipExtractor for robust relationship classification
|
|
281
|
+
const result = await this.relationshipExtractor.infer(fromEntity, toEntity, context, {
|
|
282
|
+
subjectType: fromType,
|
|
283
|
+
objectType: toType
|
|
284
|
+
});
|
|
285
|
+
// Return inferred type or fallback to RelatedTo
|
|
286
|
+
return result?.type || VerbType.RelatedTo;
|
|
298
287
|
}
|
|
299
288
|
/**
|
|
300
289
|
* Generate consistent entity ID
|