@soulcraft/brainy 4.1.4 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/import/FormatDetector.d.ts +6 -1
  2. package/dist/import/FormatDetector.js +40 -1
  3. package/dist/import/ImportCoordinator.d.ts +102 -4
  4. package/dist/import/ImportCoordinator.js +248 -6
  5. package/dist/import/InstancePool.d.ts +136 -0
  6. package/dist/import/InstancePool.js +231 -0
  7. package/dist/importers/SmartCSVImporter.d.ts +2 -1
  8. package/dist/importers/SmartCSVImporter.js +11 -22
  9. package/dist/importers/SmartDOCXImporter.d.ts +125 -0
  10. package/dist/importers/SmartDOCXImporter.js +227 -0
  11. package/dist/importers/SmartExcelImporter.d.ts +12 -1
  12. package/dist/importers/SmartExcelImporter.js +40 -25
  13. package/dist/importers/SmartJSONImporter.d.ts +1 -0
  14. package/dist/importers/SmartJSONImporter.js +25 -6
  15. package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
  16. package/dist/importers/SmartMarkdownImporter.js +11 -16
  17. package/dist/importers/SmartPDFImporter.d.ts +2 -1
  18. package/dist/importers/SmartPDFImporter.js +11 -22
  19. package/dist/importers/SmartYAMLImporter.d.ts +121 -0
  20. package/dist/importers/SmartYAMLImporter.js +275 -0
  21. package/dist/importers/VFSStructureGenerator.js +12 -0
  22. package/dist/neural/SmartExtractor.d.ts +279 -0
  23. package/dist/neural/SmartExtractor.js +592 -0
  24. package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
  25. package/dist/neural/SmartRelationshipExtractor.js +396 -0
  26. package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
  27. package/dist/neural/embeddedTypeEmbeddings.js +2 -2
  28. package/dist/neural/entityExtractor.d.ts +3 -0
  29. package/dist/neural/entityExtractor.js +34 -36
  30. package/dist/neural/presets.d.ts +189 -0
  31. package/dist/neural/presets.js +365 -0
  32. package/dist/neural/signals/ContextSignal.d.ts +166 -0
  33. package/dist/neural/signals/ContextSignal.js +646 -0
  34. package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
  35. package/dist/neural/signals/EmbeddingSignal.js +435 -0
  36. package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
  37. package/dist/neural/signals/ExactMatchSignal.js +542 -0
  38. package/dist/neural/signals/PatternSignal.d.ts +159 -0
  39. package/dist/neural/signals/PatternSignal.js +478 -0
  40. package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
  41. package/dist/neural/signals/VerbContextSignal.js +390 -0
  42. package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
  43. package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
  44. package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
  45. package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
  46. package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
  47. package/dist/neural/signals/VerbPatternSignal.js +457 -0
  48. package/dist/types/graphTypes.d.ts +2 -0
  49. package/package.json +4 -1
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Smart DOCX Importer
3
+ *
4
+ * Extracts entities and relationships from Word documents using:
5
+ * - Mammoth parser for DOCX → HTML/text conversion
6
+ * - Heading extraction for document structure
7
+ * - Table extraction for structured data
8
+ * - NeuralEntityExtractor for entity extraction from paragraphs
9
+ * - NaturalLanguageProcessor for relationship inference
10
+ * - Hierarchical relationship creation based on heading hierarchy
11
+ *
12
+ * v4.2.0: New format handler
13
+ * NO MOCKS - Production-ready implementation
14
+ */
15
+ import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
16
+ import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
17
+ import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
18
+ import { VerbType } from '../types/graphTypes.js';
19
+ // Dynamic import for mammoth (ESM compatibility)
20
+ let mammoth;
21
+ /**
22
+ * SmartDOCXImporter - Extracts structured knowledge from Word documents
23
+ */
24
+ export class SmartDOCXImporter {
25
+ constructor(brain) {
26
+ this.mammothLoaded = false;
27
+ this.brain = brain;
28
+ this.extractor = new NeuralEntityExtractor(brain);
29
+ this.nlp = new NaturalLanguageProcessor(brain);
30
+ this.relationshipExtractor = new SmartRelationshipExtractor(brain);
31
+ }
32
+ /**
33
+ * Initialize the importer
34
+ */
35
+ async init() {
36
+ await this.nlp.init();
37
+ // Lazy load mammoth
38
+ if (!this.mammothLoaded) {
39
+ try {
40
+ mammoth = await import('mammoth');
41
+ this.mammothLoaded = true;
42
+ }
43
+ catch (error) {
44
+ throw new Error(`Failed to load mammoth parser: ${error.message}`);
45
+ }
46
+ }
47
+ }
48
+ /**
49
+ * Extract entities and relationships from DOCX buffer
50
+ */
51
+ async extract(buffer, options = {}) {
52
+ const startTime = Date.now();
53
+ // Ensure mammoth is loaded
54
+ if (!this.mammothLoaded) {
55
+ await this.init();
56
+ }
57
+ // Extract raw text for entity extraction
58
+ const textResult = await mammoth.extractRawText({ buffer });
59
+ // Extract HTML for structure analysis (headings, tables)
60
+ const htmlResult = await mammoth.convertToHtml({ buffer });
61
+ // Process the document
62
+ const result = await this.extractFromContent(textResult.value, htmlResult.value, options);
63
+ result.processingTime = Date.now() - startTime;
64
+ return result;
65
+ }
66
+ /**
67
+ * Extract entities and relationships from parsed DOCX content
68
+ */
69
+ async extractFromContent(rawText, html, options) {
70
+ const opts = {
71
+ enableNeuralExtraction: options.enableNeuralExtraction !== false,
72
+ enableHierarchicalRelationships: options.enableHierarchicalRelationships !== false,
73
+ enableConceptExtraction: options.enableConceptExtraction !== false,
74
+ confidenceThreshold: options.confidenceThreshold || 0.6,
75
+ minParagraphLength: options.minParagraphLength || 20
76
+ };
77
+ const entities = [];
78
+ const relationships = [];
79
+ const entityMap = new Map();
80
+ const stats = {
81
+ byType: {},
82
+ bySection: {},
83
+ byConfidence: { high: 0, medium: 0, low: 0 }
84
+ };
85
+ // Parse document structure from HTML
86
+ const structure = this.parseStructure(html);
87
+ // Split into paragraphs
88
+ const paragraphs = rawText.split(/\n\n+/).filter(p => p.trim().length >= opts.minParagraphLength);
89
+ let currentSection = 'Introduction';
90
+ let headingIndex = 0;
91
+ // Process each paragraph
92
+ for (let i = 0; i < paragraphs.length; i++) {
93
+ const paragraph = paragraphs[i].trim();
94
+ // Check if this paragraph is a heading
95
+ if (headingIndex < structure.headings.length) {
96
+ const heading = structure.headings[headingIndex];
97
+ if (paragraph.startsWith(heading.text) || heading.text.includes(paragraph.substring(0, 50))) {
98
+ currentSection = heading.text;
99
+ headingIndex++;
100
+ stats.bySection[currentSection] = 0;
101
+ continue;
102
+ }
103
+ }
104
+ // Extract entities from paragraph
105
+ if (opts.enableNeuralExtraction) {
106
+ const extractedEntities = await this.extractor.extract(paragraph, {
107
+ confidence: opts.confidenceThreshold
108
+ });
109
+ for (const extracted of extractedEntities) {
110
+ const entityId = `para${i}:${extracted.text}`;
111
+ const entity = {
112
+ id: entityId,
113
+ name: extracted.text,
114
+ type: extracted.type,
115
+ description: paragraph,
116
+ confidence: extracted.confidence,
117
+ weight: extracted.weight || 1.0,
118
+ section: currentSection,
119
+ paragraphIndex: i,
120
+ metadata: {
121
+ position: extracted.position,
122
+ headingContext: currentSection
123
+ }
124
+ };
125
+ entities.push(entity);
126
+ entityMap.set(entityId, entityId);
127
+ // Update stats
128
+ stats.byType[entity.type] = (stats.byType[entity.type] || 0) + 1;
129
+ stats.bySection[currentSection] = (stats.bySection[currentSection] || 0) + 1;
130
+ if (entity.confidence > 0.8)
131
+ stats.byConfidence.high++;
132
+ else if (entity.confidence >= 0.6)
133
+ stats.byConfidence.medium++;
134
+ else
135
+ stats.byConfidence.low++;
136
+ }
137
+ }
138
+ // Report progress
139
+ if (options.onProgress && i % 10 === 0) {
140
+ options.onProgress({
141
+ processed: i,
142
+ entities: entities.length,
143
+ relationships: relationships.length
144
+ });
145
+ }
146
+ }
147
+ // Create hierarchical relationships based on sections
148
+ if (opts.enableHierarchicalRelationships) {
149
+ const entitiesBySection = new Map();
150
+ for (const entity of entities) {
151
+ const section = entity.section || 'Unknown';
152
+ if (!entitiesBySection.has(section)) {
153
+ entitiesBySection.set(section, []);
154
+ }
155
+ entitiesBySection.get(section).push(entity);
156
+ }
157
+ // Create relationships within sections
158
+ for (const [section, sectionEntities] of entitiesBySection) {
159
+ for (let i = 0; i < sectionEntities.length - 1; i++) {
160
+ for (let j = i + 1; j < Math.min(i + 3, sectionEntities.length); j++) {
161
+ const entityA = sectionEntities[i];
162
+ const entityB = sectionEntities[j];
163
+ // Infer relationship type using SmartRelationshipExtractor
164
+ // Combine entity descriptions for better context
165
+ const context = `In section "${section}": ${entityA.description.substring(0, 150)}... ${entityB.description.substring(0, 150)}...`;
166
+ const inferredRelationship = await this.relationshipExtractor.infer(entityA.name, entityB.name, context, {
167
+ subjectType: entityA.type,
168
+ objectType: entityB.type
169
+ });
170
+ relationships.push({
171
+ from: entityA.id,
172
+ to: entityB.id,
173
+ type: inferredRelationship?.type || VerbType.RelatedTo, // Fallback to RelatedTo for co-occurrence
174
+ confidence: inferredRelationship?.confidence || 0.7,
175
+ weight: inferredRelationship?.weight || 0.8,
176
+ evidence: inferredRelationship?.evidence || `Both entities appear in section: ${section}`
177
+ });
178
+ }
179
+ }
180
+ }
181
+ }
182
+ // Final progress report
183
+ if (options.onProgress) {
184
+ options.onProgress({
185
+ processed: paragraphs.length,
186
+ entities: entities.length,
187
+ relationships: relationships.length
188
+ });
189
+ }
190
+ return {
191
+ paragraphsProcessed: paragraphs.length,
192
+ entitiesExtracted: entities.length,
193
+ relationshipsInferred: relationships.length,
194
+ entities,
195
+ relationships,
196
+ entityMap,
197
+ processingTime: 0, // Will be set by caller
198
+ structure,
199
+ stats
200
+ };
201
+ }
202
+ /**
203
+ * Parse document structure from HTML
204
+ */
205
+ parseStructure(html) {
206
+ const headings = [];
207
+ // Extract headings (h1-h6)
208
+ const headingRegex = /<h([1-6])>(.*?)<\/h\1>/gi;
209
+ let match;
210
+ let index = 0;
211
+ while ((match = headingRegex.exec(html)) !== null) {
212
+ const level = parseInt(match[1]);
213
+ const text = match[2].replace(/<[^>]+>/g, '').trim(); // Strip HTML tags
214
+ headings.push({ level, text, index: index++ });
215
+ }
216
+ // Count paragraphs
217
+ const paragraphCount = (html.match(/<p>/g) || []).length;
218
+ // Count tables
219
+ const tableCount = (html.match(/<table>/g) || []).length;
220
+ return {
221
+ headings,
222
+ paragraphCount,
223
+ tableCount
224
+ };
225
+ }
226
+ }
227
+ //# sourceMappingURL=SmartDOCXImporter.js.map
@@ -88,6 +88,16 @@ export interface SmartExcelResult {
88
88
  low: number;
89
89
  };
90
90
  };
91
+ /** Sheet-specific data for VFS extraction (v4.2.0) */
92
+ sheets?: Array<{
93
+ name: string;
94
+ rows: ExtractedRow[];
95
+ stats: {
96
+ rowCount: number;
97
+ entityCount: number;
98
+ relationshipCount: number;
99
+ };
100
+ }>;
91
101
  }
92
102
  /**
93
103
  * SmartExcelImporter - Extracts structured knowledge from Excel files
@@ -96,6 +106,7 @@ export declare class SmartExcelImporter {
96
106
  private brain;
97
107
  private extractor;
98
108
  private nlp;
109
+ private relationshipExtractor;
99
110
  private excelHandler;
100
111
  constructor(brain: Brainy);
101
112
  /**
@@ -119,7 +130,7 @@ export declare class SmartExcelImporter {
119
130
  */
120
131
  private mapTypeString;
121
132
  /**
122
- * Infer relationship type from context
133
+ * Infer relationship type from context using SmartRelationshipExtractor
123
134
  */
124
135
  private inferRelationship;
125
136
  /**
@@ -10,6 +10,7 @@
10
10
  */
11
11
  import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
12
12
  import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
13
+ import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
13
14
  import { NounType, VerbType } from '../types/graphTypes.js';
14
15
  import { ExcelHandler } from '../augmentations/intelligentImport/handlers/excelHandler.js';
15
16
  /**
@@ -20,6 +21,7 @@ export class SmartExcelImporter {
20
21
  this.brain = brain;
21
22
  this.extractor = new NeuralEntityExtractor(brain);
22
23
  this.nlp = new NaturalLanguageProcessor(brain);
24
+ this.relationshipExtractor = new SmartRelationshipExtractor(brain);
23
25
  this.excelHandler = new ExcelHandler();
24
26
  }
25
27
  /**
@@ -131,7 +133,9 @@ export class SmartExcelImporter {
131
133
  if (opts.enableRelationshipInference) {
132
134
  // Extract relationships from definition text
133
135
  for (const relEntity of relatedEntities) {
134
- const verbType = await this.inferRelationship(term, relEntity.text, definition);
136
+ const verbType = await this.inferRelationship(term, relEntity.text, definition, mainEntityType, // Pass subject type hint
137
+ relEntity.type // Pass object type hint
138
+ );
135
139
  relationships.push({
136
140
  from: entityId,
137
141
  to: relEntity.text,
@@ -145,10 +149,13 @@ export class SmartExcelImporter {
145
149
  const terms = relatedTerms.split(/[,;]/).map(t => t.trim()).filter(Boolean);
146
150
  for (const relTerm of terms) {
147
151
  if (relTerm.toLowerCase() !== term.toLowerCase()) {
152
+ // Use SmartRelationshipExtractor even for explicit relationships
153
+ const verbType = await this.inferRelationship(term, relTerm, `${term} related to ${relTerm}. ${definition}`, // Combine for better context
154
+ mainEntityType);
148
155
  relationships.push({
149
156
  from: entityId,
150
157
  to: relTerm,
151
- type: VerbType.RelatedTo,
158
+ type: verbType,
152
159
  confidence: 0.9,
153
160
  evidence: `Explicitly listed in "Related" column`
154
161
  });
@@ -203,6 +210,26 @@ export class SmartExcelImporter {
203
210
  phase: 'extracting'
204
211
  });
205
212
  }
213
+ // Group rows by sheet for VFS extraction (v4.2.0)
214
+ const sheetGroups = new Map();
215
+ extractedRows.forEach((extractedRow, index) => {
216
+ const originalRow = rows[index];
217
+ const sheetName = originalRow._sheet || 'Sheet1';
218
+ if (!sheetGroups.has(sheetName)) {
219
+ sheetGroups.set(sheetName, []);
220
+ }
221
+ sheetGroups.get(sheetName).push(extractedRow);
222
+ });
223
+ // Build sheet-specific statistics
224
+ const sheets = Array.from(sheetGroups.entries()).map(([name, sheetRows]) => ({
225
+ name,
226
+ rows: sheetRows,
227
+ stats: {
228
+ rowCount: sheetRows.length,
229
+ entityCount: sheetRows.reduce((sum, row) => sum + 1 + row.relatedEntities.length, 0),
230
+ relationshipCount: sheetRows.reduce((sum, row) => sum + row.relationships.length, 0)
231
+ }
232
+ }));
206
233
  return {
207
234
  rowsProcessed: rows.length,
208
235
  entitiesExtracted: extractedRows.reduce((sum, row) => sum + 1 + row.relatedEntities.length, 0),
@@ -210,7 +237,8 @@ export class SmartExcelImporter {
210
237
  rows: extractedRows,
211
238
  entityMap,
212
239
  processingTime: Date.now() - startTime,
213
- stats
240
+ stats,
241
+ sheets
214
242
  };
215
243
  }
216
244
  /**
@@ -270,29 +298,16 @@ export class SmartExcelImporter {
270
298
  return mapping[normalized] || NounType.Thing;
271
299
  }
272
300
  /**
273
- * Infer relationship type from context
301
+ * Infer relationship type from context using SmartRelationshipExtractor
274
302
  */
275
- async inferRelationship(fromTerm, toTerm, context) {
276
- const lowerContext = context.toLowerCase();
277
- // Pattern-based relationship detection
278
- const patterns = [
279
- [new RegExp(`${toTerm}.*of.*${fromTerm}`, 'i'), VerbType.PartOf],
280
- [new RegExp(`${fromTerm}.*contains.*${toTerm}`, 'i'), VerbType.Contains],
281
- [new RegExp(`located in.*${toTerm}`, 'i'), VerbType.LocatedAt],
282
- [new RegExp(`ruled by.*${toTerm}`, 'i'), VerbType.Owns],
283
- [new RegExp(`capital.*${toTerm}`, 'i'), VerbType.Contains],
284
- [new RegExp(`created by.*${toTerm}`, 'i'), VerbType.CreatedBy],
285
- [new RegExp(`authored by.*${toTerm}`, 'i'), VerbType.CreatedBy],
286
- [new RegExp(`part of.*${toTerm}`, 'i'), VerbType.PartOf],
287
- [new RegExp(`related to.*${toTerm}`, 'i'), VerbType.RelatedTo]
288
- ];
289
- for (const [pattern, verbType] of patterns) {
290
- if (pattern.test(lowerContext)) {
291
- return verbType;
292
- }
293
- }
294
- // Default to RelatedTo
295
- return VerbType.RelatedTo;
303
+ async inferRelationship(fromTerm, toTerm, context, fromType, toType) {
304
+ // Use SmartRelationshipExtractor for robust relationship classification
305
+ const result = await this.relationshipExtractor.infer(fromTerm, toTerm, context, {
306
+ subjectType: fromType,
307
+ objectType: toType
308
+ });
309
+ // Return inferred type or fallback to RelatedTo
310
+ return result?.type || VerbType.RelatedTo;
296
311
  }
297
312
  /**
298
313
  * Generate consistent entity ID from name
@@ -95,6 +95,7 @@ export declare class SmartJSONImporter {
95
95
  private brain;
96
96
  private extractor;
97
97
  private nlp;
98
+ private relationshipExtractor;
98
99
  constructor(brain: Brainy);
99
100
  /**
100
101
  * Initialize the importer
@@ -11,6 +11,7 @@
11
11
  */
12
12
  import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
13
13
  import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
14
+ import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
14
15
  import { NounType, VerbType } from '../types/graphTypes.js';
15
16
  /**
16
17
  * SmartJSONImporter - Extracts structured knowledge from JSON files
@@ -20,6 +21,7 @@ export class SmartJSONImporter {
20
21
  this.brain = brain;
21
22
  this.extractor = new NeuralEntityExtractor(brain);
22
23
  this.nlp = new NaturalLanguageProcessor(brain);
24
+ this.relationshipExtractor = new SmartRelationshipExtractor(brain);
23
25
  }
24
26
  /**
25
27
  * Initialize the importer
@@ -120,12 +122,20 @@ export class SmartJSONImporter {
120
122
  // Create hierarchical relationship if parent exists
121
123
  if (options.enableHierarchicalRelationships && parentPath && entityMap.has(parentPath)) {
122
124
  const parentId = entityMap.get(parentPath);
125
+ // Extract parent and child names from paths
126
+ const parentName = parentPath.split('.').pop()?.replace(/\[(\d+)\]/, 'item $1') || 'parent';
127
+ const childName = entity.name;
128
+ // Infer relationship type using SmartRelationshipExtractor
129
+ const context = `Hierarchical JSON structure: ${parentName} contains ${childName}. Parent path: ${parentPath}, Child path: ${path}`;
130
+ const inferredRelationship = await this.relationshipExtractor.infer(parentName, childName, context, {
131
+ objectType: entity.type // Pass child entity type as hint
132
+ });
123
133
  relationships.push({
124
134
  from: parentId,
125
135
  to: entity.id,
126
- type: VerbType.Contains,
127
- confidence: 0.95,
128
- evidence: `Hierarchical relationship: ${parentPath} contains ${path}`
136
+ type: inferredRelationship?.type || VerbType.Contains, // Fallback to Contains for hierarchical relationships
137
+ confidence: inferredRelationship?.confidence || 0.95,
138
+ evidence: inferredRelationship?.evidence || `Hierarchical relationship: ${parentPath} contains ${path}`
129
139
  });
130
140
  }
131
141
  }
@@ -165,12 +175,21 @@ export class SmartJSONImporter {
165
175
  // Link to parent if exists
166
176
  if (options.enableHierarchicalRelationships && parentPath && entityMap.has(parentPath)) {
167
177
  const parentId = entityMap.get(parentPath);
178
+ // Extract parent name from path
179
+ const parentName = parentPath.split('.').pop()?.replace(/\[(\d+)\]/, 'item $1') || 'parent';
180
+ const childName = entity.name;
181
+ // Infer relationship type using SmartRelationshipExtractor
182
+ // Context: entity was extracted from string value within parent container
183
+ const context = `Entity "${childName}" found in text value at path ${path} within parent "${parentName}". Full text: "${node.substring(0, 200)}..."`;
184
+ const inferredRelationship = await this.relationshipExtractor.infer(parentName, childName, context, {
185
+ objectType: entity.type // Pass extracted entity type as hint
186
+ });
168
187
  relationships.push({
169
188
  from: parentId,
170
189
  to: entity.id,
171
- type: VerbType.RelatedTo,
172
- confidence: extracted.confidence * 0.9,
173
- evidence: `Found in: ${path}`
190
+ type: inferredRelationship?.type || VerbType.RelatedTo, // Fallback to RelatedTo for text extraction
191
+ confidence: inferredRelationship?.confidence || (extracted.confidence * 0.9),
192
+ evidence: inferredRelationship?.evidence || `Found in: ${path}`
174
193
  });
175
194
  }
176
195
  }
@@ -107,6 +107,7 @@ export declare class SmartMarkdownImporter {
107
107
  private brain;
108
108
  private extractor;
109
109
  private nlp;
110
+ private relationshipExtractor;
110
111
  constructor(brain: Brainy);
111
112
  /**
112
113
  * Initialize the importer
@@ -145,7 +146,7 @@ export declare class SmartMarkdownImporter {
145
146
  */
146
147
  private entitiesAreRelated;
147
148
  /**
148
- * Infer relationship type from context
149
+ * Infer relationship type from context using SmartRelationshipExtractor
149
150
  */
150
151
  private inferRelationship;
151
152
  /**
@@ -11,6 +11,7 @@
11
11
  */
12
12
  import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
13
13
  import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
14
+ import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
14
15
  import { NounType, VerbType } from '../types/graphTypes.js';
15
16
  /**
16
17
  * SmartMarkdownImporter - Extracts structured knowledge from Markdown files
@@ -20,6 +21,7 @@ export class SmartMarkdownImporter {
20
21
  this.brain = brain;
21
22
  this.extractor = new NeuralEntityExtractor(brain);
22
23
  this.nlp = new NaturalLanguageProcessor(brain);
24
+ this.relationshipExtractor = new SmartRelationshipExtractor(brain);
23
25
  }
24
26
  /**
25
27
  * Initialize the importer
@@ -321,23 +323,16 @@ export class SmartMarkdownImporter {
321
323
  return Math.abs(index1 - index2) < 300;
322
324
  }
323
325
  /**
324
- * Infer relationship type from context
326
+ * Infer relationship type from context using SmartRelationshipExtractor
325
327
  */
326
- async inferRelationship(fromEntity, toEntity, context) {
327
- const lowerContext = context.toLowerCase();
328
- const patterns = [
329
- [new RegExp(`${toEntity}.*of.*${fromEntity}`, 'i'), VerbType.PartOf],
330
- [new RegExp(`${fromEntity}.*contains.*${toEntity}`, 'i'), VerbType.Contains],
331
- [new RegExp(`${fromEntity}.*in.*${toEntity}`, 'i'), VerbType.LocatedAt],
332
- [new RegExp(`${fromEntity}.*created.*${toEntity}`, 'i'), VerbType.Creates],
333
- [new RegExp(`${fromEntity}.*and.*${toEntity}`, 'i'), VerbType.RelatedTo]
334
- ];
335
- for (const [pattern, verbType] of patterns) {
336
- if (pattern.test(lowerContext)) {
337
- return verbType;
338
- }
339
- }
340
- return VerbType.RelatedTo;
328
+ async inferRelationship(fromEntity, toEntity, context, fromType, toType) {
329
+ // Use SmartRelationshipExtractor for robust relationship classification
330
+ const result = await this.relationshipExtractor.infer(fromEntity, toEntity, context, {
331
+ subjectType: fromType,
332
+ objectType: toType
333
+ });
334
+ // Return inferred type or fallback to RelatedTo
335
+ return result?.type || VerbType.RelatedTo;
341
336
  }
342
337
  /**
343
338
  * Generate consistent entity ID
@@ -111,6 +111,7 @@ export declare class SmartPDFImporter {
111
111
  private brain;
112
112
  private extractor;
113
113
  private nlp;
114
+ private relationshipExtractor;
114
115
  private pdfHandler;
115
116
  constructor(brain: Brainy);
116
117
  /**
@@ -142,7 +143,7 @@ export declare class SmartPDFImporter {
142
143
  */
143
144
  private extractRelationshipContext;
144
145
  /**
145
- * Infer relationship type from context
146
+ * Infer relationship type from context using SmartRelationshipExtractor
146
147
  */
147
148
  private inferRelationship;
148
149
  /**
@@ -11,6 +11,7 @@
11
11
  */
12
12
  import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
13
13
  import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
14
+ import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
14
15
  import { VerbType } from '../types/graphTypes.js';
15
16
  import { PDFHandler } from '../augmentations/intelligentImport/handlers/pdfHandler.js';
16
17
  /**
@@ -21,6 +22,7 @@ export class SmartPDFImporter {
21
22
  this.brain = brain;
22
23
  this.extractor = new NeuralEntityExtractor(brain);
23
24
  this.nlp = new NaturalLanguageProcessor(brain);
25
+ this.relationshipExtractor = new SmartRelationshipExtractor(brain);
24
26
  this.pdfHandler = new PDFHandler();
25
27
  }
26
28
  /**
@@ -272,29 +274,16 @@ export class SmartPDFImporter {
272
274
  return text.substring(start, end + 100).trim();
273
275
  }
274
276
  /**
275
- * Infer relationship type from context
277
+ * Infer relationship type from context using SmartRelationshipExtractor
276
278
  */
277
- async inferRelationship(fromEntity, toEntity, context) {
278
- const lowerContext = context.toLowerCase();
279
- // Pattern-based relationship detection
280
- const patterns = [
281
- [new RegExp(`${toEntity}.*of.*${fromEntity}`, 'i'), VerbType.PartOf],
282
- [new RegExp(`${fromEntity}.*contains.*${toEntity}`, 'i'), VerbType.Contains],
283
- [new RegExp(`${fromEntity}.*in.*${toEntity}`, 'i'), VerbType.LocatedAt],
284
- [new RegExp(`${fromEntity}.*by.*${toEntity}`, 'i'), VerbType.CreatedBy],
285
- [new RegExp(`${fromEntity}.*created.*${toEntity}`, 'i'), VerbType.Creates],
286
- [new RegExp(`${fromEntity}.*authored.*${toEntity}`, 'i'), VerbType.CreatedBy],
287
- [new RegExp(`${fromEntity}.*part of.*${toEntity}`, 'i'), VerbType.PartOf],
288
- [new RegExp(`${fromEntity}.*related to.*${toEntity}`, 'i'), VerbType.RelatedTo],
289
- [new RegExp(`${fromEntity}.*and.*${toEntity}`, 'i'), VerbType.RelatedTo]
290
- ];
291
- for (const [pattern, verbType] of patterns) {
292
- if (pattern.test(lowerContext)) {
293
- return verbType;
294
- }
295
- }
296
- // Default to RelatedTo
297
- return VerbType.RelatedTo;
279
+ async inferRelationship(fromEntity, toEntity, context, fromType, toType) {
280
+ // Use SmartRelationshipExtractor for robust relationship classification
281
+ const result = await this.relationshipExtractor.infer(fromEntity, toEntity, context, {
282
+ subjectType: fromType,
283
+ objectType: toType
284
+ });
285
+ // Return inferred type or fallback to RelatedTo
286
+ return result?.type || VerbType.RelatedTo;
298
287
  }
299
288
  /**
300
289
  * Generate consistent entity ID