@soulcraft/brainy 4.1.3 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CHANGELOG.md +100 -7
  2. package/dist/brainy.d.ts +74 -16
  3. package/dist/brainy.js +74 -16
  4. package/dist/import/FormatDetector.d.ts +6 -1
  5. package/dist/import/FormatDetector.js +40 -1
  6. package/dist/import/ImportCoordinator.d.ts +155 -5
  7. package/dist/import/ImportCoordinator.js +346 -6
  8. package/dist/import/InstancePool.d.ts +136 -0
  9. package/dist/import/InstancePool.js +231 -0
  10. package/dist/importers/SmartCSVImporter.d.ts +2 -1
  11. package/dist/importers/SmartCSVImporter.js +11 -22
  12. package/dist/importers/SmartDOCXImporter.d.ts +125 -0
  13. package/dist/importers/SmartDOCXImporter.js +227 -0
  14. package/dist/importers/SmartExcelImporter.d.ts +12 -1
  15. package/dist/importers/SmartExcelImporter.js +40 -25
  16. package/dist/importers/SmartJSONImporter.d.ts +1 -0
  17. package/dist/importers/SmartJSONImporter.js +25 -6
  18. package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
  19. package/dist/importers/SmartMarkdownImporter.js +11 -16
  20. package/dist/importers/SmartPDFImporter.d.ts +2 -1
  21. package/dist/importers/SmartPDFImporter.js +11 -22
  22. package/dist/importers/SmartYAMLImporter.d.ts +121 -0
  23. package/dist/importers/SmartYAMLImporter.js +275 -0
  24. package/dist/importers/VFSStructureGenerator.js +12 -0
  25. package/dist/neural/SmartExtractor.d.ts +279 -0
  26. package/dist/neural/SmartExtractor.js +592 -0
  27. package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
  28. package/dist/neural/SmartRelationshipExtractor.js +396 -0
  29. package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
  30. package/dist/neural/embeddedTypeEmbeddings.js +2 -2
  31. package/dist/neural/entityExtractor.d.ts +3 -0
  32. package/dist/neural/entityExtractor.js +34 -36
  33. package/dist/neural/presets.d.ts +189 -0
  34. package/dist/neural/presets.js +365 -0
  35. package/dist/neural/signals/ContextSignal.d.ts +166 -0
  36. package/dist/neural/signals/ContextSignal.js +646 -0
  37. package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
  38. package/dist/neural/signals/EmbeddingSignal.js +435 -0
  39. package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
  40. package/dist/neural/signals/ExactMatchSignal.js +542 -0
  41. package/dist/neural/signals/PatternSignal.d.ts +159 -0
  42. package/dist/neural/signals/PatternSignal.js +478 -0
  43. package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
  44. package/dist/neural/signals/VerbContextSignal.js +390 -0
  45. package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
  46. package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
  47. package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
  48. package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
  49. package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
  50. package/dist/neural/signals/VerbPatternSignal.js +457 -0
  51. package/dist/types/graphTypes.d.ts +2 -0
  52. package/package.json +4 -1
@@ -0,0 +1,231 @@
1
+ /**
2
+ * InstancePool - Shared instance management for memory efficiency
3
+ *
4
+ * Production-grade instance pooling to prevent memory leaks during imports.
5
+ * Critical for scaling to billions of entities.
6
+ *
7
+ * Problem: Creating new NLP/Extractor instances in loops → memory leak
8
+ * Solution: Reuse shared instances across entire import session
9
+ *
10
+ * Memory savings:
11
+ * - Without pooling: 100K rows × 50MB per instance = 5TB RAM (OOM!)
12
+ * - With pooling: 50MB total (shared across all rows)
13
+ */
14
+ import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
15
+ import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
16
+ /**
17
+ * InstancePool - Manages shared instances for memory efficiency
18
+ *
19
+ * Lifecycle:
20
+ * 1. Create pool at import start
21
+ * 2. Reuse instances across all rows
22
+ * 3. Pool is garbage collected when import completes
23
+ *
24
+ * Thread safety: Not thread-safe (single import session per pool)
25
+ */
26
+ export class InstancePool {
27
+ constructor(brain) {
28
+ // Shared instances (created lazily)
29
+ this.nlpInstance = null;
30
+ this.extractorInstance = null;
31
+ // Initialization state
32
+ this.nlpInitialized = false;
33
+ this.initializationPromise = null;
34
+ // Statistics
35
+ this.stats = {
36
+ nlpReuses: 0,
37
+ extractorReuses: 0,
38
+ creationTime: 0
39
+ };
40
+ this.brain = brain;
41
+ }
42
+ /**
43
+ * Get shared NaturalLanguageProcessor instance
44
+ *
45
+ * Lazy initialization - created on first access
46
+ * All subsequent calls return same instance
47
+ *
48
+ * @returns Shared NLP instance
49
+ */
50
+ async getNLP() {
51
+ if (!this.nlpInstance) {
52
+ const startTime = Date.now();
53
+ this.nlpInstance = new NaturalLanguageProcessor(this.brain);
54
+ this.stats.creationTime += Date.now() - startTime;
55
+ }
56
+ // Ensure initialized before returning
57
+ if (!this.nlpInitialized) {
58
+ await this.ensureNLPInitialized();
59
+ }
60
+ this.stats.nlpReuses++;
61
+ return this.nlpInstance;
62
+ }
63
+ /**
64
+ * Get shared NeuralEntityExtractor instance
65
+ *
66
+ * Lazy initialization - created on first access
67
+ * All subsequent calls return same instance
68
+ *
69
+ * @returns Shared extractor instance
70
+ */
71
+ getExtractor() {
72
+ if (!this.extractorInstance) {
73
+ const startTime = Date.now();
74
+ this.extractorInstance = new NeuralEntityExtractor(this.brain);
75
+ this.stats.creationTime += Date.now() - startTime;
76
+ }
77
+ this.stats.extractorReuses++;
78
+ return this.extractorInstance;
79
+ }
80
+ /**
81
+ * Get shared NLP instance (synchronous, may return uninitialized)
82
+ *
83
+ * Use when you need NLP synchronously and will handle initialization yourself.
84
+ * Prefer getNLP() for async code.
85
+ *
86
+ * @returns Shared NLP instance (possibly uninitialized)
87
+ */
88
+ getNLPSync() {
89
+ if (!this.nlpInstance) {
90
+ this.nlpInstance = new NaturalLanguageProcessor(this.brain);
91
+ }
92
+ this.stats.nlpReuses++;
93
+ return this.nlpInstance;
94
+ }
95
+ /**
96
+ * Initialize all instances upfront
97
+ *
98
+ * Call at start of import to avoid lazy initialization overhead
99
+ * during processing. Improves predictability and first-row performance.
100
+ *
101
+ * @returns Promise that resolves when all instances are ready
102
+ */
103
+ async init() {
104
+ // Prevent duplicate initialization
105
+ if (this.initializationPromise) {
106
+ return this.initializationPromise;
107
+ }
108
+ this.initializationPromise = this.initializeInternal();
109
+ return this.initializationPromise;
110
+ }
111
+ /**
112
+ * Internal initialization implementation
113
+ */
114
+ async initializeInternal() {
115
+ const startTime = Date.now();
116
+ // Create instances
117
+ if (!this.nlpInstance) {
118
+ this.nlpInstance = new NaturalLanguageProcessor(this.brain);
119
+ }
120
+ if (!this.extractorInstance) {
121
+ this.extractorInstance = new NeuralEntityExtractor(this.brain);
122
+ }
123
+ // Initialize NLP (loads pattern library)
124
+ await this.ensureNLPInitialized();
125
+ this.stats.creationTime = Date.now() - startTime;
126
+ }
127
+ /**
128
+ * Ensure NLP is initialized (loads 220 patterns)
129
+ *
130
+ * Handles concurrent initialization requests safely
131
+ */
132
+ async ensureNLPInitialized() {
133
+ if (this.nlpInitialized) {
134
+ return;
135
+ }
136
+ if (!this.nlpInstance) {
137
+ throw new Error('NLP instance not created yet');
138
+ }
139
+ await this.nlpInstance.init();
140
+ this.nlpInitialized = true;
141
+ }
142
+ /**
143
+ * Check if instances are initialized
144
+ *
145
+ * @returns True if NLP is initialized and ready to use
146
+ */
147
+ isInitialized() {
148
+ return this.nlpInitialized && this.nlpInstance !== null;
149
+ }
150
+ /**
151
+ * Get pool statistics
152
+ *
153
+ * Useful for performance monitoring and memory leak detection
154
+ *
155
+ * @returns Statistics about instance reuse
156
+ */
157
+ getStats() {
158
+ return {
159
+ ...this.stats,
160
+ nlpCreated: this.nlpInstance !== null,
161
+ extractorCreated: this.extractorInstance !== null,
162
+ initialized: this.isInitialized(),
163
+ // Memory savings estimate
164
+ memorySaved: this.calculateMemorySaved()
165
+ };
166
+ }
167
+ /**
168
+ * Calculate estimated memory saved by pooling
169
+ *
170
+ * Assumes ~50MB per NLP instance, ~10MB per extractor instance
171
+ *
172
+ * @returns Estimated memory saved in bytes
173
+ */
174
+ calculateMemorySaved() {
175
+ const nlpSize = 50 * 1024 * 1024; // 50MB per instance
176
+ const extractorSize = 10 * 1024 * 1024; // 10MB per instance
177
+ // Without pooling: size × reuses
178
+ // With pooling: size × 1
179
+ // Saved: size × (reuses - 1)
180
+ const nlpSaved = nlpSize * Math.max(0, this.stats.nlpReuses - 1);
181
+ const extractorSaved = extractorSize * Math.max(0, this.stats.extractorReuses - 1);
182
+ return nlpSaved + extractorSaved;
183
+ }
184
+ /**
185
+ * Reset statistics (useful for testing)
186
+ */
187
+ resetStats() {
188
+ this.stats = {
189
+ nlpReuses: 0,
190
+ extractorReuses: 0,
191
+ creationTime: 0
192
+ };
193
+ }
194
+ /**
195
+ * Get string representation (for debugging)
196
+ */
197
+ toString() {
198
+ const stats = this.getStats();
199
+ return `InstancePool(nlp=${stats.nlpCreated}, extractor=${stats.extractorCreated}, initialized=${stats.initialized}, nlpReuses=${stats.nlpReuses}, extractorReuses=${stats.extractorReuses})`;
200
+ }
201
+ /**
202
+ * Cleanup method (for explicit resource management)
203
+ *
204
+ * Note: Usually not needed - pool is garbage collected when import completes.
205
+ * Use only if you need explicit cleanup for some reason.
206
+ */
207
+ cleanup() {
208
+ // Clear references to allow garbage collection
209
+ this.nlpInstance = null;
210
+ this.extractorInstance = null;
211
+ this.nlpInitialized = false;
212
+ this.initializationPromise = null;
213
+ }
214
+ }
/**
 * Factory for InstancePool
 *
 * Builds a pool around the given brain and, unless told otherwise,
 * waits for pool.init() to finish before handing the pool back.
 *
 * @param brain Brainy instance
 * @param autoInit When true (default), await pool.init() before returning
 * @returns The new instance pool
 */
export async function createInstancePool(brain, autoInit = true) {
    const instancePool = new InstancePool(brain);
    if (!autoInit) {
        return instancePool;
    }
    await instancePool.init();
    return instancePool;
}
231
+ //# sourceMappingURL=InstancePool.js.map
@@ -101,6 +101,7 @@ export declare class SmartCSVImporter {
101
101
  private brain;
102
102
  private extractor;
103
103
  private nlp;
104
+ private relationshipExtractor;
104
105
  private csvHandler;
105
106
  constructor(brain: Brainy);
106
107
  /**
@@ -124,7 +125,7 @@ export declare class SmartCSVImporter {
124
125
  */
125
126
  private mapTypeString;
126
127
  /**
127
- * Infer relationship type from context
128
+ * Infer relationship type from context using SmartRelationshipExtractor
128
129
  */
129
130
  private inferRelationship;
130
131
  /**
@@ -12,6 +12,7 @@
12
12
  */
13
13
  import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
14
14
  import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
15
+ import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
15
16
  import { NounType, VerbType } from '../types/graphTypes.js';
16
17
  import { CSVHandler } from '../augmentations/intelligentImport/handlers/csvHandler.js';
17
18
  /**
@@ -22,6 +23,7 @@ export class SmartCSVImporter {
22
23
  this.brain = brain;
23
24
  this.extractor = new NeuralEntityExtractor(brain);
24
25
  this.nlp = new NaturalLanguageProcessor(brain);
26
+ this.relationshipExtractor = new SmartRelationshipExtractor(brain);
25
27
  this.csvHandler = new CSVHandler();
26
28
  }
27
29
  /**
@@ -266,29 +268,16 @@ export class SmartCSVImporter {
266
268
  return mapping[normalized] || NounType.Thing;
267
269
  }
268
270
  /**
269
- * Infer relationship type from context
271
+ * Infer relationship type from context using SmartRelationshipExtractor
270
272
  */
271
- async inferRelationship(fromTerm, toTerm, context) {
272
- const lowerContext = context.toLowerCase();
273
- // Pattern-based relationship detection
274
- const patterns = [
275
- [new RegExp(`${toTerm}.*of.*${fromTerm}`, 'i'), VerbType.PartOf],
276
- [new RegExp(`${fromTerm}.*contains.*${toTerm}`, 'i'), VerbType.Contains],
277
- [new RegExp(`located in.*${toTerm}`, 'i'), VerbType.LocatedAt],
278
- [new RegExp(`ruled by.*${toTerm}`, 'i'), VerbType.Owns],
279
- [new RegExp(`capital.*${toTerm}`, 'i'), VerbType.Contains],
280
- [new RegExp(`created by.*${toTerm}`, 'i'), VerbType.CreatedBy],
281
- [new RegExp(`authored by.*${toTerm}`, 'i'), VerbType.CreatedBy],
282
- [new RegExp(`part of.*${toTerm}`, 'i'), VerbType.PartOf],
283
- [new RegExp(`related to.*${toTerm}`, 'i'), VerbType.RelatedTo]
284
- ];
285
- for (const [pattern, verbType] of patterns) {
286
- if (pattern.test(lowerContext)) {
287
- return verbType;
288
- }
289
- }
290
- // Default to RelatedTo
291
- return VerbType.RelatedTo;
273
+ async inferRelationship(fromTerm, toTerm, context, fromType, toType) {
274
+ // Use SmartRelationshipExtractor for robust relationship classification
275
+ const result = await this.relationshipExtractor.infer(fromTerm, toTerm, context, {
276
+ subjectType: fromType,
277
+ objectType: toType
278
+ });
279
+ // Return inferred type or fallback to RelatedTo
280
+ return result?.type || VerbType.RelatedTo;
292
281
  }
293
282
  /**
294
283
  * Generate consistent entity ID from name
@@ -0,0 +1,125 @@
1
+ /**
2
+ * Smart DOCX Importer
3
+ *
4
+ * Extracts entities and relationships from Word documents using:
5
+ * - Mammoth parser for DOCX → HTML/text conversion
6
+ * - Heading extraction for document structure
7
+ * - Table extraction for structured data
8
+ * - NeuralEntityExtractor for entity extraction from paragraphs
9
+ * - SmartRelationshipExtractor for relationship inference
10
+ * - Hierarchical relationship creation based on heading hierarchy
11
+ *
12
+ * v4.2.0: New format handler
13
+ * NO MOCKS - Production-ready implementation
14
+ */
15
+ import { Brainy } from '../brainy.js';
16
+ import { NounType, VerbType } from '../types/graphTypes.js';
17
+ export interface SmartDOCXOptions {
18
+ /** Enable neural entity extraction from paragraphs (on unless set to false) */
19
+ enableNeuralExtraction?: boolean;
20
+ /** Enable relationship creation between entities that fall under the same heading (on unless set to false) */
21
+ enableHierarchicalRelationships?: boolean;
22
+ /** Enable concept extraction for tagging (NOTE(review): the visible extractFromContent normalizes this flag but never reads it) */
23
+ enableConceptExtraction?: boolean;
24
+ /** Confidence threshold for entities (0-1), passed to the entity extractor; defaults to 0.6 */
25
+ confidenceThreshold?: number;
26
+ /** Minimum trimmed paragraph length, in characters, for a paragraph to be processed; defaults to 20 */
27
+ minParagraphLength?: number;
28
+ /** Progress callback; invoked on every 10th paragraph index and once more after all processing is done */
29
+ onProgress?: (stats: {
30
+ processed: number;
31
+ entities: number;
32
+ relationships: number;
33
+ }) => void;
34
+ }
35
+ export interface ExtractedDOCXEntity {
36
+ /** Entity ID, built by the importer as `para<paragraphIndex>:<entity text>` */
37
+ id: string;
38
+ /** Entity name (the text span returned by the extractor) */
39
+ name: string;
40
+ /** Entity type as reported by the extractor */
41
+ type: NounType;
42
+ /** Entity description/context: the full paragraph the entity was found in */
43
+ description: string;
44
+ /** Confidence score reported by the extractor */
45
+ confidence: number;
46
+ /** Weight/importance score; the importer falls back to 1.0 when the extractor gives none */
47
+ weight?: number;
48
+ /** Section/heading context: text of the last matched heading, 'Introduction' before any heading matched */
49
+ section: string | null;
50
+ /** Index of the source paragraph among the paragraphs that met the minimum length */
51
+ paragraphIndex: number;
52
+ /** Metadata; the importer stores the extractor's `position` and the `headingContext` here */
53
+ metadata: Record<string, any>;
54
+ }
55
+ export interface ExtractedDOCXRelationship { // Relationship between two entities extracted from the same section
56
+ from: string; // ID of the first (subject) entity
57
+ to: string; // ID of the second (object) entity
58
+ type: VerbType; // Inferred relationship type; the importer falls back to VerbType.RelatedTo
59
+ confidence: number; // Inferred confidence; the importer falls back to 0.7
60
+ weight?: number; // Inferred weight; the importer falls back to 0.8
61
+ evidence: string; // Evidence text from inference, or a note that both entities share a section
62
+ }
63
+ export interface SmartDOCXResult {
64
+ /** Number of paragraphs that met the minimum length (paragraphs recognized as headings included) */
65
+ paragraphsProcessed: number;
66
+ /** Number of entities extracted (length of `entities`) */
67
+ entitiesExtracted: number;
68
+ /** Number of relationships inferred (length of `relationships`) */
69
+ relationshipsInferred: number;
70
+ /** All extracted entities */
71
+ entities: ExtractedDOCXEntity[];
72
+ /** All relationships */
73
+ relationships: ExtractedDOCXRelationship[];
74
+ /** Entity ID mapping; the importer currently maps every entity ID to itself */
75
+ entityMap: Map<string, string>;
76
+ /** Processing time in ms, measured around the whole extract() call */
77
+ processingTime: number;
78
+ /** Document structure parsed from the converted HTML */
79
+ structure: {
80
+ headings: Array<{
81
+ level: number; // Heading level 1-6 (from the h1-h6 tag)
82
+ text: string; // Heading text with HTML tags stripped
83
+ index: number; // Zero-based position among the headings found
84
+ }>;
85
+ paragraphCount: number;
86
+ tableCount: number;
87
+ };
88
+ /** Extraction statistics (entity counts) */
89
+ stats: {
90
+ byType: Record<string, number>;
91
+ bySection: Record<string, number>;
92
+ byConfidence: {
93
+ high: number; // confidence > 0.8
94
+ medium: number; // confidence >= 0.6 and <= 0.8
95
+ low: number; // confidence < 0.6
96
+ };
97
+ };
98
+ }
99
+ /**
100
+ * SmartDOCXImporter - Extracts entities and same-section relationships from Word (DOCX) documents
101
+ */
102
+ export declare class SmartDOCXImporter {
103
+ private brain;
104
+ private extractor;
105
+ private nlp;
106
+ private relationshipExtractor;
107
+ private mammothLoaded;
108
+ constructor(brain: Brainy);
109
+ /**
110
+ * Initialize the importer: initializes the NLP processor and loads the mammoth parser on first use; rejects if mammoth cannot be imported
111
+ */
112
+ init(): Promise<void>;
113
+ /**
114
+ * Extract entities and relationships from DOCX buffer (calls init() first if mammoth is not loaded yet)
115
+ */
116
+ extract(buffer: Buffer, options?: SmartDOCXOptions): Promise<SmartDOCXResult>;
117
+ /**
118
+ * Extract entities and relationships from parsed DOCX content (raw text plus converted HTML)
119
+ */
120
+ private extractFromContent;
121
+ /**
122
+ * Parse document structure from HTML: h1-h6 headings plus paragraph and table counts
123
+ */
124
+ private parseStructure;
125
+ }
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Smart DOCX Importer
3
+ *
4
+ * Extracts entities and relationships from Word documents using:
5
+ * - Mammoth parser for DOCX → HTML/text conversion
6
+ * - Heading extraction for document structure
7
+ * - Table extraction for structured data
8
+ * - NeuralEntityExtractor for entity extraction from paragraphs
9
+ * - SmartRelationshipExtractor for relationship inference
10
+ * - Hierarchical relationship creation based on heading hierarchy
11
+ *
12
+ * v4.2.0: New format handler
13
+ * NO MOCKS - Production-ready implementation
14
+ */
15
+ import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
16
+ import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
17
+ import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
18
+ import { VerbType } from '../types/graphTypes.js';
19
+ // Dynamic import for mammoth (ESM compatibility)
20
+ let mammoth;
21
+ /**
22
+ * SmartDOCXImporter - Extracts structured knowledge from Word documents
23
+ */
24
+ export class SmartDOCXImporter {
25
+ constructor(brain) {
26
+ this.mammothLoaded = false;
27
+ this.brain = brain;
28
+ this.extractor = new NeuralEntityExtractor(brain);
29
+ this.nlp = new NaturalLanguageProcessor(brain);
30
+ this.relationshipExtractor = new SmartRelationshipExtractor(brain);
31
+ }
32
+ /**
33
+ * Initialize the importer
34
+ */
35
+ async init() {
36
+ await this.nlp.init();
37
+ // Lazy load mammoth
38
+ if (!this.mammothLoaded) {
39
+ try {
40
+ mammoth = await import('mammoth');
41
+ this.mammothLoaded = true;
42
+ }
43
+ catch (error) {
44
+ throw new Error(`Failed to load mammoth parser: ${error.message}`);
45
+ }
46
+ }
47
+ }
48
+ /**
49
+ * Extract entities and relationships from DOCX buffer
50
+ */
51
+ async extract(buffer, options = {}) {
52
+ const startTime = Date.now();
53
+ // Ensure mammoth is loaded
54
+ if (!this.mammothLoaded) {
55
+ await this.init();
56
+ }
57
+ // Extract raw text for entity extraction
58
+ const textResult = await mammoth.extractRawText({ buffer });
59
+ // Extract HTML for structure analysis (headings, tables)
60
+ const htmlResult = await mammoth.convertToHtml({ buffer });
61
+ // Process the document
62
+ const result = await this.extractFromContent(textResult.value, htmlResult.value, options);
63
+ result.processingTime = Date.now() - startTime;
64
+ return result;
65
+ }
66
+ /**
67
+ * Extract entities and relationships from parsed DOCX content
68
+ */
69
+ async extractFromContent(rawText, html, options) {
70
+ const opts = {
71
+ enableNeuralExtraction: options.enableNeuralExtraction !== false,
72
+ enableHierarchicalRelationships: options.enableHierarchicalRelationships !== false,
73
+ enableConceptExtraction: options.enableConceptExtraction !== false,
74
+ confidenceThreshold: options.confidenceThreshold || 0.6,
75
+ minParagraphLength: options.minParagraphLength || 20
76
+ };
77
+ const entities = [];
78
+ const relationships = [];
79
+ const entityMap = new Map();
80
+ const stats = {
81
+ byType: {},
82
+ bySection: {},
83
+ byConfidence: { high: 0, medium: 0, low: 0 }
84
+ };
85
+ // Parse document structure from HTML
86
+ const structure = this.parseStructure(html);
87
+ // Split into paragraphs
88
+ const paragraphs = rawText.split(/\n\n+/).filter(p => p.trim().length >= opts.minParagraphLength);
89
+ let currentSection = 'Introduction';
90
+ let headingIndex = 0;
91
+ // Process each paragraph
92
+ for (let i = 0; i < paragraphs.length; i++) {
93
+ const paragraph = paragraphs[i].trim();
94
+ // Check if this paragraph is a heading
95
+ if (headingIndex < structure.headings.length) {
96
+ const heading = structure.headings[headingIndex];
97
+ if (paragraph.startsWith(heading.text) || heading.text.includes(paragraph.substring(0, 50))) {
98
+ currentSection = heading.text;
99
+ headingIndex++;
100
+ stats.bySection[currentSection] = 0;
101
+ continue;
102
+ }
103
+ }
104
+ // Extract entities from paragraph
105
+ if (opts.enableNeuralExtraction) {
106
+ const extractedEntities = await this.extractor.extract(paragraph, {
107
+ confidence: opts.confidenceThreshold
108
+ });
109
+ for (const extracted of extractedEntities) {
110
+ const entityId = `para${i}:${extracted.text}`;
111
+ const entity = {
112
+ id: entityId,
113
+ name: extracted.text,
114
+ type: extracted.type,
115
+ description: paragraph,
116
+ confidence: extracted.confidence,
117
+ weight: extracted.weight || 1.0,
118
+ section: currentSection,
119
+ paragraphIndex: i,
120
+ metadata: {
121
+ position: extracted.position,
122
+ headingContext: currentSection
123
+ }
124
+ };
125
+ entities.push(entity);
126
+ entityMap.set(entityId, entityId);
127
+ // Update stats
128
+ stats.byType[entity.type] = (stats.byType[entity.type] || 0) + 1;
129
+ stats.bySection[currentSection] = (stats.bySection[currentSection] || 0) + 1;
130
+ if (entity.confidence > 0.8)
131
+ stats.byConfidence.high++;
132
+ else if (entity.confidence >= 0.6)
133
+ stats.byConfidence.medium++;
134
+ else
135
+ stats.byConfidence.low++;
136
+ }
137
+ }
138
+ // Report progress
139
+ if (options.onProgress && i % 10 === 0) {
140
+ options.onProgress({
141
+ processed: i,
142
+ entities: entities.length,
143
+ relationships: relationships.length
144
+ });
145
+ }
146
+ }
147
+ // Create hierarchical relationships based on sections
148
+ if (opts.enableHierarchicalRelationships) {
149
+ const entitiesBySection = new Map();
150
+ for (const entity of entities) {
151
+ const section = entity.section || 'Unknown';
152
+ if (!entitiesBySection.has(section)) {
153
+ entitiesBySection.set(section, []);
154
+ }
155
+ entitiesBySection.get(section).push(entity);
156
+ }
157
+ // Create relationships within sections
158
+ for (const [section, sectionEntities] of entitiesBySection) {
159
+ for (let i = 0; i < sectionEntities.length - 1; i++) {
160
+ for (let j = i + 1; j < Math.min(i + 3, sectionEntities.length); j++) {
161
+ const entityA = sectionEntities[i];
162
+ const entityB = sectionEntities[j];
163
+ // Infer relationship type using SmartRelationshipExtractor
164
+ // Combine entity descriptions for better context
165
+ const context = `In section "${section}": ${entityA.description.substring(0, 150)}... ${entityB.description.substring(0, 150)}...`;
166
+ const inferredRelationship = await this.relationshipExtractor.infer(entityA.name, entityB.name, context, {
167
+ subjectType: entityA.type,
168
+ objectType: entityB.type
169
+ });
170
+ relationships.push({
171
+ from: entityA.id,
172
+ to: entityB.id,
173
+ type: inferredRelationship?.type || VerbType.RelatedTo, // Fallback to RelatedTo for co-occurrence
174
+ confidence: inferredRelationship?.confidence || 0.7,
175
+ weight: inferredRelationship?.weight || 0.8,
176
+ evidence: inferredRelationship?.evidence || `Both entities appear in section: ${section}`
177
+ });
178
+ }
179
+ }
180
+ }
181
+ }
182
+ // Final progress report
183
+ if (options.onProgress) {
184
+ options.onProgress({
185
+ processed: paragraphs.length,
186
+ entities: entities.length,
187
+ relationships: relationships.length
188
+ });
189
+ }
190
+ return {
191
+ paragraphsProcessed: paragraphs.length,
192
+ entitiesExtracted: entities.length,
193
+ relationshipsInferred: relationships.length,
194
+ entities,
195
+ relationships,
196
+ entityMap,
197
+ processingTime: 0, // Will be set by caller
198
+ structure,
199
+ stats
200
+ };
201
+ }
202
+ /**
203
+ * Parse document structure from HTML
204
+ */
205
+ parseStructure(html) {
206
+ const headings = [];
207
+ // Extract headings (h1-h6)
208
+ const headingRegex = /<h([1-6])>(.*?)<\/h\1>/gi;
209
+ let match;
210
+ let index = 0;
211
+ while ((match = headingRegex.exec(html)) !== null) {
212
+ const level = parseInt(match[1]);
213
+ const text = match[2].replace(/<[^>]+>/g, '').trim(); // Strip HTML tags
214
+ headings.push({ level, text, index: index++ });
215
+ }
216
+ // Count paragraphs
217
+ const paragraphCount = (html.match(/<p>/g) || []).length;
218
+ // Count tables
219
+ const tableCount = (html.match(/<table>/g) || []).length;
220
+ return {
221
+ headings,
222
+ paragraphCount,
223
+ tableCount
224
+ };
225
+ }
226
+ }
227
+ //# sourceMappingURL=SmartDOCXImporter.js.map
@@ -88,6 +88,16 @@ export interface SmartExcelResult {
88
88
  low: number;
89
89
  };
90
90
  };
91
+ /** Sheet-specific data for VFS extraction (v4.2.0) */
92
+ sheets?: Array<{
93
+ name: string;
94
+ rows: ExtractedRow[];
95
+ stats: {
96
+ rowCount: number;
97
+ entityCount: number;
98
+ relationshipCount: number;
99
+ };
100
+ }>;
91
101
  }
92
102
  /**
93
103
  * SmartExcelImporter - Extracts structured knowledge from Excel files
@@ -96,6 +106,7 @@ export declare class SmartExcelImporter {
96
106
  private brain;
97
107
  private extractor;
98
108
  private nlp;
109
+ private relationshipExtractor;
99
110
  private excelHandler;
100
111
  constructor(brain: Brainy);
101
112
  /**
@@ -119,7 +130,7 @@ export declare class SmartExcelImporter {
119
130
  */
120
131
  private mapTypeString;
121
132
  /**
122
- * Infer relationship type from context
133
+ * Infer relationship type from context using SmartRelationshipExtractor
123
134
  */
124
135
  private inferRelationship;
125
136
  /**