@soulcraft/brainy 4.1.4 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/import/FormatDetector.d.ts +6 -1
  2. package/dist/import/FormatDetector.js +40 -1
  3. package/dist/import/ImportCoordinator.d.ts +102 -4
  4. package/dist/import/ImportCoordinator.js +248 -6
  5. package/dist/import/InstancePool.d.ts +136 -0
  6. package/dist/import/InstancePool.js +231 -0
  7. package/dist/importers/SmartCSVImporter.d.ts +2 -1
  8. package/dist/importers/SmartCSVImporter.js +11 -22
  9. package/dist/importers/SmartDOCXImporter.d.ts +125 -0
  10. package/dist/importers/SmartDOCXImporter.js +227 -0
  11. package/dist/importers/SmartExcelImporter.d.ts +12 -1
  12. package/dist/importers/SmartExcelImporter.js +40 -25
  13. package/dist/importers/SmartJSONImporter.d.ts +1 -0
  14. package/dist/importers/SmartJSONImporter.js +25 -6
  15. package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
  16. package/dist/importers/SmartMarkdownImporter.js +11 -16
  17. package/dist/importers/SmartPDFImporter.d.ts +2 -1
  18. package/dist/importers/SmartPDFImporter.js +11 -22
  19. package/dist/importers/SmartYAMLImporter.d.ts +121 -0
  20. package/dist/importers/SmartYAMLImporter.js +275 -0
  21. package/dist/importers/VFSStructureGenerator.js +12 -0
  22. package/dist/neural/SmartExtractor.d.ts +279 -0
  23. package/dist/neural/SmartExtractor.js +592 -0
  24. package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
  25. package/dist/neural/SmartRelationshipExtractor.js +396 -0
  26. package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
  27. package/dist/neural/embeddedTypeEmbeddings.js +2 -2
  28. package/dist/neural/entityExtractor.d.ts +3 -0
  29. package/dist/neural/entityExtractor.js +34 -36
  30. package/dist/neural/presets.d.ts +189 -0
  31. package/dist/neural/presets.js +365 -0
  32. package/dist/neural/signals/ContextSignal.d.ts +166 -0
  33. package/dist/neural/signals/ContextSignal.js +646 -0
  34. package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
  35. package/dist/neural/signals/EmbeddingSignal.js +435 -0
  36. package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
  37. package/dist/neural/signals/ExactMatchSignal.js +542 -0
  38. package/dist/neural/signals/PatternSignal.d.ts +159 -0
  39. package/dist/neural/signals/PatternSignal.js +478 -0
  40. package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
  41. package/dist/neural/signals/VerbContextSignal.js +390 -0
  42. package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
  43. package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
  44. package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
  45. package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
  46. package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
  47. package/dist/neural/signals/VerbPatternSignal.js +457 -0
  48. package/dist/types/graphTypes.d.ts +2 -0
  49. package/package.json +4 -1
@@ -0,0 +1,136 @@
1
+ /**
2
+ * InstancePool - Shared instance management for memory efficiency
3
+ *
4
+ * Production-grade instance pooling to prevent memory leaks during imports.
5
+ * Critical for scaling to billions of entities.
6
+ *
7
+ * Problem: Creating new NLP/Extractor instances in loops → memory leak
8
+ * Solution: Reuse shared instances across entire import session
9
+ *
10
+ * Memory savings:
11
+ * - Without pooling: 100K rows × 50MB per instance = 5TB RAM (OOM!)
12
+ * - With pooling: 50MB total (shared across all rows)
13
+ */
14
+ import { Brainy } from '../brainy.js';
15
+ import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
16
+ import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
17
+ /**
18
+ * InstancePool - Manages shared instances for memory efficiency
19
+ *
20
+ * Lifecycle:
21
+ * 1. Create pool at import start
22
+ * 2. Reuse instances across all rows
23
+ * 3. Pool is garbage collected when import completes
24
+ *
25
+ * Thread safety: Not thread-safe (single import session per pool)
26
+ */
27
+ export declare class InstancePool {
28
+ private brain;
29
+ private nlpInstance;
30
+ private extractorInstance;
31
+ private nlpInitialized;
32
+ private initializationPromise;
33
+ private stats;
34
+ constructor(brain: Brainy);
35
+ /**
36
+ * Get shared NaturalLanguageProcessor instance
37
+ *
38
+ * Lazy initialization - created on first access
39
+ * All subsequent calls return same instance
40
+ *
41
+ * @returns Shared NLP instance
42
+ */
43
+ getNLP(): Promise<NaturalLanguageProcessor>;
44
+ /**
45
+ * Get shared NeuralEntityExtractor instance
46
+ *
47
+ * Lazy initialization - created on first access
48
+ * All subsequent calls return same instance
49
+ *
50
+ * @returns Shared extractor instance
51
+ */
52
+ getExtractor(): NeuralEntityExtractor;
53
+ /**
54
+ * Get shared NLP instance (synchronous, may return uninitialized)
55
+ *
56
+ * Use when you need NLP synchronously and will handle initialization yourself.
57
+ * Prefer getNLP() for async code.
58
+ *
59
+ * @returns Shared NLP instance (possibly uninitialized)
60
+ */
61
+ getNLPSync(): NaturalLanguageProcessor;
62
+ /**
63
+ * Initialize all instances upfront
64
+ *
65
+ * Call at start of import to avoid lazy initialization overhead
66
+ * during processing. Improves predictability and first-row performance.
67
+ *
68
+ * @returns Promise that resolves when all instances are ready
69
+ */
70
+ init(): Promise<void>;
71
+ /**
72
+ * Internal initialization implementation
73
+ */
74
+ private initializeInternal;
75
+ /**
76
+ * Ensure NLP is initialized (loads 220 patterns)
77
+ *
78
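+ * Handles concurrent initialization requests safely (NOTE(review): the .js implementation only checks nlpInitialized, so callers that overlap before init() resolves each call init() — confirm init() is idempotent)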
79
+ */
80
+ private ensureNLPInitialized;
81
+ /**
82
+ * Check if instances are initialized
83
+ *
84
+ * @returns True if NLP is initialized and ready to use
85
+ */
86
+ isInitialized(): boolean;
87
+ /**
88
+ * Get pool statistics
89
+ *
90
+ * Useful for performance monitoring and memory leak detection
91
+ *
92
+ * @returns Statistics about instance reuse
93
+ */
94
+ getStats(): {
95
+ nlpCreated: boolean;
96
+ extractorCreated: boolean;
97
+ initialized: boolean;
98
+ memorySaved: number;
99
+ nlpReuses: number;
100
+ extractorReuses: number;
101
+ creationTime: number;
102
+ };
103
+ /**
104
+ * Calculate estimated memory saved by pooling
105
+ *
106
+ * Assumes ~50MB per NLP instance, ~10MB per extractor instance
107
+ *
108
+ * @returns Estimated memory saved in bytes
109
+ */
110
+ private calculateMemorySaved;
111
+ /**
112
+ * Reset statistics (useful for testing)
113
+ */
114
+ resetStats(): void;
115
+ /**
116
+ * Get string representation (for debugging)
117
+ */
118
+ toString(): string;
119
+ /**
120
+ * Cleanup method (for explicit resource management)
121
+ *
122
+ * Note: Usually not needed - pool is garbage collected when import completes.
123
+ * Use only if you need explicit cleanup for some reason.
124
+ */
125
+ cleanup(): void;
126
+ }
127
+ /**
128
+ * Create a new instance pool
129
+ *
130
+ * Convenience factory function
131
+ *
132
+ * @param brain Brainy instance
133
+ * @param autoInit Whether to initialize instances immediately
134
+ * @returns Instance pool
135
+ */
136
+ export declare function createInstancePool(brain: Brainy, autoInit?: boolean): Promise<InstancePool>;
@@ -0,0 +1,231 @@
1
+ /**
2
+ * InstancePool - Shared instance management for memory efficiency
3
+ *
4
+ * Production-grade instance pooling to prevent memory leaks during imports.
5
+ * Critical for scaling to billions of entities.
6
+ *
7
+ * Problem: Creating new NLP/Extractor instances in loops → memory leak
8
+ * Solution: Reuse shared instances across entire import session
9
+ *
10
+ * Memory savings:
11
+ * - Without pooling: 100K rows × 50MB per instance = 5TB RAM (OOM!)
12
+ * - With pooling: 50MB total (shared across all rows)
13
+ */
14
+ import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
15
+ import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
16
+ /**
17
+ * InstancePool - Manages shared instances for memory efficiency
18
+ *
19
+ * Lifecycle:
20
+ * 1. Create pool at import start
21
+ * 2. Reuse instances across all rows
22
+ * 3. Pool is garbage collected when import completes
23
+ *
24
+ * Thread safety: Not thread-safe (single import session per pool)
25
+ */
26
+ export class InstancePool {
27
+ constructor(brain) {
28
+ // Shared instances (created lazily)
29
+ this.nlpInstance = null;
30
+ this.extractorInstance = null;
31
+ // Initialization state
32
+ this.nlpInitialized = false;
33
+ this.initializationPromise = null;
34
+ // Statistics
35
+ this.stats = {
36
+ nlpReuses: 0,
37
+ extractorReuses: 0,
38
+ creationTime: 0
39
+ };
40
+ this.brain = brain;
41
+ }
42
+ /**
43
+ * Get shared NaturalLanguageProcessor instance
44
+ *
45
+ * Lazy initialization - created on first access
46
+ * All subsequent calls return same instance
47
+ *
48
+ * @returns Shared NLP instance
49
+ */
50
+ async getNLP() {
51
+ if (!this.nlpInstance) {
52
+ const startTime = Date.now();
53
+ this.nlpInstance = new NaturalLanguageProcessor(this.brain);
54
+ this.stats.creationTime += Date.now() - startTime;
55
+ }
56
+ // Ensure initialized before returning
57
+ if (!this.nlpInitialized) {
58
+ await this.ensureNLPInitialized();
59
+ }
60
+ this.stats.nlpReuses++;
61
+ return this.nlpInstance;
62
+ }
63
+ /**
64
+ * Get shared NeuralEntityExtractor instance
65
+ *
66
+ * Lazy initialization - created on first access
67
+ * All subsequent calls return same instance
68
+ *
69
+ * @returns Shared extractor instance
70
+ */
71
+ getExtractor() {
72
+ if (!this.extractorInstance) {
73
+ const startTime = Date.now();
74
+ this.extractorInstance = new NeuralEntityExtractor(this.brain);
75
+ this.stats.creationTime += Date.now() - startTime;
76
+ }
77
+ this.stats.extractorReuses++;
78
+ return this.extractorInstance;
79
+ }
80
+ /**
81
+ * Get shared NLP instance (synchronous, may return uninitialized)
82
+ *
83
+ * Use when you need NLP synchronously and will handle initialization yourself.
84
+ * Prefer getNLP() for async code.
85
+ *
86
+ * @returns Shared NLP instance (possibly uninitialized)
87
+ */
88
+ getNLPSync() {
89
+ if (!this.nlpInstance) {
90
+ this.nlpInstance = new NaturalLanguageProcessor(this.brain);
91
+ }
92
+ this.stats.nlpReuses++;
93
+ return this.nlpInstance;
94
+ }
95
+ /**
96
+ * Initialize all instances upfront
97
+ *
98
+ * Call at start of import to avoid lazy initialization overhead
99
+ * during processing. Improves predictability and first-row performance.
100
+ *
101
+ * @returns Promise that resolves when all instances are ready
102
+ */
103
+ async init() {
104
+ // Prevent duplicate initialization
105
+ if (this.initializationPromise) {
106
+ return this.initializationPromise;
107
+ }
108
+ this.initializationPromise = this.initializeInternal();
109
+ return this.initializationPromise;
110
+ }
111
+ /**
112
+ * Internal initialization implementation
113
+ */
114
+ async initializeInternal() {
115
+ const startTime = Date.now();
116
+ // Create instances
117
+ if (!this.nlpInstance) {
118
+ this.nlpInstance = new NaturalLanguageProcessor(this.brain);
119
+ }
120
+ if (!this.extractorInstance) {
121
+ this.extractorInstance = new NeuralEntityExtractor(this.brain);
122
+ }
123
+ // Initialize NLP (loads pattern library)
124
+ await this.ensureNLPInitialized();
125
+ this.stats.creationTime = Date.now() - startTime;
126
+ }
127
+ /**
128
+ * Ensure NLP is initialized (loads 220 patterns)
129
+ *
130
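+ * Handles concurrent initialization requests safely (NOTE(review): only nlpInitialized is checked and it is set after the await, so callers that overlap before init() resolves each call init() — confirm init() is idempotent)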
131
+ */
132
+ async ensureNLPInitialized() {
133
+ if (this.nlpInitialized) {
134
+ return;
135
+ }
136
+ if (!this.nlpInstance) {
137
+ throw new Error('NLP instance not created yet');
138
+ }
139
+ await this.nlpInstance.init();
140
+ this.nlpInitialized = true;
141
+ }
142
+ /**
143
+ * Check if instances are initialized
144
+ *
145
+ * @returns True if NLP is initialized and ready to use
146
+ */
147
+ isInitialized() {
148
+ return this.nlpInitialized && this.nlpInstance !== null;
149
+ }
150
+ /**
151
+ * Get pool statistics
152
+ *
153
+ * Useful for performance monitoring and memory leak detection
154
+ *
155
+ * @returns Statistics about instance reuse
156
+ */
157
+ getStats() {
158
+ return {
159
+ ...this.stats,
160
+ nlpCreated: this.nlpInstance !== null,
161
+ extractorCreated: this.extractorInstance !== null,
162
+ initialized: this.isInitialized(),
163
+ // Memory savings estimate
164
+ memorySaved: this.calculateMemorySaved()
165
+ };
166
+ }
167
+ /**
168
+ * Calculate estimated memory saved by pooling
169
+ *
170
+ * Assumes ~50MB per NLP instance, ~10MB per extractor instance
171
+ *
172
+ * @returns Estimated memory saved in bytes
173
+ */
174
+ calculateMemorySaved() {
175
+ const nlpSize = 50 * 1024 * 1024; // 50MB per instance
176
+ const extractorSize = 10 * 1024 * 1024; // 10MB per instance
177
+ // Without pooling: size × reuses
178
+ // With pooling: size × 1
179
+ // Saved: size × (reuses - 1)
180
+ const nlpSaved = nlpSize * Math.max(0, this.stats.nlpReuses - 1);
181
+ const extractorSaved = extractorSize * Math.max(0, this.stats.extractorReuses - 1);
182
+ return nlpSaved + extractorSaved;
183
+ }
184
+ /**
185
+ * Reset statistics (useful for testing)
186
+ */
187
+ resetStats() {
188
+ this.stats = {
189
+ nlpReuses: 0,
190
+ extractorReuses: 0,
191
+ creationTime: 0
192
+ };
193
+ }
194
+ /**
195
+ * Get string representation (for debugging)
196
+ */
197
+ toString() {
198
+ const stats = this.getStats();
199
+ return `InstancePool(nlp=${stats.nlpCreated}, extractor=${stats.extractorCreated}, initialized=${stats.initialized}, nlpReuses=${stats.nlpReuses}, extractorReuses=${stats.extractorReuses})`;
200
+ }
201
+ /**
202
+ * Cleanup method (for explicit resource management)
203
+ *
204
+ * Note: Usually not needed - pool is garbage collected when import completes.
205
+ * Use only if you need explicit cleanup for some reason.
206
+ */
207
+ cleanup() {
208
+ // Clear references to allow garbage collection
209
+ this.nlpInstance = null;
210
+ this.extractorInstance = null;
211
+ this.nlpInitialized = false;
212
+ this.initializationPromise = null;
213
+ }
214
+ }
215
+ /**
216
+ * Create a new instance pool
217
+ *
218
+ * Convenience factory function
219
+ *
220
+ * @param brain Brainy instance
221
+ * @param autoInit Whether to initialize instances immediately
222
+ * @returns Instance pool
223
+ */
224
+ export async function createInstancePool(brain, autoInit = true) {
225
+ const pool = new InstancePool(brain);
226
+ if (autoInit) {
227
+ await pool.init();
228
+ }
229
+ return pool;
230
+ }
231
+ //# sourceMappingURL=InstancePool.js.map
@@ -101,6 +101,7 @@ export declare class SmartCSVImporter {
101
101
  private brain;
102
102
  private extractor;
103
103
  private nlp;
104
+ private relationshipExtractor;
104
105
  private csvHandler;
105
106
  constructor(brain: Brainy);
106
107
  /**
@@ -124,7 +125,7 @@ export declare class SmartCSVImporter {
124
125
  */
125
126
  private mapTypeString;
126
127
  /**
127
- * Infer relationship type from context
128
+ * Infer relationship type from context using SmartRelationshipExtractor
128
129
  */
129
130
  private inferRelationship;
130
131
  /**
@@ -12,6 +12,7 @@
12
12
  */
13
13
  import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
14
14
  import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
15
+ import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
15
16
  import { NounType, VerbType } from '../types/graphTypes.js';
16
17
  import { CSVHandler } from '../augmentations/intelligentImport/handlers/csvHandler.js';
17
18
  /**
@@ -22,6 +23,7 @@ export class SmartCSVImporter {
22
23
  this.brain = brain;
23
24
  this.extractor = new NeuralEntityExtractor(brain);
24
25
  this.nlp = new NaturalLanguageProcessor(brain);
26
+ this.relationshipExtractor = new SmartRelationshipExtractor(brain);
25
27
  this.csvHandler = new CSVHandler();
26
28
  }
27
29
  /**
@@ -266,29 +268,16 @@ export class SmartCSVImporter {
266
268
  return mapping[normalized] || NounType.Thing;
267
269
  }
268
270
  /**
269
- * Infer relationship type from context
271
+ * Infer relationship type from context using SmartRelationshipExtractor
270
272
  */
271
- async inferRelationship(fromTerm, toTerm, context) {
272
- const lowerContext = context.toLowerCase();
273
- // Pattern-based relationship detection
274
- const patterns = [
275
- [new RegExp(`${toTerm}.*of.*${fromTerm}`, 'i'), VerbType.PartOf],
276
- [new RegExp(`${fromTerm}.*contains.*${toTerm}`, 'i'), VerbType.Contains],
277
- [new RegExp(`located in.*${toTerm}`, 'i'), VerbType.LocatedAt],
278
- [new RegExp(`ruled by.*${toTerm}`, 'i'), VerbType.Owns],
279
- [new RegExp(`capital.*${toTerm}`, 'i'), VerbType.Contains],
280
- [new RegExp(`created by.*${toTerm}`, 'i'), VerbType.CreatedBy],
281
- [new RegExp(`authored by.*${toTerm}`, 'i'), VerbType.CreatedBy],
282
- [new RegExp(`part of.*${toTerm}`, 'i'), VerbType.PartOf],
283
- [new RegExp(`related to.*${toTerm}`, 'i'), VerbType.RelatedTo]
284
- ];
285
- for (const [pattern, verbType] of patterns) {
286
- if (pattern.test(lowerContext)) {
287
- return verbType;
288
- }
289
- }
290
- // Default to RelatedTo
291
- return VerbType.RelatedTo;
273
+ async inferRelationship(fromTerm, toTerm, context, fromType, toType) {
274
+ // Use SmartRelationshipExtractor for robust relationship classification
275
+ const result = await this.relationshipExtractor.infer(fromTerm, toTerm, context, {
276
+ subjectType: fromType,
277
+ objectType: toType
278
+ });
279
+ // Return inferred type or fallback to RelatedTo
280
+ return result?.type || VerbType.RelatedTo;
292
281
  }
293
282
  /**
294
283
  * Generate consistent entity ID from name
@@ -0,0 +1,125 @@
1
+ /**
2
+ * Smart DOCX Importer
3
+ *
4
+ * Extracts entities and relationships from Word documents using:
5
+ * - Mammoth parser for DOCX → HTML/text conversion
6
+ * - Heading extraction for document structure
7
+ * - Table extraction for structured data
8
+ * - NeuralEntityExtractor for entity extraction from paragraphs
9
+ * - NaturalLanguageProcessor for relationship inference
10
+ * - Hierarchical relationship creation based on heading hierarchy
11
+ *
12
+ * v4.2.0: New format handler
13
+ * NO MOCKS - Production-ready implementation
14
+ */
15
+ import { Brainy } from '../brainy.js';
16
+ import { NounType, VerbType } from '../types/graphTypes.js';
17
+ export interface SmartDOCXOptions {
18
+ /** Enable neural entity extraction from paragraphs */
19
+ enableNeuralExtraction?: boolean;
20
+ /** Enable hierarchical relationship creation based on headings */
21
+ enableHierarchicalRelationships?: boolean;
22
+ /** Enable concept extraction for tagging */
23
+ enableConceptExtraction?: boolean;
24
+ /** Confidence threshold for entities (0-1) */
25
+ confidenceThreshold?: number;
26
+ /** Minimum paragraph length to process */
27
+ minParagraphLength?: number;
28
+ /** Progress callback */
29
+ onProgress?: (stats: {
30
+ processed: number;
31
+ entities: number;
32
+ relationships: number;
33
+ }) => void;
34
+ }
35
+ export interface ExtractedDOCXEntity {
36
+ /** Entity ID */
37
+ id: string;
38
+ /** Entity name */
39
+ name: string;
40
+ /** Entity type */
41
+ type: NounType;
42
+ /** Entity description/context */
43
+ description: string;
44
+ /** Confidence score */
45
+ confidence: number;
46
+ /** Weight/importance score */
47
+ weight?: number;
48
+ /** Section/heading context */
49
+ section: string | null;
50
+ /** Paragraph index in document */
51
+ paragraphIndex: number;
52
+ /** Metadata */
53
+ metadata: Record<string, any>;
54
+ }
55
+ export interface ExtractedDOCXRelationship {
56
+ from: string;
57
+ to: string;
58
+ type: VerbType;
59
+ confidence: number;
60
+ weight?: number;
61
+ evidence: string;
62
+ }
63
+ export interface SmartDOCXResult {
64
+ /** Total paragraphs processed */
65
+ paragraphsProcessed: number;
66
+ /** Entities extracted */
67
+ entitiesExtracted: number;
68
+ /** Relationships inferred */
69
+ relationshipsInferred: number;
70
+ /** All extracted entities */
71
+ entities: ExtractedDOCXEntity[];
72
+ /** All relationships */
73
+ relationships: ExtractedDOCXRelationship[];
74
+ /** Entity ID mapping (index -> ID) */
75
+ entityMap: Map<string, string>;
76
+ /** Processing time in ms */
77
+ processingTime: number;
78
+ /** Document structure */
79
+ structure: {
80
+ headings: Array<{
81
+ level: number;
82
+ text: string;
83
+ index: number;
84
+ }>;
85
+ paragraphCount: number;
86
+ tableCount: number;
87
+ };
88
+ /** Extraction statistics */
89
+ stats: {
90
+ byType: Record<string, number>;
91
+ bySection: Record<string, number>;
92
+ byConfidence: {
93
+ high: number;
94
+ medium: number;
95
+ low: number;
96
+ };
97
+ };
98
+ }
99
+ /**
100
+ * SmartDOCXImporter - Extracts structured knowledge from Word documents
101
+ */
102
+ export declare class SmartDOCXImporter {
103
+ private brain;
104
+ private extractor;
105
+ private nlp;
106
+ private relationshipExtractor;
107
+ private mammothLoaded;
108
+ constructor(brain: Brainy);
109
+ /**
110
+ * Initialize the importer
111
+ */
112
+ init(): Promise<void>;
113
+ /**
114
+ * Extract entities and relationships from DOCX buffer
115
+ */
116
+ extract(buffer: Buffer, options?: SmartDOCXOptions): Promise<SmartDOCXResult>;
117
+ /**
118
+ * Extract entities and relationships from parsed DOCX content
119
+ */
120
+ private extractFromContent;
121
+ /**
122
+ * Parse document structure from HTML
123
+ */
124
+ private parseStructure;
125
+ }