@soulcraft/brainy 4.1.4 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/dist/import/FormatDetector.d.ts +6 -1
  2. package/dist/import/FormatDetector.js +40 -1
  3. package/dist/import/ImportCoordinator.d.ts +102 -4
  4. package/dist/import/ImportCoordinator.js +248 -6
  5. package/dist/import/InstancePool.d.ts +136 -0
  6. package/dist/import/InstancePool.js +231 -0
  7. package/dist/importers/SmartCSVImporter.d.ts +2 -1
  8. package/dist/importers/SmartCSVImporter.js +11 -22
  9. package/dist/importers/SmartDOCXImporter.d.ts +125 -0
  10. package/dist/importers/SmartDOCXImporter.js +227 -0
  11. package/dist/importers/SmartExcelImporter.d.ts +12 -1
  12. package/dist/importers/SmartExcelImporter.js +40 -25
  13. package/dist/importers/SmartJSONImporter.d.ts +1 -0
  14. package/dist/importers/SmartJSONImporter.js +25 -6
  15. package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
  16. package/dist/importers/SmartMarkdownImporter.js +11 -16
  17. package/dist/importers/SmartPDFImporter.d.ts +2 -1
  18. package/dist/importers/SmartPDFImporter.js +11 -22
  19. package/dist/importers/SmartYAMLImporter.d.ts +121 -0
  20. package/dist/importers/SmartYAMLImporter.js +275 -0
  21. package/dist/importers/VFSStructureGenerator.js +12 -0
  22. package/dist/neural/SmartExtractor.d.ts +279 -0
  23. package/dist/neural/SmartExtractor.js +592 -0
  24. package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
  25. package/dist/neural/SmartRelationshipExtractor.js +396 -0
  26. package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
  27. package/dist/neural/embeddedTypeEmbeddings.js +2 -2
  28. package/dist/neural/entityExtractor.d.ts +3 -0
  29. package/dist/neural/entityExtractor.js +34 -36
  30. package/dist/neural/presets.d.ts +189 -0
  31. package/dist/neural/presets.js +365 -0
  32. package/dist/neural/signals/ContextSignal.d.ts +166 -0
  33. package/dist/neural/signals/ContextSignal.js +646 -0
  34. package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
  35. package/dist/neural/signals/EmbeddingSignal.js +435 -0
  36. package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
  37. package/dist/neural/signals/ExactMatchSignal.js +542 -0
  38. package/dist/neural/signals/PatternSignal.d.ts +159 -0
  39. package/dist/neural/signals/PatternSignal.js +478 -0
  40. package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
  41. package/dist/neural/signals/VerbContextSignal.js +390 -0
  42. package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
  43. package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
  44. package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
  45. package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
  46. package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
  47. package/dist/neural/signals/VerbPatternSignal.js +457 -0
  48. package/dist/types/graphTypes.d.ts +2 -0
  49. package/package.json +4 -1
@@ -0,0 +1,121 @@
1
+ /**
2
+ * Smart YAML Importer
3
+ *
4
+ * Extracts entities and relationships from YAML files using:
5
+ * - YAML parsing to JSON-like structure
6
+ * - Recursive traversal of nested structures
7
+ * - NeuralEntityExtractor for entity extraction from text values
8
+ * - NaturalLanguageProcessor for relationship inference
9
+ * - Hierarchical relationship creation (parent-child, contains, etc.)
10
+ *
11
+ * v4.2.0: New format handler
12
+ * NO MOCKS - Production-ready implementation
13
+ */
14
+ import { Brainy } from '../brainy.js';
15
+ import { NounType, VerbType } from '../types/graphTypes.js';
16
+ export interface SmartYAMLOptions { // Tuning knobs accepted by SmartYAMLImporter.extract(); every field is optional
17
+ /** Enable neural entity extraction from string values (treated as enabled unless explicitly set to false) */
18
+ enableNeuralExtraction?: boolean;
19
+ /** Enable hierarchical (parent -> child) relationship creation (treated as enabled unless explicitly set to false) */
20
+ enableHierarchicalRelationships?: boolean;
21
+ /** Enable concept extraction for tagging. NOTE(review): the importer defaults this to true but the visible implementation never reads it afterwards - confirm */
22
+ enableConceptExtraction?: boolean;
23
+ /** Confidence threshold for entities (0-1); forwarded to the entity extractor as its `confidence` option. Importer default: 0.6 */
24
+ confidenceThreshold?: number;
25
+ /** Maximum depth to traverse; the root document is depth 0 and deeper nodes are skipped. Importer default: 10 */
26
+ maxDepth?: number;
27
+ /** Minimum string length to process for entity extraction; shorter string values are ignored. Importer default: 3 */
28
+ minStringLength?: number;
29
+ /** Keys that indicate entity names; the first key holding a non-empty string wins. Importer default: name, title, label, id */
30
+ nameKeys?: string[];
31
+ /** Keys that indicate entity descriptions; the first key holding a non-empty string wins. Importer default: description, desc, summary, value */
32
+ descriptionKeys?: string[];
33
+ /** Keys that indicate entity types; the matched value is recorded in the entity metadata as `explicitType`. Importer default: type, kind, category */
34
+ typeKeys?: string[];
35
+ /** Progress callback; invoked after every 10th visited node and once more when traversal finishes */
36
+ onProgress?: (stats: {
37
+ processed: number; // nodes visited so far
38
+ entities: number; // entities collected so far
39
+ relationships: number; // relationships collected so far
40
+ }) => void;
41
+ }
42
+ export interface ExtractedYAMLEntity { // One entity produced by the YAML importer, either from an object node or from text inside a string value
43
+ /** Entity ID: the YAML path for object nodes, or `<path>:<extracted text>` for entities found inside a string value */
44
+ id: string;
45
+ /** Entity name: the first matching name key, otherwise the last path segment (`root` for the document root); the extracted text for string-value entities */
46
+ name: string;
47
+ /** Entity type as reported by the entity extractor (object nodes fall back to NounType.Thing when nothing is classified) */
48
+ type: NounType;
49
+ /** Entity description/value: the first matching description key or the name for object nodes; the full source string for string-value entities */
50
+ description: string;
51
+ /** Confidence score reported by the extractor (0.5 for object nodes the extractor could not classify) */
52
+ confidence: number;
53
+ /** Weight/importance score (the importer fills in 1.0 when the extractor supplies none) */
54
+ weight?: number;
55
+ /** YAML path to this entity, rooted at `$` (e.g. `$.servers[0].name`) */
56
+ path: string;
57
+ /** Parent path in YAML hierarchy; null for the document root */
58
+ parentPath: string | null;
59
+ /** Metadata: depth / explicitType / yamlKeys for object nodes; position / extractedFrom for string-value entities */
60
+ metadata: Record<string, any>;
61
+ }
62
+ export interface ExtractedYAMLRelationship { // Directed relationship between two extracted entities (the importer emits these for parent -> child nesting)
63
+ from: string; // ID of the parent (source) entity
64
+ to: string; // ID of the child (target) entity
65
+ type: VerbType; // inferred relationship type; the importer falls back to VerbType.Contains
66
+ confidence: number; // confidence of the inference; the importer falls back to 0.9
67
+ weight?: number; // relationship weight; the importer falls back to 1.0
68
+ evidence: string; // human-readable justification for the relationship
69
+ }
70
+ export interface SmartYAMLResult { // Everything SmartYAMLImporter.extract() returns for one YAML document
71
+ /** Total nodes processed (every visited value within the depth limit counts, including nulls, scalars and arrays) */
72
+ nodesProcessed: number;
73
+ /** Entities extracted (equals entities.length) */
74
+ entitiesExtracted: number;
75
+ /** Relationships inferred (equals relationships.length) */
76
+ relationshipsInferred: number;
77
+ /** All extracted entities, in traversal order */
78
+ entities: ExtractedYAMLEntity[];
79
+ /** All relationships */
80
+ relationships: ExtractedYAMLRelationship[];
81
+ /** Entity ID mapping (path -> ID); entities found inside string values are keyed by their own ID instead of a path */
82
+ entityMap: Map<string, string>;
83
+ /** Processing time in ms, measured by extract() around parsing and extraction */
84
+ processingTime: number;
85
+ /** Extraction statistics */
86
+ stats: {
87
+ byType: Record<string, number>; // entity count per entity type
88
+ byDepth: Record<number, number>; // visited-node count per nesting depth (root is depth 0)
89
+ byConfidence: {
90
+ high: number; // entities with confidence > 0.8
91
+ medium: number; // entities with confidence >= 0.6 and <= 0.8
92
+ low: number; // entities with confidence < 0.6
93
+ };
94
+ };
95
+ }
96
+ /**
97
+ * SmartYAMLImporter - Extracts structured knowledge (entities plus parent/child relationships) from YAML files
98
+ */
99
+ export declare class SmartYAMLImporter {
100
+ private brain;
101
+ private extractor;
102
+ private nlp;
103
+ private relationshipExtractor;
104
+ constructor(brain: Brainy);
105
+ /**
106
+ * Initialize the importer (awaits the natural-language processor's init())
107
+ */
108
+ init(): Promise<void>;
109
+ /**
110
+ * Extract entities and relationships from YAML string or buffer.
 * Buffers are decoded as UTF-8. Rejects with an Error whose message starts with
 * "Failed to parse YAML: " when the document cannot be parsed.
111
+ */
112
+ extract(yamlContent: string | Buffer, options?: SmartYAMLOptions): Promise<SmartYAMLResult>;
113
+ /**
114
+ * Extract entities and relationships from parsed YAML data (recursive traversal; leaves processingTime at 0 for extract() to fill in)
115
+ */
116
+ private extractFromData;
117
+ /**
118
+ * Extract an entity from a YAML object node (name, description and explicit type are looked up via the configured key lists)
119
+ */
120
+ private extractEntityFromObject;
121
+ }
@@ -0,0 +1,275 @@
1
+ /**
2
+ * Smart YAML Importer
3
+ *
4
+ * Extracts entities and relationships from YAML files using:
5
+ * - YAML parsing to JSON-like structure
6
+ * - Recursive traversal of nested structures
7
+ * - NeuralEntityExtractor for entity extraction from text values
8
+ * - NaturalLanguageProcessor for relationship inference
9
+ * - Hierarchical relationship creation (parent-child, contains, etc.)
10
+ *
11
+ * v4.2.0: New format handler
12
+ * NO MOCKS - Production-ready implementation
13
+ */
14
+ import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
15
+ import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
16
+ import { SmartRelationshipExtractor } from '../neural/SmartRelationshipExtractor.js';
17
+ import { NounType, VerbType } from '../types/graphTypes.js';
18
+ import * as yaml from 'js-yaml';
19
+ /**
20
+ * SmartYAMLImporter - Extracts structured knowledge from YAML files
21
+ */
22
+ export class SmartYAMLImporter {
23
+ constructor(brain) {
24
+ this.brain = brain;
25
+ this.extractor = new NeuralEntityExtractor(brain);
26
+ this.nlp = new NaturalLanguageProcessor(brain);
27
+ this.relationshipExtractor = new SmartRelationshipExtractor(brain);
28
+ }
29
+ /**
30
+ * Initialize the importer
31
+ */
32
+ async init() {
33
+ await this.nlp.init();
34
+ }
35
+ /**
36
+ * Extract entities and relationships from YAML string or buffer
37
+ */
38
+ async extract(yamlContent, options = {}) {
39
+ const startTime = Date.now();
40
+ // Parse YAML to JavaScript object
41
+ const yamlString = typeof yamlContent === 'string'
42
+ ? yamlContent
43
+ : yamlContent.toString('utf-8');
44
+ let data;
45
+ try {
46
+ data = yaml.load(yamlString);
47
+ }
48
+ catch (error) {
49
+ throw new Error(`Failed to parse YAML: ${error.message}`);
50
+ }
51
+ // Process as JSON-like structure
52
+ const result = await this.extractFromData(data, options);
53
+ result.processingTime = Date.now() - startTime;
54
+ return result;
55
+ }
56
+ /**
57
+ * Extract entities and relationships from parsed YAML data
58
+ */
59
+ async extractFromData(data, options) {
60
+ const opts = {
61
+ enableNeuralExtraction: options.enableNeuralExtraction !== false,
62
+ enableHierarchicalRelationships: options.enableHierarchicalRelationships !== false,
63
+ enableConceptExtraction: options.enableConceptExtraction !== false,
64
+ confidenceThreshold: options.confidenceThreshold || 0.6,
65
+ maxDepth: options.maxDepth || 10,
66
+ minStringLength: options.minStringLength || 3,
67
+ nameKeys: options.nameKeys || ['name', 'title', 'label', 'id'],
68
+ descriptionKeys: options.descriptionKeys || ['description', 'desc', 'summary', 'value'],
69
+ typeKeys: options.typeKeys || ['type', 'kind', 'category'],
70
+ onProgress: options.onProgress
71
+ };
72
+ const entities = [];
73
+ const relationships = [];
74
+ const entityMap = new Map();
75
+ let nodesProcessed = 0;
76
+ const stats = {
77
+ byType: {},
78
+ byDepth: {},
79
+ byConfidence: { high: 0, medium: 0, low: 0 }
80
+ };
81
+ // Traverse YAML structure recursively
82
+ const traverse = async (obj, path = '$', depth = 0, parentPath = null) => {
83
+ if (depth > opts.maxDepth)
84
+ return;
85
+ nodesProcessed++;
86
+ stats.byDepth[depth] = (stats.byDepth[depth] || 0) + 1;
87
+ // Report progress
88
+ if (options.onProgress && nodesProcessed % 10 === 0) {
89
+ options.onProgress({
90
+ processed: nodesProcessed,
91
+ entities: entities.length,
92
+ relationships: relationships.length
93
+ });
94
+ }
95
+ // Handle different value types
96
+ if (obj === null || obj === undefined) {
97
+ return;
98
+ }
99
+ // Handle arrays
100
+ if (Array.isArray(obj)) {
101
+ for (let i = 0; i < obj.length; i++) {
102
+ await traverse(obj[i], `${path}[${i}]`, depth + 1, path);
103
+ }
104
+ return;
105
+ }
106
+ // Handle objects
107
+ if (typeof obj === 'object') {
108
+ // Extract entity from object
109
+ const entity = await this.extractEntityFromObject(obj, path, parentPath, depth, opts);
110
+ if (entity) {
111
+ entities.push(entity);
112
+ entityMap.set(path, entity.id);
113
+ // Update stats
114
+ stats.byType[entity.type] = (stats.byType[entity.type] || 0) + 1;
115
+ if (entity.confidence > 0.8)
116
+ stats.byConfidence.high++;
117
+ else if (entity.confidence >= 0.6)
118
+ stats.byConfidence.medium++;
119
+ else
120
+ stats.byConfidence.low++;
121
+ // Create hierarchical relationship
122
+ if (opts.enableHierarchicalRelationships && parentPath) {
123
+ const parentId = entityMap.get(parentPath);
124
+ if (parentId) {
125
+ // Extract parent name from path for better context
126
+ const parentName = parentPath.split('.').pop()?.replace(/\[(\d+)\]/, 'item $1') || 'parent';
127
+ const childName = entity.name;
128
+ // Infer relationship type using SmartRelationshipExtractor
129
+ const context = `Hierarchical YAML structure: ${parentName} contains ${childName}. Parent path: ${parentPath}, Child path: ${entity.path}`;
130
+ const inferredRelationship = await this.relationshipExtractor.infer(parentName, childName, context, {
131
+ objectType: entity.type // Pass child entity type as hint
132
+ });
133
+ relationships.push({
134
+ from: parentId,
135
+ to: entity.id,
136
+ type: inferredRelationship?.type || VerbType.Contains, // Fallback to Contains for hierarchical relationships
137
+ confidence: inferredRelationship?.confidence || 0.9,
138
+ weight: inferredRelationship?.weight || 1.0,
139
+ evidence: inferredRelationship?.evidence || 'Hierarchical parent-child relationship in YAML structure'
140
+ });
141
+ }
142
+ }
143
+ }
144
+ // Traverse nested objects
145
+ for (const [key, value] of Object.entries(obj)) {
146
+ await traverse(value, `${path}.${key}`, depth + 1, path);
147
+ }
148
+ return;
149
+ }
150
+ // Handle primitive values (strings, numbers, booleans)
151
+ if (typeof obj === 'string' && obj.length >= opts.minStringLength) {
152
+ // Extract entities from string values
153
+ if (opts.enableNeuralExtraction) {
154
+ const extractedEntities = await this.extractor.extract(obj, {
155
+ confidence: opts.confidenceThreshold
156
+ });
157
+ for (const extracted of extractedEntities) {
158
+ const entityId = `${path}:${extracted.text}`;
159
+ const entity = {
160
+ id: entityId,
161
+ name: extracted.text,
162
+ type: extracted.type,
163
+ description: obj,
164
+ confidence: extracted.confidence,
165
+ weight: extracted.weight || 1.0,
166
+ path,
167
+ parentPath,
168
+ metadata: {
169
+ position: extracted.position,
170
+ extractedFrom: 'string-value'
171
+ }
172
+ };
173
+ entities.push(entity);
174
+ entityMap.set(entityId, entityId);
175
+ // Update stats
176
+ stats.byType[entity.type] = (stats.byType[entity.type] || 0) + 1;
177
+ if (entity.confidence > 0.8)
178
+ stats.byConfidence.high++;
179
+ else if (entity.confidence >= 0.6)
180
+ stats.byConfidence.medium++;
181
+ else
182
+ stats.byConfidence.low++;
183
+ }
184
+ }
185
+ }
186
+ };
187
+ // Start traversal
188
+ await traverse(data);
189
+ // Final progress report
190
+ if (options.onProgress) {
191
+ options.onProgress({
192
+ processed: nodesProcessed,
193
+ entities: entities.length,
194
+ relationships: relationships.length
195
+ });
196
+ }
197
+ return {
198
+ nodesProcessed,
199
+ entitiesExtracted: entities.length,
200
+ relationshipsInferred: relationships.length,
201
+ entities,
202
+ relationships,
203
+ entityMap,
204
+ processingTime: 0, // Will be set by caller
205
+ stats
206
+ };
207
+ }
208
+ /**
209
+ * Extract an entity from a YAML object node
210
+ */
211
+ async extractEntityFromObject(obj, path, parentPath, depth, opts) {
212
+ // Try to find name
213
+ let name = null;
214
+ for (const key of opts.nameKeys) {
215
+ if (obj[key] && typeof obj[key] === 'string') {
216
+ name = obj[key];
217
+ break;
218
+ }
219
+ }
220
+ // If no explicit name, use path segment
221
+ if (!name) {
222
+ const segments = path.split('.');
223
+ name = segments[segments.length - 1];
224
+ if (name === '$')
225
+ name = 'root';
226
+ }
227
+ // Try to find description
228
+ let description = name;
229
+ for (const key of opts.descriptionKeys) {
230
+ if (obj[key] && typeof obj[key] === 'string') {
231
+ description = obj[key];
232
+ break;
233
+ }
234
+ }
235
+ // Try to find explicit type
236
+ let explicitType = null;
237
+ for (const key of opts.typeKeys) {
238
+ if (obj[key] && typeof obj[key] === 'string') {
239
+ explicitType = obj[key];
240
+ break;
241
+ }
242
+ }
243
+ // Classify entity type using SmartExtractor
244
+ const classification = await this.extractor.extract(description, {
245
+ confidence: opts.confidenceThreshold
246
+ });
247
+ const entityType = classification.length > 0
248
+ ? classification[0].type
249
+ : NounType.Thing;
250
+ const confidence = classification.length > 0
251
+ ? classification[0].confidence
252
+ : 0.5;
253
+ const weight = classification.length > 0
254
+ ? classification[0].weight || 1.0
255
+ : 1.0;
256
+ // Create entity
257
+ const entity = {
258
+ id: path,
259
+ name,
260
+ type: entityType,
261
+ description,
262
+ confidence,
263
+ weight,
264
+ path,
265
+ parentPath,
266
+ metadata: {
267
+ depth,
268
+ explicitType,
269
+ yamlKeys: Object.keys(obj)
270
+ }
271
+ };
272
+ return entity;
273
+ }
274
+ }
275
+ //# sourceMappingURL=SmartYAMLImporter.js.map
@@ -187,6 +187,14 @@ export class VFSStructureGenerator {
187
187
  */
188
188
  groupEntities(importResult, options) {
189
189
  const groups = new Map();
190
+ // Handle sheet-based grouping (v4.2.0)
191
+ if (options.groupBy === 'sheet' && importResult.sheets && importResult.sheets.length > 0) {
192
+ for (const sheet of importResult.sheets) {
193
+ groups.set(sheet.name, sheet.rows);
194
+ }
195
+ return groups;
196
+ }
197
+ // Handle other grouping strategies
190
198
  for (const extracted of importResult.rows) {
191
199
  let groupName;
192
200
  switch (options.groupBy) {
@@ -201,6 +209,10 @@ export class VFSStructureGenerator {
201
209
  options.customGrouping(extracted.entity) :
202
210
  'entities';
203
211
  break;
212
+ case 'sheet':
213
+ // Fallback if sheets data not available
214
+ groupName = 'entities';
215
+ break;
204
216
  default:
205
217
  groupName = 'entities';
206
218
  }
@@ -0,0 +1,279 @@
1
+ /**
2
+ * SmartExtractor - Unified entity type extraction using ensemble of neural signals
3
+ *
4
+ * PRODUCTION-READY: Single orchestration class for all entity type classification
5
+ *
6
+ * Design Philosophy:
7
+ * - Simplicity over complexity (KISS principle)
8
+ * - One class instead of multiple strategy layers
9
+ * - Clear execution path for debugging
10
+ * - Comprehensive format intelligence built-in
11
+ *
12
+ * Ensemble Architecture:
13
+ * - ExactMatchSignal (40%) - Explicit patterns and exact keywords
14
+ * - EmbeddingSignal (35%) - Neural similarity with type embeddings
15
+ * - PatternSignal (20%) - Regex patterns and naming conventions
16
+ * - ContextSignal (5%) - Relationship-based inference
17
+ *
18
+ * Format Intelligence:
19
+ * Supports 7 major formats with automatic hint extraction:
20
+ * - Excel (.xlsx): Column headers, sheet names, "Related Terms" detection
21
+ * - CSV (.csv): Header row patterns, naming conventions
22
+ * - PDF (.pdf): Form field names and labels
23
+ * - YAML (.yaml, .yml): Semantic key names
24
+ * - DOCX (.docx): Heading levels and structure
25
+ * - JSON (.json): Field name patterns
26
+ * - Markdown (.md): Heading hierarchy
27
+ *
28
+ * Performance:
29
+ * - Parallel signal execution (~15ms total)
30
+ * - LRU caching for hot entities
31
+ * - Confidence boosting when signals agree
32
+ * - Graceful degradation on errors
33
+ */
34
+ import type { Brainy } from '../brainy.js';
35
+ import type { NounType } from '../types/graphTypes.js';
36
+ /**
37
+ * Extraction result with full traceability: the chosen type, how confident the
 * extractor is, which source produced it, and optional per-signal detail.
38
+ */
39
+ export interface ExtractionResult {
40
+ type: NounType; // classified entity type
41
+ confidence: number; // confidence score for `type`
42
+ source: 'ensemble' | 'exact-match' | 'pattern' | 'embedding' | 'context'; // presumably the combined ensemble or the single winning signal - confirm against the implementation
43
+ evidence: string; // human-readable explanation of the decision
44
+ metadata?: {
45
+ signalResults?: Array<{
46
+ signal: string; // name of the contributing signal
47
+ type: NounType; // type that signal voted for
48
+ confidence: number; // that signal's confidence
49
+ weight: number; // weight applied to that signal
50
+ }>;
51
+ agreementBoost?: number; // NOTE(review): presumably the confidence boost applied when signals agree - confirm
52
+ formatHints?: string[]; // hint strings derived from the format context
53
+ formatContext?: FormatContext; // the format context the caller supplied
54
+ };
55
+ }
56
+ /**
57
+ * Format context for classification: describes where in the source document
 * a candidate came from. Every field is optional.
58
+ */
59
+ export interface FormatContext {
60
+ format?: 'excel' | 'csv' | 'pdf' | 'yaml' | 'docx' | 'json' | 'markdown'; // source file format
61
+ columnHeader?: string; // presumably the Excel/CSV column header - confirm
62
+ fieldName?: string; // presumably the PDF form field or JSON field name - confirm
63
+ yamlKey?: string; // presumably the YAML key holding the value - confirm
64
+ headingLevel?: number; // presumably the DOCX/Markdown heading level - confirm
65
+ sheetName?: string; // presumably the Excel sheet name - confirm
66
+ metadata?: Record<string, any>; // free-form extra context
67
+ }
68
+ /**
69
+ * Options for SmartExtractor. Every field is optional; default values are set
 * by the implementation and are not visible in this declaration file.
70
+ */
71
+ export interface SmartExtractorOptions {
72
+ minConfidence?: number; // NOTE(review): presumably results below this confidence are rejected - confirm
73
+ enableFormatHints?: boolean; // toggle for format-specific hint extraction
74
+ enableEnsemble?: boolean; // toggle for ensemble voting (the class also declares a best-single-signal path for when ensemble is disabled)
75
+ cacheSize?: number; // NOTE(review): presumably the LRU cache capacity - confirm
76
+ weights?: { // per-signal ensemble weights; the file header lists 40% / 35% / 20% / 5% for exact-match / embedding / pattern / context - presumably the defaults, confirm
77
+ exactMatch?: number;
78
+ embedding?: number;
79
+ pattern?: number;
80
+ context?: number;
81
+ };
82
+ }
83
+ /**
84
+ * SmartExtractor - Unified entity type classification
85
+ *
86
+ * This is the single entry point for all entity type extraction.
87
+ * It orchestrates all 4 signals, applies format intelligence,
88
+ * and combines results using ensemble weighting.
89
+ *
90
+ * Production features:
91
+ * - Parallel signal execution for performance
92
+ * - Format-specific hint extraction
93
+ * - Ensemble voting with confidence boosting
94
+ * - Comprehensive statistics and observability
95
+ * - LRU caching for hot paths
96
+ * - Graceful error handling
97
+ */
98
+ export declare class SmartExtractor {
99
+ private brain;
100
+ private options;
101
+ private exactMatchSignal;
102
+ private patternSignal;
103
+ private embeddingSignal;
104
+ private contextSignal;
105
+ private cache;
106
+ private cacheOrder;
107
+ private stats;
108
+ constructor(brain: Brainy, options?: SmartExtractorOptions);
109
+ /**
110
+ * Extract entity type using ensemble of signals
111
+ *
112
+ * Main entry point - orchestrates all signals and combines results
113
+ *
114
+ * @param candidate Entity text to classify
115
+ * @param context Classification context with format hints
116
+ * @returns ExtractionResult with type and confidence, or null (presumably when no
 *          signal yields an acceptable classification - confirm against the implementation)
117
+ */
118
+ extract(candidate: string, context?: {
119
+ definition?: string;
120
+ formatContext?: FormatContext;
121
+ allTerms?: string[];
122
+ metadata?: any;
123
+ }): Promise<ExtractionResult | null>;
124
+ /**
125
+ * Extract format-specific hints from context
126
+ *
127
+ * Returns array of hint strings that can help with classification
128
+ */
129
+ private extractFormatHints;
130
+ /**
131
+ * Extract Excel-specific hints
132
+ */
133
+ private extractExcelHints;
134
+ /**
135
+ * Extract CSV-specific hints
136
+ */
137
+ private extractCsvHints;
138
+ /**
139
+ * Extract PDF-specific hints
140
+ */
141
+ private extractPdfHints;
142
+ /**
143
+ * Extract YAML-specific hints
144
+ */
145
+ private extractYamlHints;
146
+ /**
147
+ * Extract DOCX-specific hints
148
+ */
149
+ private extractDocxHints;
150
+ /**
151
+ * Extract JSON-specific hints
152
+ */
153
+ private extractJsonHints;
154
+ /**
155
+ * Extract Markdown-specific hints
156
+ */
157
+ private extractMarkdownHints;
158
+ /**
159
+ * Combine signal results using ensemble voting
160
+ *
161
+ * Applies weighted voting with confidence boosting when signals agree
162
+ */
163
+ private combineEnsemble;
164
+ /**
165
+ * Select best single signal (when ensemble is disabled)
166
+ */
167
+ private selectBestSignal;
168
+ /**
169
+ * Update statistics based on result
170
+ */
171
+ private updateStatistics;
172
+ /**
173
+ * Get cache key from candidate and context
174
+ */
175
+ private getCacheKey;
176
+ /**
177
+ * Get from LRU cache
178
+ */
179
+ private getFromCache;
180
+ /**
181
+ * Add to LRU cache with eviction
182
+ */
183
+ private addToCache;
184
+ /**
185
+ * Get comprehensive statistics: the extractor's own counters and rates plus a
 * `signalStats` section with one entry per signal (exactMatch, pattern, embedding, context)
186
+ */
187
+ getStats(): {
188
+ cacheSize: number;
189
+ cacheHitRate: number;
190
+ ensembleRate: number;
191
+ formatHintRate: number;
192
+ signalStats: {
193
+ exactMatch: {
194
+ indexSize: number;
195
+ cacheSize: number;
196
+ cacheHitRate: number;
197
+ termMatchRate: number;
198
+ metadataMatchRate: number;
199
+ formatMatchRate: number;
200
+ calls: number;
201
+ cacheHits: number;
202
+ termMatches: number;
203
+ metadataMatches: number;
204
+ formatMatches: number;
205
+ };
206
+ pattern: {
207
+ cacheSize: number;
208
+ patternCount: number;
209
+ cacheHitRate: number;
210
+ regexMatchRate: number;
211
+ namingMatchRate: number;
212
+ structuralMatchRate: number;
213
+ calls: number;
214
+ cacheHits: number;
215
+ regexMatches: number;
216
+ namingMatches: number;
217
+ structuralMatches: number;
218
+ };
219
+ embedding: {
220
+ cacheSize: number;
221
+ historySize: number;
222
+ cacheHitRate: number;
223
+ typeMatchRate: number;
224
+ graphMatchRate: number;
225
+ historyMatchRate: number;
226
+ calls: number;
227
+ cacheHits: number;
228
+ typeMatches: number;
229
+ graphMatches: number;
230
+ historyMatches: number;
231
+ combinedBoosts: number;
232
+ };
233
+ context: {
234
+ cacheSize: number;
235
+ cacheHitRate: number;
236
+ relationshipMatchRate: number;
237
+ attributeMatchRate: number;
238
+ calls: number;
239
+ cacheHits: number;
240
+ relationshipMatches: number;
241
+ attributeMatches: number;
242
+ combinedMatches: number;
243
+ };
244
+ };
245
+ calls: number;
246
+ cacheHits: number;
247
+ exactMatchWins: number;
248
+ patternWins: number;
249
+ embeddingWins: number;
250
+ contextWins: number;
251
+ ensembleWins: number;
252
+ agreementBoosts: number;
253
+ formatHintsUsed: number;
254
+ averageConfidence: number;
255
+ averageSignalsUsed: number;
256
+ };
257
+ /**
258
+ * Reset all statistics
259
+ */
260
+ resetStats(): void;
261
+ /**
262
+ * Clear all caches
263
+ */
264
+ clearCache(): void;
265
+ /**
266
+ * Add entity to historical data (for embedding signal temporal boosting)
 *
 * @param text Entity text
 * @param type Entity type recorded for that text
 * @param vector Embedding vector associated with the text
267
+ */
268
+ addToHistory(text: string, type: NounType, vector: number[]): void;
269
+ /**
270
+ * Clear historical data
271
+ */
272
+ clearHistory(): void;
273
+ }
274
+ /**
275
+ * Create a new SmartExtractor instance
276
+ *
277
+ * Convenience factory function taking the same arguments as the SmartExtractor constructor.
 *
 * @param brain Brainy instance the extractor operates on
 * @param options Optional extractor settings
 * @returns A SmartExtractor instance
278
+ */
279
+ export declare function createSmartExtractor(brain: Brainy, options?: SmartExtractorOptions): SmartExtractor;