@soulcraft/brainy 4.1.3 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +100 -7
- package/dist/brainy.d.ts +74 -16
- package/dist/brainy.js +74 -16
- package/dist/import/FormatDetector.d.ts +6 -1
- package/dist/import/FormatDetector.js +40 -1
- package/dist/import/ImportCoordinator.d.ts +155 -5
- package/dist/import/ImportCoordinator.js +346 -6
- package/dist/import/InstancePool.d.ts +136 -0
- package/dist/import/InstancePool.js +231 -0
- package/dist/importers/SmartCSVImporter.d.ts +2 -1
- package/dist/importers/SmartCSVImporter.js +11 -22
- package/dist/importers/SmartDOCXImporter.d.ts +125 -0
- package/dist/importers/SmartDOCXImporter.js +227 -0
- package/dist/importers/SmartExcelImporter.d.ts +12 -1
- package/dist/importers/SmartExcelImporter.js +40 -25
- package/dist/importers/SmartJSONImporter.d.ts +1 -0
- package/dist/importers/SmartJSONImporter.js +25 -6
- package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
- package/dist/importers/SmartMarkdownImporter.js +11 -16
- package/dist/importers/SmartPDFImporter.d.ts +2 -1
- package/dist/importers/SmartPDFImporter.js +11 -22
- package/dist/importers/SmartYAMLImporter.d.ts +121 -0
- package/dist/importers/SmartYAMLImporter.js +275 -0
- package/dist/importers/VFSStructureGenerator.js +12 -0
- package/dist/neural/SmartExtractor.d.ts +279 -0
- package/dist/neural/SmartExtractor.js +592 -0
- package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
- package/dist/neural/SmartRelationshipExtractor.js +396 -0
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/neural/entityExtractor.d.ts +3 -0
- package/dist/neural/entityExtractor.js +34 -36
- package/dist/neural/presets.d.ts +189 -0
- package/dist/neural/presets.js +365 -0
- package/dist/neural/signals/ContextSignal.d.ts +166 -0
- package/dist/neural/signals/ContextSignal.js +646 -0
- package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
- package/dist/neural/signals/EmbeddingSignal.js +435 -0
- package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
- package/dist/neural/signals/ExactMatchSignal.js +542 -0
- package/dist/neural/signals/PatternSignal.d.ts +159 -0
- package/dist/neural/signals/PatternSignal.js +478 -0
- package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
- package/dist/neural/signals/VerbContextSignal.js +390 -0
- package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
- package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
- package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
- package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
- package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
- package/dist/neural/signals/VerbPatternSignal.js +457 -0
- package/dist/types/graphTypes.d.ts +2 -0
- package/package.json +4 -1
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SmartRelationshipExtractor - Unified relationship type extraction using ensemble of neural signals
|
|
3
|
+
*
|
|
4
|
+
* PRODUCTION-READY: Parallel to SmartExtractor but for verbs/relationships
|
|
5
|
+
*
|
|
6
|
+
* Design Philosophy:
|
|
7
|
+
* - Simplicity over complexity (KISS principle)
|
|
8
|
+
* - One class instead of multiple strategy layers
|
|
9
|
+
* - Clear execution path for debugging
|
|
10
|
+
* - Comprehensive relationship intelligence built-in
|
|
11
|
+
*
|
|
12
|
+
* Ensemble Architecture:
|
|
13
|
+
* - VerbExactMatchSignal (40%) - Explicit keywords and phrases
|
|
14
|
+
* - VerbEmbeddingSignal (35%) - Neural similarity with verb embeddings
|
|
15
|
+
* - VerbPatternSignal (20%) - Regex patterns and structures
|
|
16
|
+
* - VerbContextSignal (5%) - Entity type pair hints
|
|
17
|
+
*
|
|
18
|
+
* Performance:
|
|
19
|
+
* - Parallel signal execution (~15-20ms total)
|
|
20
|
+
* - LRU caching for hot relationships
|
|
21
|
+
* - Confidence boosting when signals agree
|
|
22
|
+
* - Graceful degradation on errors
|
|
23
|
+
*/
|
|
24
|
+
import type { Brainy } from '../brainy.js';
|
|
25
|
+
import type { VerbType, NounType } from '../types/graphTypes.js';
|
|
26
|
+
/**
 * Outcome of a relationship-type inference, carrying enough detail to trace
 * which signal(s) produced it.
 */
export interface RelationshipExtractionResult {
    /** The relationship (verb) type that was selected. */
    type: VerbType;
    /** Confidence of the selection; the ensemble path caps this at 1.0. */
    confidence: number;
    /** Relationship weight; the extractor currently sets it from the confidence value. */
    weight: number;
    /** `'ensemble'` for a combined vote, otherwise the name of the single winning signal. */
    source: 'ensemble' | 'exact-match' | 'pattern' | 'embedding' | 'context';
    /** Human-readable explanation of why this type was chosen. */
    evidence: string;
    /** Per-signal breakdown; only populated for ensemble results. */
    metadata?: {
        /** The signals that voted for the winning type. */
        signalResults?: {
            signal: string;
            type: VerbType;
            confidence: number;
            weight: number;
        }[];
        /** Score bonus that was added because more than one signal agreed. */
        agreementBoost?: number;
    };
}
|
|
45
|
+
/**
 * Construction options for SmartRelationshipExtractor.
 *
 * Every field is optional; the constructor fills in a default for anything
 * that is omitted.
 */
export interface SmartRelationshipExtractorOptions {
    /** Minimum score a result must reach to be returned (default 0.60). */
    minConfidence?: number;
    /** Combine all signals by weighted vote (default true) instead of picking one signal. */
    enableEnsemble?: boolean;
    /** Maximum number of cached inference results (default 2000). */
    cacheSize?: number;
    /**
     * Relative weight of each signal. After defaults are applied the four
     * values must sum to 1.0 (within 0.01) or the constructor throws.
     */
    weights?: {
        /** Default 0.40. */
        exactMatch?: number;
        /** Default 0.35. */
        embedding?: number;
        /** Default 0.20. */
        pattern?: number;
        /** Default 0.05. */
        context?: number;
    };
}
|
|
59
|
+
/**
 * SmartRelationshipExtractor - unified relationship type classification.
 *
 * The one entry point for relationship (verb) type extraction: it runs the
 * four verb signals and merges their answers using ensemble weighting.
 *
 * Production features:
 * - Parallel signal execution for performance
 * - Ensemble voting with confidence boosting
 * - Comprehensive statistics and observability
 * - LRU caching for hot paths
 * - Graceful error handling
 */
export declare class SmartRelationshipExtractor {
    private brain;
    private options;
    private exactMatchSignal;
    private embeddingSignal;
    private patternSignal;
    private contextSignal;
    private cache;
    private cacheOrder;
    private stats;
    /**
     * @param brain Brainy instance passed on to each signal
     * @param options Optional overrides for thresholds, cache size and signal weights
     * @throws Error when the resolved signal weights do not sum to 1.0
     */
    constructor(brain: Brainy, options?: SmartRelationshipExtractorOptions);
    /**
     * Infer the relationship type between two entities.
     *
     * Main entry point: runs every signal, combines the outcomes and caches
     * the answer (null answers are cached as well).
     *
     * @param subject Subject entity name (e.g., "Alice")
     * @param object Object entity name (e.g., "UCSF")
     * @param context Full context text (sentence or paragraph)
     * @param options Additional hints: entity types and a precomputed context vector
     * @returns The extraction result, or null when nothing reaches the confidence
     *          threshold or an error occurs
     */
    infer(subject: string, object: string, context: string, options?: {
        subjectType?: NounType;
        objectType?: NounType;
        contextVector?: number[];
    }): Promise<RelationshipExtractionResult | null>;
    /**
     * Merge signal results by weighted voting, adding a confidence boost
     * when several signals agree on the same type.
     */
    private combineEnsemble;
    /**
     * Pick the single strongest signal (used when the ensemble is disabled).
     */
    private selectBestSignal;
    /**
     * Fold a returned result into the running statistics.
     */
    private updateStatistics;
    /**
     * Build the cache key for an inference request.
     */
    private getCacheKey;
    /**
     * Look up a key in the LRU cache.
     */
    private getFromCache;
    /**
     * Store a value in the LRU cache, evicting when over capacity.
     */
    private addToCache;
    /**
     * Snapshot of the extractor's counters plus the statistics reported by
     * each underlying signal.
     */
    getStats(): {
        /** Current number of cached inference results. */
        cacheSize: number;
        /** cacheHits / calls, or 0 before the first call. */
        cacheHitRate: number;
        /** ensembleWins / calls, or 0 before the first call. */
        ensembleRate: number;
        /** Statistics as reported by each signal's own getStats(). */
        signalStats: {
            exactMatch: {
                keywordCount: number;
                cacheSize: number;
                cacheHitRate: number;
                calls: number;
                cacheHits: number;
                exactMatches: number;
                phraseMatches: number;
                partialMatches: number;
            };
            embedding: {
                verbTypeCount: number;
                historySize: number;
                cacheSize: number;
                cacheHitRate: number;
                matchRate: number;
                calls: number;
                cacheHits: number;
                matches: number;
                temporalBoosts: number;
                averageSimilarity: number;
            };
            pattern: {
                patternCount: number;
                cacheSize: number;
                cacheHitRate: number;
                matchRate: number;
                topPatterns: Array<{
                    pattern: string;
                    hits: number;
                }>;
                calls: number;
                cacheHits: number;
                matches: number;
                patternHits: Map<string, number>;
            };
            context: {
                hintCount: number;
                cacheSize: number;
                cacheHitRate: number;
                matchRate: number;
                topHints: Array<{
                    hint: string;
                    hits: number;
                }>;
                calls: number;
                cacheHits: number;
                matches: number;
                hintHits: Map<string, number>;
            };
        };
        calls: number;
        cacheHits: number;
        exactMatchWins: number;
        embeddingWins: number;
        patternWins: number;
        contextWins: number;
        ensembleWins: number;
        agreementBoosts: number;
        averageConfidence: number;
        averageSignalsUsed: number;
    };
    /**
     * Zero the extractor's statistics and those of every signal.
     */
    resetStats(): void;
    /**
     * Empty the extractor's cache and the cache of every signal.
     */
    clearCache(): void;
    /**
     * Record a known relationship as historical data (forwarded to the
     * embedding signal for temporal boosting).
     */
    addToHistory(context: string, type: VerbType, vector: number[]): void;
    /**
     * Drop the historical data held by the embedding signal.
     */
    clearHistory(): void;
}
|
|
212
|
+
/**
 * Convenience factory: builds a SmartRelationshipExtractor.
 *
 * @param brain Brainy instance the extractor will use
 * @param options Optional extractor configuration
 * @returns A new SmartRelationshipExtractor instance
 */
export declare function createSmartRelationshipExtractor(
    brain: Brainy,
    options?: SmartRelationshipExtractorOptions
): SmartRelationshipExtractor;
|
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SmartRelationshipExtractor - Unified relationship type extraction using ensemble of neural signals
|
|
3
|
+
*
|
|
4
|
+
* PRODUCTION-READY: Parallel to SmartExtractor but for verbs/relationships
|
|
5
|
+
*
|
|
6
|
+
* Design Philosophy:
|
|
7
|
+
* - Simplicity over complexity (KISS principle)
|
|
8
|
+
* - One class instead of multiple strategy layers
|
|
9
|
+
* - Clear execution path for debugging
|
|
10
|
+
* - Comprehensive relationship intelligence built-in
|
|
11
|
+
*
|
|
12
|
+
* Ensemble Architecture:
|
|
13
|
+
* - VerbExactMatchSignal (40%) - Explicit keywords and phrases
|
|
14
|
+
* - VerbEmbeddingSignal (35%) - Neural similarity with verb embeddings
|
|
15
|
+
* - VerbPatternSignal (20%) - Regex patterns and structures
|
|
16
|
+
* - VerbContextSignal (5%) - Entity type pair hints
|
|
17
|
+
*
|
|
18
|
+
* Performance:
|
|
19
|
+
* - Parallel signal execution (~15-20ms total)
|
|
20
|
+
* - LRU caching for hot relationships
|
|
21
|
+
* - Confidence boosting when signals agree
|
|
22
|
+
* - Graceful degradation on errors
|
|
23
|
+
*/
|
|
24
|
+
import { VerbExactMatchSignal } from './signals/VerbExactMatchSignal.js';
|
|
25
|
+
import { VerbEmbeddingSignal } from './signals/VerbEmbeddingSignal.js';
|
|
26
|
+
import { VerbPatternSignal } from './signals/VerbPatternSignal.js';
|
|
27
|
+
import { VerbContextSignal } from './signals/VerbContextSignal.js';
|
|
28
|
+
/**
|
|
29
|
+
* SmartRelationshipExtractor - Unified relationship type classification
|
|
30
|
+
*
|
|
31
|
+
* This is the single entry point for all relationship type extraction.
|
|
32
|
+
* It orchestrates all 4 signals, and combines results using ensemble weighting.
|
|
33
|
+
*
|
|
34
|
+
* Production features:
|
|
35
|
+
* - Parallel signal execution for performance
|
|
36
|
+
* - Ensemble voting with confidence boosting
|
|
37
|
+
* - Comprehensive statistics and observability
|
|
38
|
+
* - LRU caching for hot paths
|
|
39
|
+
* - Graceful error handling
|
|
40
|
+
*/
|
|
41
|
+
export class SmartRelationshipExtractor {
|
|
42
|
+
constructor(brain, options) {
|
|
43
|
+
// LRU cache
|
|
44
|
+
this.cache = new Map();
|
|
45
|
+
this.cacheOrder = [];
|
|
46
|
+
// Statistics
|
|
47
|
+
this.stats = {
|
|
48
|
+
calls: 0,
|
|
49
|
+
cacheHits: 0,
|
|
50
|
+
exactMatchWins: 0,
|
|
51
|
+
embeddingWins: 0,
|
|
52
|
+
patternWins: 0,
|
|
53
|
+
contextWins: 0,
|
|
54
|
+
ensembleWins: 0,
|
|
55
|
+
agreementBoosts: 0,
|
|
56
|
+
averageConfidence: 0,
|
|
57
|
+
averageSignalsUsed: 0
|
|
58
|
+
};
|
|
59
|
+
this.brain = brain;
|
|
60
|
+
// Set default options
|
|
61
|
+
this.options = {
|
|
62
|
+
minConfidence: options?.minConfidence ?? 0.60,
|
|
63
|
+
enableEnsemble: options?.enableEnsemble ?? true,
|
|
64
|
+
cacheSize: options?.cacheSize ?? 2000,
|
|
65
|
+
weights: {
|
|
66
|
+
exactMatch: options?.weights?.exactMatch ?? 0.40,
|
|
67
|
+
embedding: options?.weights?.embedding ?? 0.35,
|
|
68
|
+
pattern: options?.weights?.pattern ?? 0.20,
|
|
69
|
+
context: options?.weights?.context ?? 0.05
|
|
70
|
+
}
|
|
71
|
+
};
|
|
72
|
+
// Validate weights sum to 1.0
|
|
73
|
+
const weightSum = Object.values(this.options.weights).reduce((a, b) => a + b, 0);
|
|
74
|
+
if (Math.abs(weightSum - 1.0) > 0.01) {
|
|
75
|
+
throw new Error(`Signal weights must sum to 1.0, got ${weightSum}`);
|
|
76
|
+
}
|
|
77
|
+
// Initialize signals
|
|
78
|
+
this.exactMatchSignal = new VerbExactMatchSignal(brain, {
|
|
79
|
+
minConfidence: 0.50, // Lower threshold, ensemble will filter
|
|
80
|
+
cacheSize: Math.floor(this.options.cacheSize / 4)
|
|
81
|
+
});
|
|
82
|
+
this.embeddingSignal = new VerbEmbeddingSignal(brain, {
|
|
83
|
+
minConfidence: 0.50,
|
|
84
|
+
cacheSize: Math.floor(this.options.cacheSize / 4)
|
|
85
|
+
});
|
|
86
|
+
this.patternSignal = new VerbPatternSignal(brain, {
|
|
87
|
+
minConfidence: 0.50,
|
|
88
|
+
cacheSize: Math.floor(this.options.cacheSize / 4)
|
|
89
|
+
});
|
|
90
|
+
this.contextSignal = new VerbContextSignal(brain, {
|
|
91
|
+
minConfidence: 0.50,
|
|
92
|
+
cacheSize: Math.floor(this.options.cacheSize / 4)
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* Infer relationship type using ensemble of signals
|
|
97
|
+
*
|
|
98
|
+
* Main entry point - orchestrates all signals and combines results
|
|
99
|
+
*
|
|
100
|
+
* @param subject Subject entity name (e.g., "Alice")
|
|
101
|
+
* @param object Object entity name (e.g., "UCSF")
|
|
102
|
+
* @param context Full context text (sentence or paragraph)
|
|
103
|
+
* @param options Additional context for inference
|
|
104
|
+
* @returns RelationshipExtractionResult with type and confidence
|
|
105
|
+
*/
|
|
106
|
+
async infer(subject, object, context, options) {
|
|
107
|
+
this.stats.calls++;
|
|
108
|
+
// Check cache first
|
|
109
|
+
const cacheKey = this.getCacheKey(subject, object, context);
|
|
110
|
+
const cached = this.getFromCache(cacheKey);
|
|
111
|
+
if (cached !== undefined) {
|
|
112
|
+
this.stats.cacheHits++;
|
|
113
|
+
return cached;
|
|
114
|
+
}
|
|
115
|
+
try {
|
|
116
|
+
// Execute all signals in parallel
|
|
117
|
+
const [exactMatch, embeddingMatch, patternMatch, contextMatch] = await Promise.all([
|
|
118
|
+
this.exactMatchSignal.classify(context).catch(() => null),
|
|
119
|
+
this.embeddingSignal.classify(context, options?.contextVector).catch(() => null),
|
|
120
|
+
this.patternSignal.classify(subject, object, context).catch(() => null),
|
|
121
|
+
this.contextSignal.classify(options?.subjectType, options?.objectType).catch(() => null)
|
|
122
|
+
]);
|
|
123
|
+
// Wrap results with weights
|
|
124
|
+
const signalResults = [
|
|
125
|
+
{
|
|
126
|
+
signal: 'exact-match',
|
|
127
|
+
type: exactMatch?.type || null,
|
|
128
|
+
confidence: exactMatch?.confidence || 0,
|
|
129
|
+
weight: this.options.weights.exactMatch,
|
|
130
|
+
evidence: exactMatch?.evidence || ''
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
signal: 'embedding',
|
|
134
|
+
type: embeddingMatch?.type || null,
|
|
135
|
+
confidence: embeddingMatch?.confidence || 0,
|
|
136
|
+
weight: this.options.weights.embedding,
|
|
137
|
+
evidence: embeddingMatch?.evidence || ''
|
|
138
|
+
},
|
|
139
|
+
{
|
|
140
|
+
signal: 'pattern',
|
|
141
|
+
type: patternMatch?.type || null,
|
|
142
|
+
confidence: patternMatch?.confidence || 0,
|
|
143
|
+
weight: this.options.weights.pattern,
|
|
144
|
+
evidence: patternMatch?.evidence || ''
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
signal: 'context',
|
|
148
|
+
type: contextMatch?.type || null,
|
|
149
|
+
confidence: contextMatch?.confidence || 0,
|
|
150
|
+
weight: this.options.weights.context,
|
|
151
|
+
evidence: contextMatch?.evidence || ''
|
|
152
|
+
}
|
|
153
|
+
];
|
|
154
|
+
// Combine using ensemble or best signal
|
|
155
|
+
const result = this.options.enableEnsemble
|
|
156
|
+
? this.combineEnsemble(signalResults)
|
|
157
|
+
: this.selectBestSignal(signalResults);
|
|
158
|
+
// Cache result (including nulls to avoid recomputation)
|
|
159
|
+
this.addToCache(cacheKey, result);
|
|
160
|
+
// Update statistics
|
|
161
|
+
if (result) {
|
|
162
|
+
this.updateStatistics(result);
|
|
163
|
+
}
|
|
164
|
+
return result;
|
|
165
|
+
}
|
|
166
|
+
catch (error) {
|
|
167
|
+
// Graceful degradation
|
|
168
|
+
console.warn(`SmartRelationshipExtractor error for "${subject} → ${object}":`, error);
|
|
169
|
+
return null;
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
/**
|
|
173
|
+
* Combine signal results using ensemble voting
|
|
174
|
+
*
|
|
175
|
+
* Applies weighted voting with confidence boosting when signals agree
|
|
176
|
+
*/
|
|
177
|
+
combineEnsemble(signalResults) {
|
|
178
|
+
// Filter out null results
|
|
179
|
+
const validResults = signalResults.filter(r => r.type !== null);
|
|
180
|
+
if (validResults.length === 0) {
|
|
181
|
+
return null;
|
|
182
|
+
}
|
|
183
|
+
// Count votes by type with weighted confidence
|
|
184
|
+
const typeScores = new Map();
|
|
185
|
+
for (const result of validResults) {
|
|
186
|
+
if (!result.type)
|
|
187
|
+
continue;
|
|
188
|
+
const weighted = result.confidence * result.weight;
|
|
189
|
+
const existing = typeScores.get(result.type);
|
|
190
|
+
if (existing) {
|
|
191
|
+
existing.score += weighted;
|
|
192
|
+
existing.signals.push(result);
|
|
193
|
+
}
|
|
194
|
+
else {
|
|
195
|
+
typeScores.set(result.type, { score: weighted, signals: [result] });
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
// Find best type
|
|
199
|
+
let bestType = null;
|
|
200
|
+
let bestScore = 0;
|
|
201
|
+
let bestSignals = [];
|
|
202
|
+
for (const [type, data] of typeScores.entries()) {
|
|
203
|
+
// Apply agreement boost (multiple signals agree)
|
|
204
|
+
let finalScore = data.score;
|
|
205
|
+
if (data.signals.length > 1) {
|
|
206
|
+
const agreementBoost = 0.05 * (data.signals.length - 1);
|
|
207
|
+
finalScore += agreementBoost;
|
|
208
|
+
this.stats.agreementBoosts++;
|
|
209
|
+
}
|
|
210
|
+
if (finalScore > bestScore) {
|
|
211
|
+
bestScore = finalScore;
|
|
212
|
+
bestType = type;
|
|
213
|
+
bestSignals = data.signals;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
// Check minimum confidence threshold
|
|
217
|
+
if (!bestType || bestScore < this.options.minConfidence) {
|
|
218
|
+
return null;
|
|
219
|
+
}
|
|
220
|
+
// Track signal contributions
|
|
221
|
+
const usedSignals = bestSignals.length;
|
|
222
|
+
this.stats.averageSignalsUsed =
|
|
223
|
+
(this.stats.averageSignalsUsed * (this.stats.calls - 1) + usedSignals) / this.stats.calls;
|
|
224
|
+
// Build evidence string
|
|
225
|
+
const signalNames = bestSignals.map(s => s.signal).join(' + ');
|
|
226
|
+
const evidence = `Ensemble: ${signalNames} (${bestSignals.length} signal${bestSignals.length > 1 ? 's' : ''} agree)`;
|
|
227
|
+
return {
|
|
228
|
+
type: bestType,
|
|
229
|
+
confidence: Math.min(bestScore, 1.0), // Cap at 1.0
|
|
230
|
+
weight: Math.min(bestScore, 1.0),
|
|
231
|
+
source: 'ensemble',
|
|
232
|
+
evidence,
|
|
233
|
+
metadata: {
|
|
234
|
+
signalResults: bestSignals.map(s => ({
|
|
235
|
+
signal: s.signal,
|
|
236
|
+
type: s.type,
|
|
237
|
+
confidence: s.confidence,
|
|
238
|
+
weight: s.weight
|
|
239
|
+
})),
|
|
240
|
+
agreementBoost: bestSignals.length > 1 ? 0.05 * (bestSignals.length - 1) : 0
|
|
241
|
+
}
|
|
242
|
+
};
|
|
243
|
+
}
|
|
244
|
+
/**
|
|
245
|
+
* Select best single signal (when ensemble is disabled)
|
|
246
|
+
*/
|
|
247
|
+
selectBestSignal(signalResults) {
|
|
248
|
+
// Filter valid results and sort by weighted confidence
|
|
249
|
+
const validResults = signalResults
|
|
250
|
+
.filter(r => r.type !== null)
|
|
251
|
+
.map(r => ({ ...r, weightedScore: r.confidence * r.weight }))
|
|
252
|
+
.sort((a, b) => b.weightedScore - a.weightedScore);
|
|
253
|
+
if (validResults.length === 0) {
|
|
254
|
+
return null;
|
|
255
|
+
}
|
|
256
|
+
const best = validResults[0];
|
|
257
|
+
if (best.weightedScore < this.options.minConfidence) {
|
|
258
|
+
return null;
|
|
259
|
+
}
|
|
260
|
+
return {
|
|
261
|
+
type: best.type,
|
|
262
|
+
confidence: best.confidence,
|
|
263
|
+
weight: best.confidence,
|
|
264
|
+
source: best.signal,
|
|
265
|
+
evidence: best.evidence,
|
|
266
|
+
metadata: undefined
|
|
267
|
+
};
|
|
268
|
+
}
|
|
269
|
+
/**
|
|
270
|
+
* Update statistics based on result
|
|
271
|
+
*/
|
|
272
|
+
updateStatistics(result) {
|
|
273
|
+
// Track win counts
|
|
274
|
+
if (result.source === 'ensemble') {
|
|
275
|
+
this.stats.ensembleWins++;
|
|
276
|
+
}
|
|
277
|
+
else if (result.source === 'exact-match') {
|
|
278
|
+
this.stats.exactMatchWins++;
|
|
279
|
+
}
|
|
280
|
+
else if (result.source === 'embedding') {
|
|
281
|
+
this.stats.embeddingWins++;
|
|
282
|
+
}
|
|
283
|
+
else if (result.source === 'pattern') {
|
|
284
|
+
this.stats.patternWins++;
|
|
285
|
+
}
|
|
286
|
+
else if (result.source === 'context') {
|
|
287
|
+
this.stats.contextWins++;
|
|
288
|
+
}
|
|
289
|
+
// Update rolling average confidence
|
|
290
|
+
this.stats.averageConfidence =
|
|
291
|
+
(this.stats.averageConfidence * (this.stats.calls - 1) + result.confidence) / this.stats.calls;
|
|
292
|
+
}
|
|
293
|
+
/**
|
|
294
|
+
* Get cache key from parameters
|
|
295
|
+
*/
|
|
296
|
+
getCacheKey(subject, object, context) {
|
|
297
|
+
const normalized = `${subject}:${object}:${context.substring(0, 100)}`.toLowerCase().trim();
|
|
298
|
+
return normalized;
|
|
299
|
+
}
|
|
300
|
+
/**
|
|
301
|
+
* Get from LRU cache
|
|
302
|
+
*/
|
|
303
|
+
getFromCache(key) {
|
|
304
|
+
if (!this.cache.has(key))
|
|
305
|
+
return undefined;
|
|
306
|
+
const cached = this.cache.get(key);
|
|
307
|
+
// Move to end (most recently used)
|
|
308
|
+
this.cacheOrder = this.cacheOrder.filter(k => k !== key);
|
|
309
|
+
this.cacheOrder.push(key);
|
|
310
|
+
return cached ?? null;
|
|
311
|
+
}
|
|
312
|
+
/**
|
|
313
|
+
* Add to LRU cache with eviction
|
|
314
|
+
*/
|
|
315
|
+
addToCache(key, value) {
|
|
316
|
+
this.cache.set(key, value);
|
|
317
|
+
this.cacheOrder.push(key);
|
|
318
|
+
// Evict oldest if over limit
|
|
319
|
+
if (this.cache.size > this.options.cacheSize) {
|
|
320
|
+
const oldest = this.cacheOrder.shift();
|
|
321
|
+
if (oldest) {
|
|
322
|
+
this.cache.delete(oldest);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
/**
|
|
327
|
+
* Get comprehensive statistics
|
|
328
|
+
*/
|
|
329
|
+
getStats() {
|
|
330
|
+
return {
|
|
331
|
+
...this.stats,
|
|
332
|
+
cacheSize: this.cache.size,
|
|
333
|
+
cacheHitRate: this.stats.calls > 0 ? this.stats.cacheHits / this.stats.calls : 0,
|
|
334
|
+
ensembleRate: this.stats.calls > 0 ? this.stats.ensembleWins / this.stats.calls : 0,
|
|
335
|
+
signalStats: {
|
|
336
|
+
exactMatch: this.exactMatchSignal.getStats(),
|
|
337
|
+
embedding: this.embeddingSignal.getStats(),
|
|
338
|
+
pattern: this.patternSignal.getStats(),
|
|
339
|
+
context: this.contextSignal.getStats()
|
|
340
|
+
}
|
|
341
|
+
};
|
|
342
|
+
}
|
|
343
|
+
/**
|
|
344
|
+
* Reset all statistics
|
|
345
|
+
*/
|
|
346
|
+
resetStats() {
|
|
347
|
+
this.stats = {
|
|
348
|
+
calls: 0,
|
|
349
|
+
cacheHits: 0,
|
|
350
|
+
exactMatchWins: 0,
|
|
351
|
+
embeddingWins: 0,
|
|
352
|
+
patternWins: 0,
|
|
353
|
+
contextWins: 0,
|
|
354
|
+
ensembleWins: 0,
|
|
355
|
+
agreementBoosts: 0,
|
|
356
|
+
averageConfidence: 0,
|
|
357
|
+
averageSignalsUsed: 0
|
|
358
|
+
};
|
|
359
|
+
this.exactMatchSignal.resetStats();
|
|
360
|
+
this.embeddingSignal.resetStats();
|
|
361
|
+
this.patternSignal.resetStats();
|
|
362
|
+
this.contextSignal.resetStats();
|
|
363
|
+
}
|
|
364
|
+
/**
|
|
365
|
+
* Clear all caches
|
|
366
|
+
*/
|
|
367
|
+
clearCache() {
|
|
368
|
+
this.cache.clear();
|
|
369
|
+
this.cacheOrder = [];
|
|
370
|
+
this.exactMatchSignal.clearCache();
|
|
371
|
+
this.embeddingSignal.clearCache();
|
|
372
|
+
this.patternSignal.clearCache();
|
|
373
|
+
this.contextSignal.clearCache();
|
|
374
|
+
}
|
|
375
|
+
/**
|
|
376
|
+
* Add relationship to historical data (for embedding signal temporal boosting)
|
|
377
|
+
*/
|
|
378
|
+
addToHistory(context, type, vector) {
|
|
379
|
+
this.embeddingSignal.addToHistory(context, type, vector);
|
|
380
|
+
}
|
|
381
|
+
/**
|
|
382
|
+
* Clear historical data
|
|
383
|
+
*/
|
|
384
|
+
clearHistory() {
|
|
385
|
+
this.embeddingSignal.clearHistory();
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
/**
 * Convenience factory for SmartRelationshipExtractor.
 *
 * @param brain Brainy instance the extractor will use
 * @param options Optional extractor configuration
 * @returns A freshly constructed SmartRelationshipExtractor
 */
export function createSmartRelationshipExtractor(brain, options) {
    const extractor = new SmartRelationshipExtractor(brain, options);
    return extractor;
}
|
|
396
|
+
//# sourceMappingURL=SmartRelationshipExtractor.js.map
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* 🧠 BRAINY EMBEDDED TYPE EMBEDDINGS
|
|
3
3
|
*
|
|
4
4
|
* AUTO-GENERATED - DO NOT EDIT
|
|
5
|
-
* Generated: 2025-10-
|
|
5
|
+
* Generated: 2025-10-22T19:25:47.026Z
|
|
6
6
|
* Noun Types: 31
|
|
7
7
|
* Verb Types: 40
|
|
8
8
|
*
|
|
@@ -15,7 +15,7 @@ export const TYPE_METADATA = {
|
|
|
15
15
|
verbTypes: 40,
|
|
16
16
|
totalTypes: 71,
|
|
17
17
|
embeddingDimensions: 384,
|
|
18
|
-
generatedAt: "2025-10-
|
|
18
|
+
generatedAt: "2025-10-22T19:25:47.026Z",
|
|
19
19
|
sizeBytes: {
|
|
20
20
|
embeddings: 109056,
|
|
21
21
|
base64: 145408
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* Neural Entity Extractor using Brainy's NounTypes
|
|
3
3
|
* Uses embeddings and similarity matching for accurate type detection
|
|
4
4
|
*
|
|
5
|
+
* v4.2.0: Now powered by SmartExtractor for ultra-neural classification
|
|
5
6
|
* PRODUCTION-READY with caching support
|
|
6
7
|
*/
|
|
7
8
|
import { NounType } from '../types/graphTypes.js';
|
|
@@ -16,6 +17,7 @@ export interface ExtractedEntity {
|
|
|
16
17
|
end: number;
|
|
17
18
|
};
|
|
18
19
|
confidence: number;
|
|
20
|
+
weight?: number;
|
|
19
21
|
vector?: Vector;
|
|
20
22
|
metadata?: any;
|
|
21
23
|
}
|
|
@@ -26,6 +28,7 @@ export declare class NeuralEntityExtractor {
|
|
|
26
28
|
private cache;
|
|
27
29
|
private embeddingCache;
|
|
28
30
|
private embeddingCacheStats;
|
|
31
|
+
private smartExtractor;
|
|
29
32
|
constructor(brain: Brainy | Brainy<any>, cacheOptions?: EntityCacheOptions);
|
|
30
33
|
/**
|
|
31
34
|
* Initialize type embeddings for neural matching
|