@soulcraft/brainy 3.47.1 → 3.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,297 @@
+ /**
+ * Type-Aware Query Planner - Phase 3: Type-First Query Optimization
+ *
+ * Generates optimized query execution plans by inferring entity types from
+ * natural language queries using semantic similarity and routing to specific
+ * TypeAwareHNSWIndex graphs.
+ *
+ * Performance Impact:
+ * - Single-type queries: 31x speedup (search 1/31 graphs)
+ * - Multi-type queries: 6-15x speedup (search 2-5/31 graphs)
+ * - Overall: 40% latency reduction @ 1B scale
+ *
+ * Examples:
+ * - "Find engineers" → single-type → [Person] → 31x speedup
+ * - "People at Tesla" → multi-type → [Person, Organization] → 15.5x speedup
+ * - "Everything about AI" → all-types → [all 31 types] → no speedup
+ */
+ import { NounType, NOUN_TYPE_COUNT } from '../types/graphTypes.js';
+ import { inferNouns } from './semanticTypeInference.js';
+ import { prodLog } from '../utils/logger.js';
+ /**
+ * Type-Aware Query Planner
+ *
+ * Generates optimized query plans using semantic type inference to route queries
+ * to specific TypeAwareHNSWIndex graphs for billion-scale performance.
+ */
+ export class TypeAwareQueryPlanner {
+ constructor(config) {
+ this.config = {
+ singleTypeThreshold: config?.singleTypeThreshold ?? 0.8,
+ multiTypeThreshold: config?.multiTypeThreshold ?? 0.6,
+ maxMultiTypes: config?.maxMultiTypes ?? 5,
+ debug: config?.debug ?? false
+ };
+ this.stats = {
+ totalQueries: 0,
+ singleTypeQueries: 0,
+ multiTypeQueries: 0,
+ allTypesQueries: 0,
+ avgConfidence: 0
+ };
+ prodLog.info(`TypeAwareQueryPlanner initialized: thresholds single=${this.config.singleTypeThreshold}, multi=${this.config.multiTypeThreshold}`);
+ }
+ /**
+ * Plan an optimized query execution strategy using semantic type inference
+ *
+ * @param query - Natural language query string
+ * @returns Promise resolving to optimized query plan with routing strategy
+ */
+ async planQuery(query) {
+ const startTime = performance.now();
+ if (!query || query.trim().length === 0) {
+ return this.createAllTypesPlan(query, 'Empty query');
+ }
+ // Infer noun types for graph routing (nouns only, verbs not used for routing)
+ const inferences = await inferNouns(query, {
+ maxResults: this.config.maxMultiTypes,
+ minConfidence: this.config.multiTypeThreshold
+ });
+ if (inferences.length === 0) {
+ return this.createAllTypesPlan(query, 'No types inferred from query');
+ }
+ // Determine routing strategy based on inference confidence
+ const plan = this.selectRoutingStrategy(query, inferences);
+ // Update statistics
+ this.updateStats(plan);
+ const elapsed = performance.now() - startTime;
+ if (this.config.debug) {
+ prodLog.debug(`Query plan: ${plan.routing} with ${plan.targetTypes.length} types (${elapsed.toFixed(2)}ms)`);
+ }
+ // Performance assertion
+ if (elapsed > 10) {
+ prodLog.warn(`Query planning slow: ${elapsed.toFixed(2)}ms (target: < 10ms)`);
+ }
+ return plan;
+ }
+ /**
+ * Select routing strategy based on semantic inference results
+ */
+ selectRoutingStrategy(query, inferences) {
+ const topInference = inferences[0];
+ // Strategy 1: Single-type routing (highest confidence)
+ if (topInference.confidence >= this.config.singleTypeThreshold &&
+ (inferences.length === 1 ||
+ inferences[1].confidence < this.config.multiTypeThreshold)) {
+ return {
+ originalQuery: query,
+ inferredTypes: inferences,
+ routing: 'single-type',
+ targetTypes: [topInference.type],
+ estimatedSpeedup: NOUN_TYPE_COUNT / 1,
+ confidence: topInference.confidence,
+ reasoning: `High confidence (${(topInference.confidence * 100).toFixed(0)}%) for single type: ${topInference.type}`
+ };
+ }
+ // Strategy 2: Multi-type routing (moderate confidence, multiple types)
+ if (topInference.confidence >= this.config.multiTypeThreshold) {
+ const relevantTypes = inferences
+ .filter(inf => inf.confidence >= this.config.multiTypeThreshold)
+ .slice(0, this.config.maxMultiTypes)
+ .map(inf => inf.type);
+ const avgConfidence = relevantTypes.reduce((sum, type) => {
+ const inf = inferences.find(i => i.type === type);
+ return sum + (inf?.confidence || 0);
+ }, 0) / relevantTypes.length;
+ return {
+ originalQuery: query,
+ inferredTypes: inferences,
+ routing: 'multi-type',
+ targetTypes: relevantTypes,
+ estimatedSpeedup: NOUN_TYPE_COUNT / relevantTypes.length,
+ confidence: avgConfidence,
+ reasoning: `Multiple types detected with moderate confidence (avg ${(avgConfidence * 100).toFixed(0)}%): ${relevantTypes.join(', ')}`
+ };
+ }
+ // Strategy 3: All-types fallback (low confidence)
+ return this.createAllTypesPlan(query, `Low confidence (${(topInference.confidence * 100).toFixed(0)}%) - searching all types for safety`);
+ }
+ /**
+ * Create an all-types plan (fallback strategy)
+ */
+ createAllTypesPlan(query, reasoning) {
+ return {
+ originalQuery: query,
+ inferredTypes: [],
+ routing: 'all-types',
+ targetTypes: this.getAllNounTypes(),
+ estimatedSpeedup: 1.0,
+ confidence: 0.0,
+ reasoning
+ };
+ }
+ /**
+ * Get all noun types (for all-types routing)
+ */
+ getAllNounTypes() {
+ return [
+ NounType.Person,
+ NounType.Organization,
+ NounType.Location,
+ NounType.Thing,
+ NounType.Concept,
+ NounType.Event,
+ NounType.Document,
+ NounType.Media,
+ NounType.File,
+ NounType.Message,
+ NounType.Content,
+ NounType.Collection,
+ NounType.Dataset,
+ NounType.Product,
+ NounType.Service,
+ NounType.User,
+ NounType.Task,
+ NounType.Project,
+ NounType.Process,
+ NounType.State,
+ NounType.Role,
+ NounType.Topic,
+ NounType.Language,
+ NounType.Currency,
+ NounType.Measurement,
+ NounType.Hypothesis,
+ NounType.Experiment,
+ NounType.Contract,
+ NounType.Regulation,
+ NounType.Interface,
+ NounType.Resource
+ ];
+ }
+ /**
+ * Update query statistics
+ */
+ updateStats(plan) {
+ this.stats.totalQueries++;
+ switch (plan.routing) {
+ case 'single-type':
+ this.stats.singleTypeQueries++;
+ break;
+ case 'multi-type':
+ this.stats.multiTypeQueries++;
+ break;
+ case 'all-types':
+ this.stats.allTypesQueries++;
+ break;
+ }
+ // Update rolling average confidence
+ this.stats.avgConfidence =
+ (this.stats.avgConfidence * (this.stats.totalQueries - 1) + plan.confidence) /
+ this.stats.totalQueries;
+ }
+ /**
+ * Get query statistics
+ */
+ getStats() {
+ return { ...this.stats };
+ }
+ /**
+ * Get detailed statistics report
+ */
+ getStatsReport() {
+ const total = this.stats.totalQueries;
+ if (total === 0) {
+ return 'No queries processed yet';
+ }
+ const singlePct = ((this.stats.singleTypeQueries / total) * 100).toFixed(1);
+ const multiPct = ((this.stats.multiTypeQueries / total) * 100).toFixed(1);
+ const allPct = ((this.stats.allTypesQueries / total) * 100).toFixed(1);
+ const avgConf = (this.stats.avgConfidence * 100).toFixed(1);
+ // Calculate weighted average speedup
+ const avgSpeedup = ((this.stats.singleTypeQueries * 31.0 +
+ this.stats.multiTypeQueries * 10.0 +
+ this.stats.allTypesQueries * 1.0) /
+ total).toFixed(1);
+ return `
+ Query Statistics (${total} total):
+ - Single-type: ${this.stats.singleTypeQueries} (${singlePct}%) - 31x speedup
+ - Multi-type: ${this.stats.multiTypeQueries} (${multiPct}%) - ~10x speedup
+ - All-types: ${this.stats.allTypesQueries} (${allPct}%) - 1x speedup
+ - Avg confidence: ${avgConf}%
+ - Avg speedup: ${avgSpeedup}x
+ `.trim();
+ }
+ /**
+ * Reset statistics
+ */
+ resetStats() {
+ this.stats = {
+ totalQueries: 0,
+ singleTypeQueries: 0,
+ multiTypeQueries: 0,
+ allTypesQueries: 0,
+ avgConfidence: 0
+ };
+ }
+ /**
+ * Analyze a batch of queries to understand distribution
+ *
+ * Useful for optimizing thresholds and understanding usage patterns
+ */
+ async analyzeQueries(queries) {
+ const distribution = {
+ 'single-type': 0,
+ 'multi-type': 0,
+ 'all-types': 0
+ };
+ let totalSpeedup = 0;
+ for (const query of queries) {
+ const plan = await this.planQuery(query);
+ distribution[plan.routing]++;
+ totalSpeedup += plan.estimatedSpeedup;
+ }
+ const avgSpeedup = totalSpeedup / queries.length;
+ // Generate recommendations
+ const recommendations = [];
+ const singlePct = (distribution['single-type'] / queries.length) * 100;
+ const multiPct = (distribution['multi-type'] / queries.length) * 100;
+ const allPct = (distribution['all-types'] / queries.length) * 100;
+ if (allPct > 30) {
+ recommendations.push(`High all-types usage (${allPct.toFixed(0)}%) - consider lowering multiTypeThreshold or expanding keyword dictionary`);
+ }
+ if (singlePct > 70) {
+ recommendations.push(`High single-type usage (${singlePct.toFixed(0)}%) - excellent! Type inference is working well`);
+ }
+ if (avgSpeedup < 5) {
+ recommendations.push(`Low average speedup (${avgSpeedup.toFixed(1)}x) - consider adjusting confidence thresholds`);
+ }
+ else if (avgSpeedup > 15) {
+ recommendations.push(`Excellent average speedup (${avgSpeedup.toFixed(1)}x) - type-first routing is highly effective`);
+ }
+ return {
+ distribution,
+ avgSpeedup,
+ recommendations
+ };
+ }
+ }
+ /**
+ * Global singleton instance for convenience
+ */
+ let globalPlanner = null;
+ /**
+ * Get or create the global TypeAwareQueryPlanner instance
+ */
+ export function getQueryPlanner(config) {
+ if (!globalPlanner) {
+ globalPlanner = new TypeAwareQueryPlanner(config);
+ }
+ return globalPlanner;
+ }
+ /**
+ * Convenience function to plan a query
+ */
+ export async function planQuery(query, config) {
+ return getQueryPlanner(config).planQuery(query);
+ }
+ //# sourceMappingURL=typeAwareQueryPlanner.js.map
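
For orientation, here is a minimal usage sketch of the planner added above. It is not taken from the package; the import path and the assumption that getQueryPlanner is reachable from the package root are illustrative, and the thresholds shown simply restate the defaults from the constructor.

// Hypothetical usage sketch (TypeScript). Import path is an assumption;
// adjust it to wherever the build actually exposes the planner module.
import { getQueryPlanner } from '@soulcraft/brainy';

async function demo(): Promise<void> {
  // Thresholds shown are the defaults from the constructor above.
  const planner = getQueryPlanner({ singleTypeThreshold: 0.8, multiTypeThreshold: 0.6 });

  // Per the module's own examples, "Find engineers" should route to a single Person graph.
  const plan = await planner.planQuery('Find engineers');
  console.log(plan.routing);          // 'single-type' | 'multi-type' | 'all-types'
  console.log(plan.targetTypes);      // e.g. [NounType.Person]
  console.log(plan.estimatedSpeedup); // NOUN_TYPE_COUNT / targetTypes.length

  // Statistics accumulate across planQuery() calls.
  console.log(planner.getStatsReport());
}
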
@@ -0,0 +1,158 @@
+ /**
+ * Type Inference System - Phase 3: Type-First Query Optimization
+ *
+ * Automatically infers NounTypes from natural language queries using keyword-based
+ * heuristics for fast O(1) type detection.
+ *
+ * Performance Guarantee: < 1ms per query
+ * Accuracy Target: > 80%
+ *
+ * Examples:
+ * - "Find engineers in San Francisco" → [Person, Location]
+ * - "Show documents about AI" → [Document, Concept]
+ * - "List companies in tech sector" → [Organization, Topic]
+ */
+ import { NounType } from '../types/graphTypes.js';
+ /**
+ * Result of type inference with confidence score
+ */
+ export interface TypeInference {
+ type: NounType;
+ confidence: number;
+ matchedKeywords: string[];
+ }
+ /**
+ * Configuration for type inference behavior
+ */
+ export interface TypeInferenceConfig {
+ /**
+ * Minimum confidence threshold to include a type (default: 0.4)
+ */
+ minConfidence?: number;
+ /**
+ * Maximum number of types to return (default: 5)
+ */
+ maxTypes?: number;
+ /**
+ * Enable debug logging (default: false)
+ */
+ debug?: boolean;
+ /**
+ * Enable vector similarity fallback for unknown words (default: false)
+ * When enabled, queries with low keyword confidence trigger vector-based type inference
+ */
+ enableVectorFallback?: boolean;
+ /**
+ * Minimum confidence threshold to trigger vector fallback (default: 0.7)
+ * If keyword matching produces confidence below this, vector fallback is used
+ */
+ fallbackConfidenceThreshold?: number;
+ /**
+ * Minimum similarity score for vector-based type matches (default: 0.5)
+ */
+ vectorThreshold?: number;
+ }
+ /**
+ * Type Inference System
+ *
+ * Uses keyword matching for fast type detection from natural language.
+ * Designed for billion-scale performance with minimal latency.
+ */
+ export declare class TypeInferenceSystem {
+ private keywordMap;
+ private phraseMap;
+ private config;
+ private typeEmbeddings;
+ private embedder;
+ constructor(config?: TypeInferenceConfig);
+ /**
+ * Infer noun types from a natural language query (synchronous keyword matching only)
+ * For hybrid mode with vector fallback, use inferTypesAsync()
+ *
+ * @param query - Natural language query string
+ * @returns Array of type inferences sorted by confidence (highest first)
+ */
+ inferTypes(query: string): TypeInference[];
+ /**
+ * Infer noun types with hybrid approach: keyword matching + optional vector fallback
+ * This is the async version that supports vector similarity fallback
+ *
+ * @param query - Natural language query string
+ * @returns Promise resolving to array of type inferences
+ */
+ inferTypesAsync(query: string): Promise<TypeInference[]>;
+ /**
+ * Internal: Keyword-based type inference (synchronous, fast)
+ */
+ private inferTypesViaKeywords;
+ /**
+ * Internal: Hybrid inference with vector fallback (asynchronous)
+ */
+ private inferTypesWithFallback;
+ /**
+ * Match multi-word phrases in query
+ */
+ private matchPhrases;
+ /**
+ * Match individual keywords in query
+ */
+ private matchKeywords;
+ /**
+ * Find closest keyword using edit distance (for typo correction)
+ * Allows edit distance 1-2 depending on word length
+ */
+ private findFuzzyKeywordMatch;
+ /**
+ * Calculate Levenshtein (edit) distance between two strings
+ */
+ private levenshteinDistance;
+ /**
+ * Update type score with new match
+ */
+ private updateTypeScore;
+ /**
+ * Load pre-compiled type embeddings from embeddedTypeEmbeddings.ts
+ */
+ private loadTypeEmbeddings;
+ /**
+ * Lazy-load TransformerEmbedding model (only when vector fallback is triggered)
+ */
+ private loadEmbedder;
+ /**
+ * Calculate cosine similarity between two vectors
+ */
+ private cosineSimilarity;
+ /**
+ * Infer types using vector similarity against pre-compiled type embeddings
+ */
+ private inferTypesViaVectorSimilarity;
+ /**
+ * Merge keyword-based and vector-based results
+ * Prioritizes keyword results (explicit matches) over vector results (semantic matches)
+ */
+ private mergeResults;
+ /**
+ * Build keyword dictionary for single-word matching
+ */
+ private buildKeywordMap;
+ /**
+ * Build phrase dictionary for multi-word matching
+ */
+ private buildPhraseMap;
+ /**
+ * Get statistics about the inference system
+ */
+ getStats(): {
+ keywordCount: number;
+ phraseCount: number;
+ config: Required<TypeInferenceConfig>;
+ };
+ }
+ /**
+ * Get or create the global TypeInferenceSystem instance
+ */
+ export declare function getTypeInferenceSystem(config?: TypeInferenceConfig): TypeInferenceSystem;
+ /**
+ * Convenience function to infer types from a query
+ */
+ export declare function inferTypes(query: string, config?: TypeInferenceConfig): TypeInference[];
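
A corresponding sketch for the type-inference declarations above: the synchronous keyword path versus the hybrid path with vector fallback. Again, only the exported names and config fields come from the .d.ts; the import path and example queries are assumptions.

// Hypothetical usage sketch (TypeScript); import path is an assumption.
import { inferTypes, getTypeInferenceSystem } from '@soulcraft/brainy';

// Fast path: synchronous keyword/phrase matching only (thresholds shown are the documented defaults).
const keywordOnly = inferTypes('Find engineers in San Francisco', { minConfidence: 0.4, maxTypes: 5 });
// Each result matches the TypeInference interface: { type, confidence, matchedKeywords }

// Hybrid path: keyword matching with vector-similarity fallback requires the async API.
async function hybridDemo(): Promise<void> {
  const system = getTypeInferenceSystem({ enableVectorFallback: true, fallbackConfidenceThreshold: 0.7 });
  const results = await system.inferTypesAsync('Show documents about AI');
  for (const r of results) {
    console.log(r.type, r.confidence.toFixed(2), r.matchedKeywords);
  }
}
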