@soulcraft/brainy 3.47.1 → 3.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/UniversalImportAPI.d.ts +11 -1
- package/dist/api/UniversalImportAPI.js +93 -24
- package/dist/augmentations/storageAugmentations.js +4 -3
- package/dist/brainy.d.ts +5 -1
- package/dist/brainy.js +20 -0
- package/dist/data/expandedKeywordDictionary.d.ts +22 -0
- package/dist/data/expandedKeywordDictionary.js +171 -0
- package/dist/import/ImportCoordinator.d.ts +5 -1
- package/dist/import/ImportCoordinator.js +13 -1
- package/dist/importers/SmartImportOrchestrator.d.ts +1 -1
- package/dist/importers/SmartImportOrchestrator.js +65 -12
- package/dist/index.d.ts +7 -2
- package/dist/index.js +9 -1
- package/dist/neural/embeddedKeywordEmbeddings.d.ts +29 -0
- package/dist/neural/embeddedKeywordEmbeddings.js +412683 -0
- package/dist/query/semanticTypeInference.d.ts +217 -0
- package/dist/query/semanticTypeInference.js +341 -0
- package/dist/query/typeAwareQueryPlanner.d.ts +152 -0
- package/dist/query/typeAwareQueryPlanner.js +297 -0
- package/dist/query/typeInference.d.ts +158 -0
- package/dist/query/typeInference.js +760 -0
- package/dist/storage/adapters/r2Storage.d.ts +213 -0
- package/dist/storage/adapters/r2Storage.js +876 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +1 -1
- package/dist/storage/adapters/s3CompatibleStorage.js +0 -2
- package/dist/storage/storageFactory.d.ts +2 -1
- package/dist/storage/storageFactory.js +4 -5
- package/dist/triple/TripleIntelligenceSystem.d.ts +4 -0
- package/dist/triple/TripleIntelligenceSystem.js +33 -4
- package/package.json +5 -2
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Type Inference - THE ONE unified function for all type inference
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth using semantic similarity against pre-computed keyword embeddings.
|
|
5
|
+
*
|
|
6
|
+
* Used by:
|
|
7
|
+
* - TypeAwareQueryPlanner (query routing to specific HNSW graphs)
|
|
8
|
+
* - Import pipeline (entity extraction during indexing)
|
|
9
|
+
* - Neural operations (concept extraction)
|
|
10
|
+
* - Public API (developer integrations)
|
|
11
|
+
*
|
|
12
|
+
* Performance: 1-2ms (uncached embedding), 0.2-0.5ms (cached embedding)
|
|
13
|
+
* Accuracy: 95%+ (handles exact matches, synonyms, typos, semantic similarity)
|
|
14
|
+
*/
|
|
15
|
+
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
16
|
+
/**
|
|
17
|
+
* Type inference result (unified nouns + verbs)
|
|
18
|
+
*/
|
|
19
|
+
export interface TypeInference {
|
|
20
|
+
type: NounType | VerbType;
|
|
21
|
+
typeCategory: 'noun' | 'verb';
|
|
22
|
+
confidence: number;
|
|
23
|
+
matchedKeywords: string[];
|
|
24
|
+
similarity: number;
|
|
25
|
+
baseConfidence: number;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Options for semantic type inference
|
|
29
|
+
*/
|
|
30
|
+
export interface SemanticTypeInferenceOptions {
|
|
31
|
+
/** Maximum number of results to return (default: 5) */
|
|
32
|
+
maxResults?: number;
|
|
33
|
+
/** Minimum confidence threshold (default: 0.5) */
|
|
34
|
+
minConfidence?: number;
|
|
35
|
+
/** Filter by specific types (default: all types) */
|
|
36
|
+
filterTypes?: (NounType | VerbType)[];
|
|
37
|
+
/** Filter by type category (default: both) */
|
|
38
|
+
filterCategory?: 'noun' | 'verb';
|
|
39
|
+
/** Use embedding cache (default: true) */
|
|
40
|
+
useCache?: boolean;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Semantic Type Inference - THE ONE unified system
|
|
44
|
+
*
|
|
45
|
+
* Infers entity types using semantic similarity against 700+ pre-computed keyword embeddings.
|
|
46
|
+
*/
|
|
47
|
+
export declare class SemanticTypeInference {
|
|
48
|
+
private keywordEmbeddings;
|
|
49
|
+
private keywordHNSW;
|
|
50
|
+
private embedder;
|
|
51
|
+
private embeddingCache;
|
|
52
|
+
private readonly CACHE_MAX_SIZE;
|
|
53
|
+
private initPromise;
|
|
54
|
+
constructor();
|
|
55
|
+
/**
|
|
56
|
+
* Initialize HNSW index with keyword embeddings
|
|
57
|
+
*/
|
|
58
|
+
private initializeHNSW;
|
|
59
|
+
/**
|
|
60
|
+
* THE ONE FUNCTION - Infer entity types from natural language text
|
|
61
|
+
*
|
|
62
|
+
* Uses semantic similarity to match text against 700+ keyword embeddings.
|
|
63
|
+
*
|
|
64
|
+
* @example
|
|
65
|
+
* ```typescript
|
|
66
|
+
* // Query routing
|
|
67
|
+
* const types = await inferTypes("Find cardiologists")
|
|
68
|
+
* // → [{type: Person, confidence: 0.92, keyword: "cardiologist"}]
|
|
69
|
+
*
|
|
70
|
+
* // Entity extraction
|
|
71
|
+
* const entities = await inferTypes("Dr. Sarah Chen")
|
|
72
|
+
* // → [{type: Person, confidence: 0.90, keyword: "doctor"}]
|
|
73
|
+
*
|
|
74
|
+
* // Concept extraction
|
|
75
|
+
* const concepts = await inferTypes("machine learning")
|
|
76
|
+
* // → [{type: Concept, confidence: 0.95, keyword: "machine learning"}]
|
|
77
|
+
* ```
|
|
78
|
+
*/
|
|
79
|
+
inferTypes(text: string, options?: SemanticTypeInferenceOptions): Promise<TypeInference[]>;
|
|
80
|
+
/**
|
|
81
|
+
* Get embedding from cache or compute
|
|
82
|
+
*/
|
|
83
|
+
private getOrComputeEmbedding;
|
|
84
|
+
/**
|
|
85
|
+
* Compute text embedding using TransformerEmbedding
|
|
86
|
+
*/
|
|
87
|
+
private computeEmbedding;
|
|
88
|
+
/**
|
|
89
|
+
* Get statistics about the inference system
|
|
90
|
+
*/
|
|
91
|
+
getStats(): {
|
|
92
|
+
totalKeywords: number;
|
|
93
|
+
canonicalKeywords: number;
|
|
94
|
+
synonymKeywords: number;
|
|
95
|
+
cacheSize: number;
|
|
96
|
+
cacheMaxSize: number;
|
|
97
|
+
};
|
|
98
|
+
/**
|
|
99
|
+
* Clear embedding cache
|
|
100
|
+
*/
|
|
101
|
+
clearCache(): void;
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Get or create the global SemanticTypeInference instance
|
|
105
|
+
*/
|
|
106
|
+
export declare function getSemanticTypeInference(): SemanticTypeInference;
|
|
107
|
+
/**
|
|
108
|
+
* THE ONE FUNCTION - Public API for semantic type inference
|
|
109
|
+
*
|
|
110
|
+
* Infer entity types from natural language text using semantic similarity.
|
|
111
|
+
*
|
|
112
|
+
* @param text - Natural language text (query, entity name, concept)
|
|
113
|
+
* @param options - Configuration options
|
|
114
|
+
* @returns Array of type inferences sorted by confidence (highest first)
|
|
115
|
+
*
|
|
116
|
+
* @example
|
|
117
|
+
* ```typescript
|
|
118
|
+
* import { inferTypes } from '@soulcraft/brainy'
|
|
119
|
+
*
|
|
120
|
+
* // Query routing
|
|
121
|
+
* const types = await inferTypes("Find cardiologists in San Francisco")
|
|
122
|
+
* // → [
|
|
123
|
+
* // {type: "person", confidence: 0.92, keyword: "cardiologist"},
|
|
124
|
+
* // {type: "location", confidence: 0.88, keyword: "san francisco"}
|
|
125
|
+
* // ]
|
|
126
|
+
*
|
|
127
|
+
* // Entity extraction
|
|
128
|
+
* const entities = await inferTypes("Dr. Sarah Chen works at UCSF")
|
|
129
|
+
* // → [
|
|
130
|
+
* // {type: "person", confidence: 0.90, keyword: "doctor"},
|
|
131
|
+
* // {type: "organization", confidence: 0.82, keyword: "ucsf"}
|
|
132
|
+
* // ]
|
|
133
|
+
*
|
|
134
|
+
* // Concept extraction
|
|
135
|
+
* const concepts = await inferTypes("machine learning algorithms")
|
|
136
|
+
* // → [{type: "concept", confidence: 0.95, keyword: "machine learning"}]
|
|
137
|
+
*
|
|
138
|
+
* // Filter by specific types
|
|
139
|
+
* const people = await inferTypes("Find doctors", {
|
|
140
|
+
* filterTypes: [NounType.Person],
|
|
141
|
+
* maxResults: 3
|
|
142
|
+
* })
|
|
143
|
+
* ```
|
|
144
|
+
*/
|
|
145
|
+
export declare function inferTypes(text: string, options?: SemanticTypeInferenceOptions): Promise<TypeInference[]>;
|
|
146
|
+
/**
|
|
147
|
+
* Convenience function - Infer noun types only
|
|
148
|
+
*
|
|
149
|
+
* Filters results to noun types (Person, Organization, Location, etc.)
|
|
150
|
+
*
|
|
151
|
+
* @param text - Natural language text
|
|
152
|
+
* @param options - Configuration options
|
|
153
|
+
* @returns Array of noun type inferences
|
|
154
|
+
*
|
|
155
|
+
* @example
|
|
156
|
+
* ```typescript
|
|
157
|
+
* import { inferNouns } from '@soulcraft/brainy'
|
|
158
|
+
*
|
|
159
|
+
* const entities = await inferNouns("Dr. Sarah Chen works at UCSF")
|
|
160
|
+
* // → [
|
|
161
|
+
* // {type: "person", typeCategory: "noun", confidence: 0.90},
|
|
162
|
+
* // {type: "organization", typeCategory: "noun", confidence: 0.82}
|
|
163
|
+
* // ]
|
|
164
|
+
* ```
|
|
165
|
+
*/
|
|
166
|
+
export declare function inferNouns(text: string, options?: Omit<SemanticTypeInferenceOptions, 'filterCategory'>): Promise<TypeInference[]>;
|
|
167
|
+
/**
|
|
168
|
+
* Convenience function - Infer verb types only
|
|
169
|
+
*
|
|
170
|
+
* Filters results to verb types (Creates, Transforms, MemberOf, etc.)
|
|
171
|
+
*
|
|
172
|
+
* @param text - Natural language text
|
|
173
|
+
* @param options - Configuration options
|
|
174
|
+
* @returns Array of verb type inferences
|
|
175
|
+
*
|
|
176
|
+
* @example
|
|
177
|
+
* ```typescript
|
|
178
|
+
* import { inferVerbs } from '@soulcraft/brainy'
|
|
179
|
+
*
|
|
180
|
+
* const actions = await inferVerbs("creates and transforms data")
|
|
181
|
+
* // → [
|
|
182
|
+
* // {type: "creates", typeCategory: "verb", confidence: 0.95},
|
|
183
|
+
* // {type: "transforms", typeCategory: "verb", confidence: 0.93}
|
|
184
|
+
* // ]
|
|
185
|
+
* ```
|
|
186
|
+
*/
|
|
187
|
+
export declare function inferVerbs(text: string, options?: Omit<SemanticTypeInferenceOptions, 'filterCategory'>): Promise<TypeInference[]>;
|
|
188
|
+
/**
|
|
189
|
+
* Infer query intent - Returns both nouns AND verbs separately
|
|
190
|
+
*
|
|
191
|
+
* Best for complete query understanding. Returns structured intent with
|
|
192
|
+
* entities (nouns) and actions (verbs) identified separately.
|
|
193
|
+
*
|
|
194
|
+
* @param text - Natural language query
|
|
195
|
+
* @param options - Configuration options
|
|
196
|
+
* @returns Structured intent with separate noun and verb inferences
|
|
197
|
+
*
|
|
198
|
+
* @example
|
|
199
|
+
* ```typescript
|
|
200
|
+
* import { inferIntent } from '@soulcraft/brainy'
|
|
201
|
+
*
|
|
202
|
+
* const intent = await inferIntent("Find doctors who work at UCSF")
|
|
203
|
+
* // → {
|
|
204
|
+
* // nouns: [
|
|
205
|
+
* // {type: "person", confidence: 0.92, matchedKeywords: ["doctors"]},
|
|
206
|
+
* // {type: "organization", confidence: 0.85, matchedKeywords: ["ucsf"]}
|
|
207
|
+
* // ],
|
|
208
|
+
* // verbs: [
|
|
209
|
+
* // {type: "memberOf", confidence: 0.88, matchedKeywords: ["work at"]}
|
|
210
|
+
* // ]
|
|
211
|
+
* // }
|
|
212
|
+
* ```
|
|
213
|
+
*/
|
|
214
|
+
export declare function inferIntent(text: string, options?: Omit<SemanticTypeInferenceOptions, 'filterCategory'>): Promise<{
|
|
215
|
+
nouns: TypeInference[];
|
|
216
|
+
verbs: TypeInference[];
|
|
217
|
+
}>;
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Type Inference - THE ONE unified function for all type inference
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth using semantic similarity against pre-computed keyword embeddings.
|
|
5
|
+
*
|
|
6
|
+
* Used by:
|
|
7
|
+
* - TypeAwareQueryPlanner (query routing to specific HNSW graphs)
|
|
8
|
+
* - Import pipeline (entity extraction during indexing)
|
|
9
|
+
* - Neural operations (concept extraction)
|
|
10
|
+
* - Public API (developer integrations)
|
|
11
|
+
*
|
|
12
|
+
* Performance: 1-2ms (uncached embedding), 0.2-0.5ms (cached embedding)
|
|
13
|
+
* Accuracy: 95%+ (handles exact matches, synonyms, typos, semantic similarity)
|
|
14
|
+
*/
|
|
15
|
+
import { getKeywordEmbeddings } from '../neural/embeddedKeywordEmbeddings.js';
|
|
16
|
+
import { HNSWIndex } from '../hnsw/hnswIndex.js';
|
|
17
|
+
import { TransformerEmbedding } from '../utils/embedding.js';
|
|
18
|
+
import { prodLog } from '../utils/logger.js';
|
|
19
|
+
/**
|
|
20
|
+
* Semantic Type Inference - THE ONE unified system
|
|
21
|
+
*
|
|
22
|
+
* Infers entity types using semantic similarity against 700+ pre-computed keyword embeddings.
|
|
23
|
+
*/
|
|
24
|
+
export class SemanticTypeInference {
|
|
25
|
+
constructor() {
|
|
26
|
+
this.embedder = null;
|
|
27
|
+
this.CACHE_MAX_SIZE = 1000;
|
|
28
|
+
// Load pre-computed keyword embeddings
|
|
29
|
+
this.keywordEmbeddings = getKeywordEmbeddings();
|
|
30
|
+
prodLog.info(`SemanticTypeInference: Loading ${this.keywordEmbeddings.length} keyword embeddings...`);
|
|
31
|
+
// Build HNSW index for O(log n) semantic search
|
|
32
|
+
this.keywordHNSW = new HNSWIndex({
|
|
33
|
+
M: 16, // Number of bi-directional links per node
|
|
34
|
+
efConstruction: 200, // Higher = better quality, slower build
|
|
35
|
+
efSearch: 50, // Search quality parameter
|
|
36
|
+
ml: 1.0 / Math.log(16) // Level generation factor
|
|
37
|
+
});
|
|
38
|
+
// Initialize embedding cache (LRU-style with size limit)
|
|
39
|
+
this.embeddingCache = new Map();
|
|
40
|
+
// Async initialization of HNSW index
|
|
41
|
+
this.initPromise = this.initializeHNSW();
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Initialize HNSW index with keyword embeddings
|
|
45
|
+
*/
|
|
46
|
+
async initializeHNSW() {
|
|
47
|
+
const vectors = this.keywordEmbeddings.map(k => k.embedding);
|
|
48
|
+
// Add all keyword vectors to HNSW
|
|
49
|
+
for (let i = 0; i < vectors.length; i++) {
|
|
50
|
+
await this.keywordHNSW.addItem({
|
|
51
|
+
id: i.toString(),
|
|
52
|
+
vector: vectors[i]
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
prodLog.info(`SemanticTypeInference initialized: ${this.keywordEmbeddings.length} keywords, ` +
|
|
56
|
+
`HNSW index built (M=16, efConstruction=200)`);
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* THE ONE FUNCTION - Infer entity types from natural language text
|
|
60
|
+
*
|
|
61
|
+
* Uses semantic similarity to match text against 700+ keyword embeddings.
|
|
62
|
+
*
|
|
63
|
+
* @example
|
|
64
|
+
* ```typescript
|
|
65
|
+
* // Query routing
|
|
66
|
+
* const types = await inferTypes("Find cardiologists")
|
|
67
|
+
* // → [{type: Person, confidence: 0.92, keyword: "cardiologist"}]
|
|
68
|
+
*
|
|
69
|
+
* // Entity extraction
|
|
70
|
+
* const entities = await inferTypes("Dr. Sarah Chen")
|
|
71
|
+
* // → [{type: Person, confidence: 0.90, keyword: "doctor"}]
|
|
72
|
+
*
|
|
73
|
+
* // Concept extraction
|
|
74
|
+
* const concepts = await inferTypes("machine learning")
|
|
75
|
+
* // → [{type: Concept, confidence: 0.95, keyword: "machine learning"}]
|
|
76
|
+
* ```
|
|
77
|
+
*/
|
|
78
|
+
async inferTypes(text, options = {}) {
|
|
79
|
+
const startTime = performance.now();
|
|
80
|
+
// Ensure HNSW index is initialized
|
|
81
|
+
await this.initPromise;
|
|
82
|
+
// Normalize text
|
|
83
|
+
const normalized = text.toLowerCase().trim();
|
|
84
|
+
if (!normalized) {
|
|
85
|
+
return [];
|
|
86
|
+
}
|
|
87
|
+
try {
|
|
88
|
+
// Get or compute embedding
|
|
89
|
+
const embedding = options.useCache !== false
|
|
90
|
+
? await this.getOrComputeEmbedding(normalized)
|
|
91
|
+
: await this.computeEmbedding(normalized);
|
|
92
|
+
// Search HNSW index (O(log n) semantic search)
|
|
93
|
+
const k = options.maxResults ?? 5;
|
|
94
|
+
const candidates = await this.keywordHNSW.search(embedding, k * 3); // Fetch extra for filtering
|
|
95
|
+
// Convert to TypeInference results
|
|
96
|
+
const results = [];
|
|
97
|
+
for (const [idStr, distance] of candidates) {
|
|
98
|
+
const id = parseInt(idStr, 10);
|
|
99
|
+
const keyword = this.keywordEmbeddings[id];
|
|
100
|
+
// Apply category filter
|
|
101
|
+
if (options.filterCategory && keyword.typeCategory !== options.filterCategory) {
|
|
102
|
+
continue;
|
|
103
|
+
}
|
|
104
|
+
// Apply type filter
|
|
105
|
+
if (options.filterTypes && !options.filterTypes.includes(keyword.type)) {
|
|
106
|
+
continue;
|
|
107
|
+
}
|
|
108
|
+
// Calculate combined confidence (similarity * base confidence)
|
|
109
|
+
const confidence = distance * keyword.confidence;
|
|
110
|
+
// Apply confidence threshold
|
|
111
|
+
if (confidence < (options.minConfidence ?? 0.5)) {
|
|
112
|
+
continue;
|
|
113
|
+
}
|
|
114
|
+
results.push({
|
|
115
|
+
type: keyword.type,
|
|
116
|
+
typeCategory: keyword.typeCategory,
|
|
117
|
+
confidence,
|
|
118
|
+
matchedKeywords: [keyword.keyword],
|
|
119
|
+
similarity: distance,
|
|
120
|
+
baseConfidence: keyword.confidence
|
|
121
|
+
});
|
|
122
|
+
// Stop once we have enough results
|
|
123
|
+
if (results.length >= k)
|
|
124
|
+
break;
|
|
125
|
+
}
|
|
126
|
+
const elapsed = performance.now() - startTime;
|
|
127
|
+
const cacheHit = this.embeddingCache.has(normalized);
|
|
128
|
+
if (elapsed > 10) {
|
|
129
|
+
prodLog.debug(`Semantic type inference: ${results.length} types in ${elapsed.toFixed(2)}ms ` +
|
|
130
|
+
`(${cacheHit ? 'cached' : 'computed'} embedding)`);
|
|
131
|
+
}
|
|
132
|
+
return results;
|
|
133
|
+
}
|
|
134
|
+
catch (error) {
|
|
135
|
+
prodLog.error(`Semantic type inference failed: ${error.message}`);
|
|
136
|
+
return [];
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
/**
|
|
140
|
+
* Get embedding from cache or compute
|
|
141
|
+
*/
|
|
142
|
+
async getOrComputeEmbedding(text) {
|
|
143
|
+
// Check cache
|
|
144
|
+
const cached = this.embeddingCache.get(text);
|
|
145
|
+
if (cached) {
|
|
146
|
+
return cached;
|
|
147
|
+
}
|
|
148
|
+
// Compute embedding
|
|
149
|
+
const embedding = await this.computeEmbedding(text);
|
|
150
|
+
// Add to cache (with size limit)
|
|
151
|
+
if (this.embeddingCache.size >= this.CACHE_MAX_SIZE) {
|
|
152
|
+
// Remove oldest entry (first entry in Map)
|
|
153
|
+
const firstKey = this.embeddingCache.keys().next().value;
|
|
154
|
+
if (firstKey !== undefined) {
|
|
155
|
+
this.embeddingCache.delete(firstKey);
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
this.embeddingCache.set(text, embedding);
|
|
159
|
+
return embedding;
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* Compute text embedding using TransformerEmbedding
|
|
163
|
+
*/
|
|
164
|
+
async computeEmbedding(text) {
|
|
165
|
+
// Lazy-load embedder
|
|
166
|
+
if (!this.embedder) {
|
|
167
|
+
this.embedder = new TransformerEmbedding({ verbose: false });
|
|
168
|
+
await this.embedder.init();
|
|
169
|
+
}
|
|
170
|
+
return await this.embedder.embed(text);
|
|
171
|
+
}
|
|
172
|
+
/**
|
|
173
|
+
* Get statistics about the inference system
|
|
174
|
+
*/
|
|
175
|
+
getStats() {
|
|
176
|
+
const canonical = this.keywordEmbeddings.filter(k => k.isCanonical).length;
|
|
177
|
+
const synonyms = this.keywordEmbeddings.filter(k => !k.isCanonical).length;
|
|
178
|
+
return {
|
|
179
|
+
totalKeywords: this.keywordEmbeddings.length,
|
|
180
|
+
canonicalKeywords: canonical,
|
|
181
|
+
synonymKeywords: synonyms,
|
|
182
|
+
cacheSize: this.embeddingCache.size,
|
|
183
|
+
cacheMaxSize: this.CACHE_MAX_SIZE
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
/**
|
|
187
|
+
* Clear embedding cache
|
|
188
|
+
*/
|
|
189
|
+
clearCache() {
|
|
190
|
+
this.embeddingCache.clear();
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
/**
|
|
194
|
+
* Global singleton instance
|
|
195
|
+
*/
|
|
196
|
+
let globalInstance = null;
|
|
197
|
+
/**
|
|
198
|
+
* Get or create the global SemanticTypeInference instance
|
|
199
|
+
*/
|
|
200
|
+
export function getSemanticTypeInference() {
|
|
201
|
+
if (!globalInstance) {
|
|
202
|
+
globalInstance = new SemanticTypeInference();
|
|
203
|
+
}
|
|
204
|
+
return globalInstance;
|
|
205
|
+
}
|
|
206
|
+
/**
|
|
207
|
+
* THE ONE FUNCTION - Public API for semantic type inference
|
|
208
|
+
*
|
|
209
|
+
* Infer entity types from natural language text using semantic similarity.
|
|
210
|
+
*
|
|
211
|
+
* @param text - Natural language text (query, entity name, concept)
|
|
212
|
+
* @param options - Configuration options
|
|
213
|
+
* @returns Array of type inferences sorted by confidence (highest first)
|
|
214
|
+
*
|
|
215
|
+
* @example
|
|
216
|
+
* ```typescript
|
|
217
|
+
* import { inferTypes } from '@soulcraft/brainy'
|
|
218
|
+
*
|
|
219
|
+
* // Query routing
|
|
220
|
+
* const types = await inferTypes("Find cardiologists in San Francisco")
|
|
221
|
+
* // → [
|
|
222
|
+
* // {type: "person", confidence: 0.92, keyword: "cardiologist"},
|
|
223
|
+
* // {type: "location", confidence: 0.88, keyword: "san francisco"}
|
|
224
|
+
* // ]
|
|
225
|
+
*
|
|
226
|
+
* // Entity extraction
|
|
227
|
+
* const entities = await inferTypes("Dr. Sarah Chen works at UCSF")
|
|
228
|
+
* // → [
|
|
229
|
+
* // {type: "person", confidence: 0.90, keyword: "doctor"},
|
|
230
|
+
* // {type: "organization", confidence: 0.82, keyword: "ucsf"}
|
|
231
|
+
* // ]
|
|
232
|
+
*
|
|
233
|
+
* // Concept extraction
|
|
234
|
+
* const concepts = await inferTypes("machine learning algorithms")
|
|
235
|
+
* // → [{type: "concept", confidence: 0.95, keyword: "machine learning"}]
|
|
236
|
+
*
|
|
237
|
+
* // Filter by specific types
|
|
238
|
+
* const people = await inferTypes("Find doctors", {
|
|
239
|
+
* filterTypes: [NounType.Person],
|
|
240
|
+
* maxResults: 3
|
|
241
|
+
* })
|
|
242
|
+
* ```
|
|
243
|
+
*/
|
|
244
|
+
export async function inferTypes(text, options) {
|
|
245
|
+
return getSemanticTypeInference().inferTypes(text, options);
|
|
246
|
+
}
|
|
247
|
+
/**
|
|
248
|
+
* Convenience function - Infer noun types only
|
|
249
|
+
*
|
|
250
|
+
* Filters results to noun types (Person, Organization, Location, etc.)
|
|
251
|
+
*
|
|
252
|
+
* @param text - Natural language text
|
|
253
|
+
* @param options - Configuration options
|
|
254
|
+
* @returns Array of noun type inferences
|
|
255
|
+
*
|
|
256
|
+
* @example
|
|
257
|
+
* ```typescript
|
|
258
|
+
* import { inferNouns } from '@soulcraft/brainy'
|
|
259
|
+
*
|
|
260
|
+
* const entities = await inferNouns("Dr. Sarah Chen works at UCSF")
|
|
261
|
+
* // → [
|
|
262
|
+
* // {type: "person", typeCategory: "noun", confidence: 0.90},
|
|
263
|
+
* // {type: "organization", typeCategory: "noun", confidence: 0.82}
|
|
264
|
+
* // ]
|
|
265
|
+
* ```
|
|
266
|
+
*/
|
|
267
|
+
export async function inferNouns(text, options) {
|
|
268
|
+
return getSemanticTypeInference().inferTypes(text, {
|
|
269
|
+
...options,
|
|
270
|
+
filterCategory: 'noun'
|
|
271
|
+
});
|
|
272
|
+
}
|
|
273
|
+
/**
|
|
274
|
+
* Convenience function - Infer verb types only
|
|
275
|
+
*
|
|
276
|
+
* Filters results to verb types (Creates, Transforms, MemberOf, etc.)
|
|
277
|
+
*
|
|
278
|
+
* @param text - Natural language text
|
|
279
|
+
* @param options - Configuration options
|
|
280
|
+
* @returns Array of verb type inferences
|
|
281
|
+
*
|
|
282
|
+
* @example
|
|
283
|
+
* ```typescript
|
|
284
|
+
* import { inferVerbs } from '@soulcraft/brainy'
|
|
285
|
+
*
|
|
286
|
+
* const actions = await inferVerbs("creates and transforms data")
|
|
287
|
+
* // → [
|
|
288
|
+
* // {type: "creates", typeCategory: "verb", confidence: 0.95},
|
|
289
|
+
* // {type: "transforms", typeCategory: "verb", confidence: 0.93}
|
|
290
|
+
* // ]
|
|
291
|
+
* ```
|
|
292
|
+
*/
|
|
293
|
+
export async function inferVerbs(text, options) {
|
|
294
|
+
return getSemanticTypeInference().inferTypes(text, {
|
|
295
|
+
...options,
|
|
296
|
+
filterCategory: 'verb'
|
|
297
|
+
});
|
|
298
|
+
}
|
|
299
|
+
/**
|
|
300
|
+
* Infer query intent - Returns both nouns AND verbs separately
|
|
301
|
+
*
|
|
302
|
+
* Best for complete query understanding. Returns structured intent with
|
|
303
|
+
* entities (nouns) and actions (verbs) identified separately.
|
|
304
|
+
*
|
|
305
|
+
* @param text - Natural language query
|
|
306
|
+
* @param options - Configuration options
|
|
307
|
+
* @returns Structured intent with separate noun and verb inferences
|
|
308
|
+
*
|
|
309
|
+
* @example
|
|
310
|
+
* ```typescript
|
|
311
|
+
* import { inferIntent } from '@soulcraft/brainy'
|
|
312
|
+
*
|
|
313
|
+
* const intent = await inferIntent("Find doctors who work at UCSF")
|
|
314
|
+
* // → {
|
|
315
|
+
* // nouns: [
|
|
316
|
+
* // {type: "person", confidence: 0.92, matchedKeywords: ["doctors"]},
|
|
317
|
+
* // {type: "organization", confidence: 0.85, matchedKeywords: ["ucsf"]}
|
|
318
|
+
* // ],
|
|
319
|
+
* // verbs: [
|
|
320
|
+
* // {type: "memberOf", confidence: 0.88, matchedKeywords: ["work at"]}
|
|
321
|
+
* // ]
|
|
322
|
+
* // }
|
|
323
|
+
* ```
|
|
324
|
+
*/
|
|
325
|
+
export async function inferIntent(text, options) {
|
|
326
|
+
// Run inference once to get all types
|
|
327
|
+
const allTypes = await getSemanticTypeInference().inferTypes(text, {
|
|
328
|
+
...options,
|
|
329
|
+
maxResults: (options?.maxResults ?? 5) * 2 // Get more results since we're splitting
|
|
330
|
+
});
|
|
331
|
+
// Split into nouns and verbs
|
|
332
|
+
const nouns = allTypes.filter(t => t.typeCategory === 'noun');
|
|
333
|
+
const verbs = allTypes.filter(t => t.typeCategory === 'verb');
|
|
334
|
+
// Limit each category to maxResults
|
|
335
|
+
const limit = options?.maxResults ?? 5;
|
|
336
|
+
return {
|
|
337
|
+
nouns: nouns.slice(0, limit),
|
|
338
|
+
verbs: verbs.slice(0, limit)
|
|
339
|
+
};
|
|
340
|
+
}
|
|
341
|
+
//# sourceMappingURL=semanticTypeInference.js.map
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type-Aware Query Planner - Phase 3: Type-First Query Optimization
|
|
3
|
+
*
|
|
4
|
+
* Generates optimized query execution plans by inferring entity types from
|
|
5
|
+
* natural language queries using semantic similarity and routing to specific
|
|
6
|
+
* TypeAwareHNSWIndex graphs.
|
|
7
|
+
*
|
|
8
|
+
* Performance Impact:
|
|
9
|
+
* - Single-type queries: 31x speedup (search 1/31 graphs)
|
|
10
|
+
* - Multi-type queries: 6-15x speedup (search 2-5/31 graphs)
|
|
11
|
+
* - Overall: 40% latency reduction @ 1B scale
|
|
12
|
+
*
|
|
13
|
+
* Examples:
|
|
14
|
+
* - "Find engineers" → single-type → [Person] → 31x speedup
|
|
15
|
+
* - "People at Tesla" → multi-type → [Person, Organization] → 15.5x speedup
|
|
16
|
+
* - "Everything about AI" → all-types → [all 31 types] → no speedup
|
|
17
|
+
*/
|
|
18
|
+
import { NounType } from '../types/graphTypes.js';
|
|
19
|
+
import { type TypeInference } from './semanticTypeInference.js';
|
|
20
|
+
/**
|
|
21
|
+
* Query routing strategy
|
|
22
|
+
*/
|
|
23
|
+
export type QueryRoutingStrategy = 'single-type' | 'multi-type' | 'all-types';
|
|
24
|
+
/**
|
|
25
|
+
* Optimized query execution plan
|
|
26
|
+
*/
|
|
27
|
+
export interface TypeAwareQueryPlan {
|
|
28
|
+
/**
|
|
29
|
+
* Original natural language query
|
|
30
|
+
*/
|
|
31
|
+
originalQuery: string;
|
|
32
|
+
/**
|
|
33
|
+
* Inferred types with confidence scores
|
|
34
|
+
*/
|
|
35
|
+
inferredTypes: TypeInference[];
|
|
36
|
+
/**
|
|
37
|
+
* Selected routing strategy
|
|
38
|
+
*/
|
|
39
|
+
routing: QueryRoutingStrategy;
|
|
40
|
+
/**
|
|
41
|
+
* Target types to search (1-31 types)
|
|
42
|
+
*/
|
|
43
|
+
targetTypes: NounType[];
|
|
44
|
+
/**
|
|
45
|
+
* Estimated speedup factor (1.0 = no speedup, 31.0 = 31x faster)
|
|
46
|
+
*/
|
|
47
|
+
estimatedSpeedup: number;
|
|
48
|
+
/**
|
|
49
|
+
* Overall confidence in the plan (0.0-1.0)
|
|
50
|
+
*/
|
|
51
|
+
confidence: number;
|
|
52
|
+
/**
|
|
53
|
+
* Reasoning for the routing decision (for debugging/analytics)
|
|
54
|
+
*/
|
|
55
|
+
reasoning: string;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Configuration for query planner behavior
|
|
59
|
+
*/
|
|
60
|
+
export interface QueryPlannerConfig {
|
|
61
|
+
/**
|
|
62
|
+
* Minimum confidence for single-type routing (default: 0.8)
|
|
63
|
+
*/
|
|
64
|
+
singleTypeThreshold?: number;
|
|
65
|
+
/**
|
|
66
|
+
* Minimum confidence for multi-type routing (default: 0.6)
|
|
67
|
+
*/
|
|
68
|
+
multiTypeThreshold?: number;
|
|
69
|
+
/**
|
|
70
|
+
* Maximum types for multi-type routing (default: 5)
|
|
71
|
+
*/
|
|
72
|
+
maxMultiTypes?: number;
|
|
73
|
+
/**
|
|
74
|
+
* Enable debug logging (default: false)
|
|
75
|
+
*/
|
|
76
|
+
debug?: boolean;
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* Query pattern statistics for learning
|
|
80
|
+
*/
|
|
81
|
+
interface QueryStats {
|
|
82
|
+
totalQueries: number;
|
|
83
|
+
singleTypeQueries: number;
|
|
84
|
+
multiTypeQueries: number;
|
|
85
|
+
allTypesQueries: number;
|
|
86
|
+
avgConfidence: number;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Type-Aware Query Planner
|
|
90
|
+
*
|
|
91
|
+
* Generates optimized query plans using semantic type inference to route queries
|
|
92
|
+
* to specific TypeAwareHNSWIndex graphs for billion-scale performance.
|
|
93
|
+
*/
|
|
94
|
+
export declare class TypeAwareQueryPlanner {
|
|
95
|
+
private config;
|
|
96
|
+
private stats;
|
|
97
|
+
constructor(config?: QueryPlannerConfig);
|
|
98
|
+
/**
|
|
99
|
+
* Plan an optimized query execution strategy using semantic type inference
|
|
100
|
+
*
|
|
101
|
+
* @param query - Natural language query string
|
|
102
|
+
* @returns Promise resolving to optimized query plan with routing strategy
|
|
103
|
+
*/
|
|
104
|
+
planQuery(query: string): Promise<TypeAwareQueryPlan>;
|
|
105
|
+
/**
|
|
106
|
+
* Select routing strategy based on semantic inference results
|
|
107
|
+
*/
|
|
108
|
+
private selectRoutingStrategy;
|
|
109
|
+
/**
|
|
110
|
+
* Create an all-types plan (fallback strategy)
|
|
111
|
+
*/
|
|
112
|
+
private createAllTypesPlan;
|
|
113
|
+
/**
|
|
114
|
+
* Get all noun types (for all-types routing)
|
|
115
|
+
*/
|
|
116
|
+
private getAllNounTypes;
|
|
117
|
+
/**
|
|
118
|
+
* Update query statistics
|
|
119
|
+
*/
|
|
120
|
+
private updateStats;
|
|
121
|
+
/**
|
|
122
|
+
* Get query statistics
|
|
123
|
+
*/
|
|
124
|
+
getStats(): QueryStats;
|
|
125
|
+
/**
|
|
126
|
+
* Get detailed statistics report
|
|
127
|
+
*/
|
|
128
|
+
getStatsReport(): string;
|
|
129
|
+
/**
|
|
130
|
+
* Reset statistics
|
|
131
|
+
*/
|
|
132
|
+
resetStats(): void;
|
|
133
|
+
/**
|
|
134
|
+
* Analyze a batch of queries to understand distribution
|
|
135
|
+
*
|
|
136
|
+
* Useful for optimizing thresholds and understanding usage patterns
|
|
137
|
+
*/
|
|
138
|
+
analyzeQueries(queries: string[]): Promise<{
|
|
139
|
+
distribution: Record<QueryRoutingStrategy, number>;
|
|
140
|
+
avgSpeedup: number;
|
|
141
|
+
recommendations: string[];
|
|
142
|
+
}>;
|
|
143
|
+
}
|
|
144
|
+
/**
|
|
145
|
+
* Get or create the global TypeAwareQueryPlanner instance
|
|
146
|
+
*/
|
|
147
|
+
export declare function getQueryPlanner(config?: QueryPlannerConfig): TypeAwareQueryPlanner;
|
|
148
|
+
/**
|
|
149
|
+
* Convenience function to plan a query
|
|
150
|
+
*/
|
|
151
|
+
export declare function planQuery(query: string, config?: QueryPlannerConfig): Promise<TypeAwareQueryPlan>;
|
|
152
|
+
export {};
|