@soulcraft/brainy 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +188 -0
- package/LICENSE +2 -2
- package/README.md +201 -596
- package/bin/brainy-interactive.js +564 -0
- package/bin/brainy-ts.js +18 -0
- package/bin/brainy.js +672 -81
- package/dist/augmentationPipeline.d.ts +48 -220
- package/dist/augmentationPipeline.js +60 -508
- package/dist/augmentationRegistry.d.ts +22 -31
- package/dist/augmentationRegistry.js +28 -79
- package/dist/augmentations/apiServerAugmentation.d.ts +108 -0
- package/dist/augmentations/apiServerAugmentation.js +502 -0
- package/dist/augmentations/batchProcessingAugmentation.d.ts +95 -0
- package/dist/augmentations/batchProcessingAugmentation.js +567 -0
- package/dist/augmentations/brainyAugmentation.d.ts +153 -0
- package/dist/augmentations/brainyAugmentation.js +145 -0
- package/dist/augmentations/cacheAugmentation.d.ts +105 -0
- package/dist/augmentations/cacheAugmentation.js +238 -0
- package/dist/augmentations/conduitAugmentations.d.ts +54 -156
- package/dist/augmentations/conduitAugmentations.js +156 -1082
- package/dist/augmentations/connectionPoolAugmentation.d.ts +62 -0
- package/dist/augmentations/connectionPoolAugmentation.js +316 -0
- package/dist/augmentations/defaultAugmentations.d.ts +53 -0
- package/dist/augmentations/defaultAugmentations.js +88 -0
- package/dist/augmentations/entityRegistryAugmentation.d.ts +126 -0
- package/dist/augmentations/entityRegistryAugmentation.js +386 -0
- package/dist/augmentations/indexAugmentation.d.ts +117 -0
- package/dist/augmentations/indexAugmentation.js +284 -0
- package/dist/augmentations/intelligentVerbScoringAugmentation.d.ts +152 -0
- package/dist/augmentations/intelligentVerbScoringAugmentation.js +554 -0
- package/dist/augmentations/metricsAugmentation.d.ts +202 -0
- package/dist/augmentations/metricsAugmentation.js +291 -0
- package/dist/augmentations/monitoringAugmentation.d.ts +94 -0
- package/dist/augmentations/monitoringAugmentation.js +227 -0
- package/dist/augmentations/neuralImport.d.ts +50 -117
- package/dist/augmentations/neuralImport.js +255 -629
- package/dist/augmentations/requestDeduplicatorAugmentation.d.ts +52 -0
- package/dist/augmentations/requestDeduplicatorAugmentation.js +162 -0
- package/dist/augmentations/serverSearchAugmentations.d.ts +43 -22
- package/dist/augmentations/serverSearchAugmentations.js +125 -72
- package/dist/augmentations/storageAugmentation.d.ts +54 -0
- package/dist/augmentations/storageAugmentation.js +93 -0
- package/dist/augmentations/storageAugmentations.d.ts +96 -0
- package/dist/augmentations/storageAugmentations.js +182 -0
- package/dist/augmentations/synapseAugmentation.d.ts +156 -0
- package/dist/augmentations/synapseAugmentation.js +312 -0
- package/dist/augmentations/walAugmentation.d.ts +108 -0
- package/dist/augmentations/walAugmentation.js +515 -0
- package/dist/brainyData.d.ts +404 -130
- package/dist/brainyData.js +1331 -853
- package/dist/chat/BrainyChat.d.ts +16 -8
- package/dist/chat/BrainyChat.js +60 -32
- package/dist/chat/ChatCLI.d.ts +1 -1
- package/dist/chat/ChatCLI.js +6 -6
- package/dist/cli/catalog.d.ts +3 -3
- package/dist/cli/catalog.js +116 -70
- package/dist/cli/commands/core.d.ts +61 -0
- package/dist/cli/commands/core.js +348 -0
- package/dist/cli/commands/neural.d.ts +25 -0
- package/dist/cli/commands/neural.js +508 -0
- package/dist/cli/commands/utility.d.ts +37 -0
- package/dist/cli/commands/utility.js +276 -0
- package/dist/cli/index.d.ts +7 -0
- package/dist/cli/index.js +167 -0
- package/dist/cli/interactive.d.ts +164 -0
- package/dist/cli/interactive.js +542 -0
- package/dist/cortex/neuralImport.js +5 -5
- package/dist/critical/model-guardian.js +11 -4
- package/dist/embeddings/lightweight-embedder.d.ts +23 -0
- package/dist/embeddings/lightweight-embedder.js +136 -0
- package/dist/embeddings/universal-memory-manager.d.ts +38 -0
- package/dist/embeddings/universal-memory-manager.js +206 -0
- package/dist/embeddings/worker-embedding.d.ts +7 -0
- package/dist/embeddings/worker-embedding.js +77 -0
- package/dist/embeddings/worker-manager.d.ts +28 -0
- package/dist/embeddings/worker-manager.js +162 -0
- package/dist/examples/basicUsage.js +7 -7
- package/dist/graph/pathfinding.d.ts +78 -0
- package/dist/graph/pathfinding.js +393 -0
- package/dist/hnsw/hnswIndex.d.ts +13 -0
- package/dist/hnsw/hnswIndex.js +35 -0
- package/dist/hnsw/hnswIndexOptimized.d.ts +1 -0
- package/dist/hnsw/hnswIndexOptimized.js +3 -0
- package/dist/index.d.ts +9 -11
- package/dist/index.js +21 -11
- package/dist/indices/fieldIndex.d.ts +76 -0
- package/dist/indices/fieldIndex.js +357 -0
- package/dist/mcp/brainyMCPAdapter.js +3 -2
- package/dist/mcp/mcpAugmentationToolset.js +11 -17
- package/dist/neural/embeddedPatterns.d.ts +41 -0
- package/dist/neural/embeddedPatterns.js +4044 -0
- package/dist/neural/naturalLanguageProcessor.d.ts +94 -0
- package/dist/neural/naturalLanguageProcessor.js +317 -0
- package/dist/neural/naturalLanguageProcessorStatic.d.ts +64 -0
- package/dist/neural/naturalLanguageProcessorStatic.js +151 -0
- package/dist/neural/neuralAPI.d.ts +255 -0
- package/dist/neural/neuralAPI.js +612 -0
- package/dist/neural/patternLibrary.d.ts +101 -0
- package/dist/neural/patternLibrary.js +313 -0
- package/dist/neural/patterns.d.ts +27 -0
- package/dist/neural/patterns.js +68 -0
- package/dist/neural/staticPatternMatcher.d.ts +35 -0
- package/dist/neural/staticPatternMatcher.js +153 -0
- package/dist/scripts/precomputePatternEmbeddings.d.ts +19 -0
- package/dist/scripts/precomputePatternEmbeddings.js +100 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +5 -0
- package/dist/storage/adapters/fileSystemStorage.js +20 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +5 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +16 -0
- package/dist/storage/enhancedClearOperations.d.ts +83 -0
- package/dist/storage/enhancedClearOperations.js +345 -0
- package/dist/storage/storageFactory.js +31 -27
- package/dist/triple/TripleIntelligence.d.ts +134 -0
- package/dist/triple/TripleIntelligence.js +548 -0
- package/dist/types/augmentations.d.ts +45 -344
- package/dist/types/augmentations.js +5 -2
- package/dist/types/brainyDataInterface.d.ts +20 -10
- package/dist/types/graphTypes.d.ts +46 -0
- package/dist/types/graphTypes.js +16 -2
- package/dist/utils/BoundedRegistry.d.ts +29 -0
- package/dist/utils/BoundedRegistry.js +54 -0
- package/dist/utils/embedding.js +20 -3
- package/dist/utils/hybridModelManager.js +10 -5
- package/dist/utils/metadataFilter.d.ts +33 -19
- package/dist/utils/metadataFilter.js +58 -23
- package/dist/utils/metadataIndex.d.ts +37 -6
- package/dist/utils/metadataIndex.js +427 -64
- package/dist/utils/requestDeduplicator.d.ts +10 -0
- package/dist/utils/requestDeduplicator.js +24 -0
- package/dist/utils/unifiedCache.d.ts +103 -0
- package/dist/utils/unifiedCache.js +311 -0
- package/package.json +43 -128
- package/scripts/ensure-models.js +108 -0
- package/scripts/prepare-models.js +387 -0
- package/OFFLINE_MODELS.md +0 -56
- package/dist/intelligence/neuralEngine.d.ts +0 -207
- package/dist/intelligence/neuralEngine.js +0 -706
- package/dist/utils/modelLoader.d.ts +0 -32
- package/dist/utils/modelLoader.js +0 -219
- package/dist/utils/modelManager.d.ts +0 -77
- package/dist/utils/modelManager.js +0 -219
|
@@ -0,0 +1,612 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Neural API - Unified Semantic Intelligence
|
|
3
|
+
*
|
|
4
|
+
* Best-of-both: Complete functionality + Enterprise performance
|
|
5
|
+
* Combines rich features with O(n) algorithms for millions of items
|
|
6
|
+
*/
|
|
7
|
+
import { cosineDistance } from '../utils/distance.js';
|
|
8
|
+
/**
|
|
9
|
+
* Neural API - Unified best-of-both implementation
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Neural API - Unified Semantic Intelligence facade over a Brainy instance.
 *
 * Combines a friendly auto-detecting surface (similar/clusters/hierarchy/...)
 * with sampling- and level-based algorithms intended to stay near O(n) on
 * large datasets.
 *
 * NOTE(review): the `brain` collaborator is assumed (from usage below) to
 * expose async `get(id)`, `search(query, limit, opts?)`, `embed(text)` and
 * `getStatistics()` — confirm against BrainyData.
 */
export class NeuralAPI {
    /**
     * @param brain BrainyData-like store used for lookups, search and embedding.
     */
    constructor(brain) {
        // Result caches. NOTE(review): unbounded — consider an LRU if this
        // object is long-lived over a churning dataset.
        this.similarityCache = new Map(); // normalized pair key -> similarity
        this.clusterCache = new Map(); // config key -> SemanticCluster[]
        this.hierarchyCache = new Map(); // id -> hierarchy object
        this.brain = brain;
    }
    // ===== SMART USER-FRIENDLY API =====
    /**
     * Calculate similarity between any two items (smart input detection).
     * Id-like strings are resolved through the store, other strings are
     * embedded, arrays are treated as raw vectors; mixed inputs are coerced.
     * Returns a similarity score (1 = identical), or an explanation object
     * when `options.explain` is set.
     */
    async similar(a, b, options) {
        if (typeof a === 'string' && typeof b === 'string') {
            if (this.isId(a) && this.isId(b)) {
                return this.similarityById(a, b, options);
            }
            return this.similarityByText(a, b, options);
        }
        if (Array.isArray(a) && Array.isArray(b)) {
            return this.similarityByVector(a, b, options);
        }
        // Mixed input types: coerce both sides to vectors first.
        return this.smartSimilarity(a, b, options);
    }
    /**
     * Find semantic clusters (auto-detects best approach by input type):
     * - no input: fast hierarchical clustering over the whole dataset
     * - array: cluster the given items (sampling strategy above 1000 items)
     * - string: cluster the neighborhood of that item
     * - object: explicit configuration (see clusterWithConfig)
     * @throws {Error} for unsupported input types.
     */
    async clusters(input) {
        if (!input) {
            return this.clusterFast();
        }
        if (Array.isArray(input)) {
            if (input.length > 1000) {
                return this.clusterLarge({ sampleSize: Math.min(input.length, 1000) });
            }
            return this.clusterItems(input);
        }
        if (typeof input === 'string') {
            return this.clustersNear(input);
        }
        if (typeof input === 'object') {
            return this.clusterWithConfig(input);
        }
        throw new Error('Invalid input for clustering');
    }
    /**
     * Get the semantic hierarchy for an item (cached per id).
     * @throws {Error} when the id does not exist in the store.
     */
    async hierarchy(id) {
        if (this.hierarchyCache.has(id)) {
            return this.hierarchyCache.get(id);
        }
        const item = await this.brain.get(id);
        if (!item) {
            throw new Error(`Item not found: ${id}`);
        }
        const hierarchy = await this.buildHierarchy(item);
        this.hierarchyCache.set(id, hierarchy);
        return hierarchy;
    }
    /**
     * Find semantic neighbors for visualization.
     * @param options.radius maximum semantic distance from the center (default 0.3)
     * @param options.limit maximum neighbors returned (default 50)
     * @param options.includeEdges also materialize edges between neighbors
     */
    async neighbors(id, options) {
        const radius = options?.radius ?? 0.3;
        const limit = options?.limit ?? 50;
        // Over-fetch so the radius filter still leaves enough candidates.
        const results = await this.brain.search(id, limit * 2);
        const neighbors = results
            .filter((r) => r.similarity >= (1 - radius))
            .slice(0, limit)
            .map((r) => ({
                id: r.id,
                similarity: r.similarity,
                type: r.metadata?.type,
                connections: r.metadata?.connections?.size || 0
            }));
        const graph = {
            center: id,
            neighbors
        };
        if (options?.includeEdges) {
            graph.edges = await this.buildEdges(id, neighbors);
        }
        return graph;
    }
    /**
     * Find a semantic path between two items.
     * @param options.maxHops maximum path length (default 5)
     * @param options.algorithm 'breadth' (default) or 'dijkstra'
     */
    async semanticPath(fromId, toId, options) {
        const maxHops = options?.maxHops ?? 5;
        const algorithm = options?.algorithm ?? 'breadth';
        if (algorithm === 'dijkstra') {
            return this.dijkstraPath(fromId, toId, maxHops);
        }
        return this.breadthFirstPath(fromId, toId, maxHops);
    }
    /**
     * Detect semantic outliers. Falls back to sampling above 10k items so
     * the scan stays bounded.
     */
    async outliers(threshold = 0.3) {
        const stats = await this.brain.getStatistics();
        const totalItems = stats.nounCount;
        if (totalItems === 0) {
            return [];
        }
        if (totalItems > 10000) {
            return this.outliersViaSampling(threshold, 1000);
        }
        return this.outliersByDistance(threshold);
    }
    /**
     * Generate visualization data (nodes, edges, layout metadata).
     * @param options.maxNodes cap on nodes included (default 100)
     * @param options.dimensions layout dimensionality (default 2)
     * @param options.algorithm layout algorithm (default 'force')
     */
    async visualize(options) {
        const maxNodes = options?.maxNodes ?? 100;
        const dimensions = options?.dimensions ?? 2;
        const algorithm = options?.algorithm ?? 'force';
        const nodes = await this.getVisualizationNodes(maxNodes);
        const positioned = await this.applyLayout(nodes, algorithm, dimensions);
        // Edges are built unless explicitly disabled.
        const edges = options?.includeEdges !== false ?
            await this.buildVisualizationEdges(positioned) : [];
        const format = this.detectOptimalFormat(positioned, edges);
        return {
            format,
            nodes: positioned,
            edges,
            layout: {
                dimensions,
                algorithm,
                bounds: this.calculateBounds(positioned, dimensions)
            }
        };
    }
    // ===== ENTERPRISE PERFORMANCE ALGORITHMS =====
    /**
     * Fast clustering using HNSW levels - O(n) instead of O(n^2).
     * Results are cached per resolved (level, maxClusters) pair.
     */
    async clusterFast(options = {}) {
        // Resolve defaults BEFORE building the cache key; the previous code
        // keyed on the raw options, so every auto-level call collided on
        // "hierarchical-undefined-undefined" regardless of dataset size.
        const level = options.level ?? await this.getOptimalClusteringLevel();
        const maxClusters = options.maxClusters ?? 100;
        const cacheKey = `hierarchical-${level}-${maxClusters}`;
        if (this.clusterCache.has(cacheKey)) {
            return this.clusterCache.get(cacheKey);
        }
        // Get representative nodes from the HNSW level; each representative
        // is treated as a natural cluster center.
        const representatives = await this.getHNSWLevelNodes(level);
        const clusters = [];
        for (const rep of representatives.slice(0, maxClusters)) {
            const members = await this.findClusterMembers(rep, level - 1);
            clusters.push({
                id: `cluster-${rep.id}`,
                centroid: rep.vector,
                center: rep,
                members: members.map(m => m.id),
                size: members.length,
                level,
                // Size-based confidence; members is capped at 50, so <= 0.9.
                confidence: 0.8 + (members.length / 100) * 0.2
            });
        }
        this.clusterCache.set(cacheKey, clusters);
        return clusters;
    }
    /**
     * Large-scale clustering for massive datasets: cluster a representative
     * sample, then project the sample clusters onto the full dataset.
     * @param options.sampleSize sample size (default 1000)
     * @param options.strategy 'diverse' (default) | 'random' | 'recent'
     */
    async clusterLarge(options = {}) {
        const sampleSize = options.sampleSize ?? 1000;
        const strategy = options.strategy ?? 'diverse';
        const sample = await this.getSample(sampleSize, strategy);
        const sampleClusters = await this.performFastClustering(sample);
        return this.projectClustersToFullDataset(sampleClusters);
    }
    /**
     * Streaming clustering for progressive refinement: clusters one batch at
     * a time, merging into the global cluster set, and yields the current
     * global state after each batch.
     */
    async *clusterStream(options = {}) {
        const batchSize = options.batchSize ?? 1000;
        const maxBatches = options.maxBatches ?? Infinity;
        let offset = 0;
        let batchCount = 0;
        let globalClusters = [];
        while (batchCount < maxBatches) {
            const batch = await this.getBatch(offset, batchSize);
            if (batch.length === 0) {
                break;
            }
            const batchClusters = await this.performFastClustering(batch);
            globalClusters = await this.mergeClusters(globalClusters, batchClusters);
            yield globalClusters;
            offset += batchSize;
            batchCount++;
        }
    }
    /**
     * Level-of-detail for massive visualization: picks a node budget and
     * cluster level from the zoom level, optionally scoped to a viewport.
     */
    async getLOD(zoomLevel, viewport) {
        const lodLevels = [
            { zoom: 0, maxNodes: 50, clusterLevel: 3 },
            { zoom: 1, maxNodes: 200, clusterLevel: 2 },
            { zoom: 2, maxNodes: 1000, clusterLevel: 1 },
            { zoom: 3, maxNodes: 5000, clusterLevel: 0 }
        ];
        const lod = lodLevels.find(l => zoomLevel <= l.zoom) || lodLevels[lodLevels.length - 1];
        if (viewport) {
            return this.getViewportLOD(viewport, lod);
        }
        return this.getGlobalLOD(lod);
    }
    // ===== IMPLEMENTATION HELPERS =====
    /**
     * Heuristic: does this string look like a stored id (UUID-shaped or a
     * 24-char hex string)? Non-matching strings are treated as free text.
     */
    isId(str) {
        return (str.length === 36 && str.includes('-')) || !!str.match(/^[a-f0-9]{24}$/);
    }
    /**
     * Similarity between two stored items, cached.
     * Fixes vs. previous version: (1) the cache key is normalized so
     * (a, b) and (b, a) share an entry (cosine is symmetric); (2) the raw
     * cosine *distance* is converted to a similarity (1 = identical) —
     * previously the distance itself was returned as the "score", so
     * identical items scored 0, contradicting neighbors()'s
     * `similarity >= 1 - radius` convention.
     * @throws {Error} when either id cannot be resolved.
     */
    async similarityById(idA, idB, options) {
        const cacheKey = idA < idB ? `${idA}|${idB}` : `${idB}|${idA}`;
        let score = this.similarityCache.get(cacheKey);
        if (score === undefined) {
            const [itemA, itemB] = await Promise.all([
                this.brain.get(idA),
                this.brain.get(idB)
            ]);
            if (!itemA || !itemB) {
                throw new Error('One or both items not found');
            }
            // cosineDistance is a distance (0 = identical); convert so that
            // the returned score follows the similarity convention.
            score = 1 - cosineDistance(itemA.vector, itemB.vector);
            this.similarityCache.set(cacheKey, score);
        }
        if (options?.explain) {
            return {
                score,
                method: 'cosine',
                confidence: 0.9,
                explanation: `Semantic similarity between ${idA} and ${idB}`
            };
        }
        return score;
    }
    /** Similarity between two free-text strings via their embeddings. */
    async similarityByText(textA, textB, options) {
        const [vectorA, vectorB] = await Promise.all([
            this.brain.embed(textA),
            this.brain.embed(textB)
        ]);
        return this.similarityByVector(vectorA, vectorB, options);
    }
    /**
     * Similarity between two raw vectors. Converts cosine distance to a
     * similarity score (1 = identical); see similarityById for rationale.
     */
    async similarityByVector(vectorA, vectorB, options) {
        const score = 1 - cosineDistance(vectorA, vectorB);
        if (options?.explain) {
            return {
                score,
                method: options.method || 'cosine',
                confidence: 0.95,
                explanation: 'Direct vector similarity calculation'
            };
        }
        return score;
    }
    /** Fallback for mixed input types: coerce both sides to vectors. */
    async smartSimilarity(a, b, options) {
        const vectorA = await this.toVector(a);
        const vectorB = await this.toVector(b);
        return this.similarityByVector(vectorA, vectorB, options);
    }
    /**
     * Coerce an arbitrary input to a vector: arrays pass through, id-like
     * strings are looked up (falling back to embedding the literal text),
     * other strings are embedded, objects use their `.vector` or are
     * serialized and embedded.
     */
    async toVector(item) {
        if (Array.isArray(item)) {
            return item;
        }
        if (typeof item === 'string') {
            if (this.isId(item)) {
                const found = await this.brain.get(item);
                return found?.vector || await this.brain.embed(item);
            }
            return await this.brain.embed(item);
        }
        if (typeof item === 'object' && item.vector) {
            return item.vector;
        }
        return await this.brain.embed(JSON.stringify(item));
    }
    // Enterprise clustering implementations
    /** Pick an HNSW level appropriate for the current dataset size. */
    async getOptimalClusteringLevel() {
        const stats = await this.brain.getStatistics();
        const itemCount = stats.nounCount;
        if (itemCount < 1000) {
            return 0;
        }
        if (itemCount < 10000) {
            return 1;
        }
        if (itemCount < 100000) {
            return 2;
        }
        return 3;
    }
    /**
     * Get representative nodes for an HNSW level.
     * NOTE(review): does not actually walk the HNSW graph yet — it samples
     * via a generic search query sized by the level; confirm intent.
     */
    async getHNSWLevelNodes(level) {
        const stats = await this.brain.getStatistics();
        const sampleSize = Math.min(100, Math.floor(stats.nounCount / (level + 1)));
        const queryVector = await this.brain.embed('data information content');
        const allItems = await this.brain.search(queryVector, sampleSize * 2);
        return allItems.slice(0, sampleSize);
    }
    /**
     * Find items belonging to a cluster center (search near the centroid,
     * keep matches above a fixed similarity floor). `level` is currently
     * unused but kept for interface stability.
     */
    async findClusterMembers(center, level) {
        const results = await this.brain.search(center.vector, 50);
        return results.filter((r) => r.similarity > 0.7);
    }
    /**
     * Draw a sample of items using the given strategy.
     * Over-fetches (3x) so the sampling strategies have room to choose.
     */
    async getSample(size, strategy) {
        const stats = await this.brain.getStatistics();
        const maxSize = Math.min(size * 3, stats.nounCount);
        const queryVector = await this.brain.embed('sample data content');
        const allItems = await this.brain.search(queryVector, maxSize);
        switch (strategy) {
            case 'random':
                return this.shuffleArray(allItems).slice(0, size);
            case 'diverse':
                return this.getDiverseSample(allItems, size);
            case 'recent':
                return allItems.slice(-size);
            default:
                return allItems.slice(0, size);
        }
    }
    /** Fisher-Yates shuffle on a copy (input array is not mutated). */
    shuffleArray(array) {
        const shuffled = [...array];
        for (let i = shuffled.length - 1; i > 0; i--) {
            const j = Math.floor(Math.random() * (i + 1));
            [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
        }
        return shuffled;
    }
    /**
     * Select diverse items via farthest-point (max-min distance) sampling.
     * O(size * items) distance computations.
     */
    async getDiverseSample(items, size) {
        if (items.length <= size) {
            return items;
        }
        const sample = [items[0]]; // seed with first item
        for (let i = 1; i < size; i++) {
            let maxMinDistance = -1;
            let bestItem = null;
            for (const candidate of items) {
                if (sample.includes(candidate)) {
                    continue;
                }
                // Minimum distance from the candidate to the current sample.
                let minDistance = Infinity;
                for (const selected of sample) {
                    const distance = cosineDistance(candidate.vector, selected.vector);
                    minDistance = Math.min(minDistance, distance);
                }
                if (minDistance > maxMinDistance) {
                    maxMinDistance = minDistance;
                    bestItem = candidate;
                }
            }
            if (bestItem) {
                sample.push(bestItem);
            }
        }
        return sample;
    }
    /**
     * Simplified k-means over a small item set (fixed 10 iterations,
     * k = min(10, n/3)). Returns SemanticCluster-shaped objects.
     * Empty input now yields an empty result instead of a bogus cluster
     * with an empty centroid.
     */
    async performFastClustering(items) {
        if (items.length === 0) {
            return [];
        }
        const k = Math.min(10, Math.floor(items.length / 3));
        if (k <= 1) {
            return [{
                id: 'cluster-0',
                centroid: items[0]?.vector || [],
                members: items.map(i => i.id),
                confidence: 1.0
            }];
        }
        // Initialize centroids from the first k items.
        const centroids = items.slice(0, k).map(item => item.vector);
        const nearestCentroid = (item) => {
            let bestCluster = 0;
            let bestDistance = Infinity;
            for (let c = 0; c < k; c++) {
                const distance = cosineDistance(item.vector, centroids[c]);
                if (distance < bestDistance) {
                    bestDistance = distance;
                    bestCluster = c;
                }
            }
            return bestCluster;
        };
        for (let iter = 0; iter < 10; iter++) {
            const clusters = Array(k).fill(null).map(() => []);
            for (const item of items) {
                clusters[nearestCentroid(item)].push(item);
            }
            for (let c = 0; c < k; c++) {
                if (clusters[c].length > 0) {
                    centroids[c] = this.calculateCentroid(clusters[c]);
                }
            }
        }
        // Final assignment pass -> SemanticCluster format.
        const result = [];
        for (let c = 0; c < k; c++) {
            const members = items.filter(item => nearestCentroid(item) === c);
            if (members.length > 0) {
                result.push({
                    id: `cluster-${c}`,
                    centroid: centroids[c],
                    members: members.map(m => m.id),
                    confidence: Math.min(0.9, members.length / items.length * 2)
                });
            }
        }
        return result;
    }
    /** Component-wise mean of the items' vectors; [] for empty input. */
    calculateCentroid(items) {
        if (items.length === 0) {
            return [];
        }
        const dimensions = items[0].vector.length;
        const centroid = new Array(dimensions).fill(0);
        for (const item of items) {
            for (let d = 0; d < dimensions; d++) {
                centroid[d] += item.vector[d];
            }
        }
        for (let d = 0; d < dimensions; d++) {
            centroid[d] /= items.length;
        }
        return centroid;
    }
    /**
     * Project sample clusters onto the full dataset: for each centroid,
     * gather all stored items above a fixed similarity floor.
     */
    async projectClustersToFullDataset(sampleClusters) {
        const result = [];
        for (const cluster of sampleClusters) {
            const similar = await this.brain.search(cluster.centroid, 1000);
            const members = similar
                .filter((s) => s.similarity > 0.6)
                .map((s) => s.id);
            result.push({
                ...cluster,
                members,
                size: members.length
            });
        }
        return result;
    }
    /**
     * Merge batch clusters into the global set: clusters whose centroids are
     * sufficiently similar are combined, others are appended.
     * BUG FIX: the previous version compared the raw cosine *distance*
     * against the 0.8 threshold while calling it "similarity", so it merged
     * the most DISSIMILAR clusters. We now convert distance to similarity
     * before comparing.
     */
    async mergeClusters(globalClusters, batchClusters) {
        const result = [...globalClusters];
        for (const batchCluster of batchClusters) {
            let merged = false;
            for (let i = 0; i < result.length; i++) {
                const similarity = 1 - cosineDistance(result[i].centroid, batchCluster.centroid);
                if (similarity > 0.8) {
                    const newMembers = [...new Set([...result[i].members, ...batchCluster.members])];
                    result[i] = {
                        ...result[i],
                        members: newMembers,
                        size: newMembers.length,
                        centroid: this.averageVectors(result[i].centroid, batchCluster.centroid)
                    };
                    merged = true;
                    break;
                }
            }
            if (!merged) {
                result.push(batchCluster);
            }
        }
        return result;
    }
    /** Element-wise mean of two equal-length vectors. */
    averageVectors(v1, v2) {
        const result = new Array(v1.length);
        for (let i = 0; i < v1.length; i++) {
            result[i] = (v1[i] + v2[i]) / 2;
        }
        return result;
    }
    /**
     * Get a batch of items for streaming.
     * NOTE(review): relies on brain.search accepting an `offset` option —
     * confirm the search implementation honors it.
     */
    async getBatch(offset, size) {
        const queryVector = await this.brain.embed('batch data content');
        const items = await this.brain.search(queryVector, size, { offset });
        return items;
    }
    // Compatibility aliases over the implementations above.
    /** Cluster the whole dataset (alias for clusterFast with defaults). */
    async clusterAll() {
        return this.clusterFast();
    }
    /** Cluster an explicit list of items. */
    async clusterItems(items) {
        return this.performFastClustering(items);
    }
    /** Cluster the neighborhood of a stored item. */
    async clustersNear(id) {
        const neighbors = await this.neighbors(id, { limit: 100 });
        return this.performFastClustering(neighbors.neighbors);
    }
    /** Dispatch clustering by explicit config (`algorithm` selects strategy). */
    async clusterWithConfig(config) {
        switch (config.algorithm) {
            case 'hierarchical':
                return this.clusterFast(config);
            case 'sample':
                return this.clusterLarge(config);
            case 'stream': {
                // Drain the stream and flatten all intermediate states.
                const generator = this.clusterStream(config);
                const results = [];
                for await (const batch of generator) {
                    results.push(...batch);
                }
                return results;
            }
            default:
                return this.clusterFast(config);
        }
    }
    // ----- Placeholders: not yet implemented; return empty/identity values -----
    /** Placeholder: hierarchy currently contains only the item itself. */
    async buildHierarchy(item) {
        return {
            self: { id: item.id, vector: item.vector }
        };
    }
    /** Placeholder: edge construction not yet implemented. */
    async buildEdges(centerId, neighbors) {
        return [];
    }
    /** Placeholder: Dijkstra pathfinding not yet implemented. */
    async dijkstraPath(from, to, maxHops) {
        return [];
    }
    /** Placeholder: BFS pathfinding not yet implemented. */
    async breadthFirstPath(from, to, maxHops) {
        return [];
    }
    /** Placeholder: sampled outlier detection not yet implemented. */
    async outliersViaSampling(threshold, sampleSize) {
        return [];
    }
    /** Placeholder: exhaustive outlier detection not yet implemented. */
    async outliersByDistance(threshold) {
        return [];
    }
    /** Placeholder: visualization node selection not yet implemented. */
    async getVisualizationNodes(maxNodes) {
        return [];
    }
    /** Placeholder: layout is currently the identity transform. */
    async applyLayout(nodes, algorithm, dimensions) {
        return nodes;
    }
    /** Placeholder: visualization edges not yet implemented. */
    async buildVisualizationEdges(nodes) {
        return [];
    }
    /** Placeholder: always recommends force-directed rendering. */
    detectOptimalFormat(nodes, edges) {
        return 'force-directed';
    }
    /** Placeholder: fixed 100x100 bounds. */
    calculateBounds(nodes, dimensions) {
        return { width: 100, height: 100 };
    }
    /** Placeholder: viewport-scoped LOD not yet implemented. */
    async getViewportLOD(viewport, lod) {
        return {};
    }
    /** Placeholder: global LOD not yet implemented. */
    async getGlobalLOD(lod) {
        return {};
    }
}
|
|
612
|
+
//# sourceMappingURL=neuralAPI.js.map
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 🧠 Pattern Library for Natural Language Processing
|
|
3
|
+
* Manages pre-computed pattern embeddings and smart matching
|
|
4
|
+
*
|
|
5
|
+
* Uses Brainy's own features for self-leveraging intelligence:
|
|
6
|
+
* - Embeddings for semantic similarity
|
|
7
|
+
* - Pattern caching for performance
|
|
8
|
+
* - Progressive learning from usage
|
|
9
|
+
*/
|
|
10
|
+
import { Vector } from '../coreTypes.js';
|
|
11
|
+
import { BrainyData } from '../brainyData.js';
|
|
12
|
+
/**
 * A natural-language pattern with example phrasings and a result template.
 * Patterns are matched against query embeddings (see PatternLibrary).
 */
export interface Pattern {
    /** Unique identifier of the pattern. */
    id: string;
    /** Grouping category (bucketed in PatternLibrary.getStatistics). */
    category: string;
    /** Example phrasings this pattern is meant to match. */
    examples: string[];
    /** Matchable pattern text — NOTE(review): appears to be a regex/template string; confirm the exact format. */
    pattern: string;
    /** Result template filled from extracted slots (see fillTemplate); shape varies per pattern. */
    template: any;
    /** Match confidence — assumed to lie in [0, 1]; confirm with producers. */
    confidence: number;
    /** Optional pre-computed embedding of the pattern for fast matching. */
    embedding?: Vector;
    /** Optional domain tag. */
    domain?: string;
    /** Optional usage frequency; either a count or a named bucket string. */
    frequency?: number | string;
}
|
|
23
|
+
/**
 * Result of extracting slot values from a query against a Pattern
 * (see PatternLibrary.extractSlots).
 */
export interface SlotExtraction {
    /** Slot name -> value extracted from the query text. */
    slots: Record<string, any>;
    /** Confidence of the extraction — assumed [0, 1]; confirm. */
    confidence: number;
}
|
|
27
|
+
/**
 * Pattern Library for natural-language processing: manages pre-computed
 * pattern embeddings, matches queries against them, extracts slots and
 * tracks per-pattern success so the library can learn from usage.
 */
export declare class PatternLibrary {
    /** Registered patterns. */
    private patterns;
    /** Pre-computed embeddings for the registered patterns. */
    private patternEmbeddings;
    /** Backing BrainyData instance used to generate embeddings. */
    private brain;
    /** Cache of previously computed embeddings (see getEmbedding). */
    private embeddingCache;
    /** Per-pattern success metrics fed by updateSuccessMetric. */
    private successMetrics;
    /** @param brain - BrainyData instance used for embedding generation. */
    constructor(brain: BrainyData);
    /**
     * Initialize the pattern library with pre-computed embeddings.
     * Call before findBestPatterns.
     */
    init(): Promise<void>;
    /**
     * Pre-compute embeddings for all patterns so matching is fast.
     */
    private precomputeEmbeddings;
    /**
     * Get an embedding, consulting the cache before re-embedding.
     */
    private getEmbedding;
    /**
     * Find the best matching patterns for a query embedding.
     * @param queryEmbedding - embedding of the user query
     * @param k - maximum number of matches to return
     * @returns patterns paired with their similarity to the query
     */
    findBestPatterns(queryEmbedding: Vector, k?: number): Promise<Array<{
        pattern: Pattern;
        similarity: number;
    }>>;
    /**
     * Extract slot values from a query based on a matched pattern.
     */
    extractSlots(query: string, pattern: Pattern): SlotExtraction;
    /**
     * Fill a pattern's template with extracted slot values.
     */
    fillTemplate(template: any, slots: Record<string, any>): any;
    /**
     * Update a pattern's success metrics based on usage
     * (surfaces in getStatistics().topPatterns).
     */
    updateSuccessMetric(patternId: string, success: boolean): void;
    /**
     * Learn a new pattern from a successful query/result pair.
     */
    learnPattern(query: string, result: any): Promise<void>;
    /**
     * Helper: average multiple vectors into one.
     */
    private averageVectors;
    /**
     * Helper: cosine similarity between two vectors.
     */
    private cosineSimilarity;
    /**
     * Helper: simple tokenization of query text.
     */
    private tokenize;
    /**
     * Helper: post-process extracted slot values.
     */
    private postProcessSlots;
    /**
     * Helper: generate a regex pattern from a query string.
     */
    private generateRegexFromQuery;
    /**
     * Get pattern statistics for monitoring: totals, per-category counts,
     * mean confidence, and the most successful patterns.
     */
    getStatistics(): {
        totalPatterns: number;
        categories: Record<string, number>;
        averageConfidence: number;
        topPatterns: Array<{
            id: string;
            success: number;
        }>;
    };
}
|