@soulcraft/brainy 2.11.0 → 2.12.0

This diff compares two publicly released versions of a package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
@@ -12,6 +12,7 @@ import { WebSocketConnection } from './types/augmentations.js';
12
12
  import { BrainyDataInterface } from './types/brainyDataInterface.js';
13
13
  import { DistributedConfig } from './types/distributedTypes.js';
14
14
  import { SearchCacheConfig } from './utils/searchCache.js';
15
+ import { ImprovedNeuralAPI } from './neural/improvedNeuralAPI.js';
15
16
  import { TripleQuery, TripleResult } from './triple/TripleIntelligence.js';
16
17
  export interface BrainyDataConfig {
17
18
  /**
@@ -1687,23 +1688,19 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
1687
1688
  * - brain.neural.clusterStream() - Progressive streaming
1688
1689
  * - brain.neural.getLOD() - Level-of-detail for scale (NOTE(review): getLOD is not declared on ImprovedNeuralAPI; confirm it still exists)
1689
1690
  */
1690
- get neural(): any;
1691
+ get neural(): ImprovedNeuralAPI;
1691
1692
  /**
1692
1693
  * Simple similarity check (shorthand for neural.similar); resolves to a plain number
1693
1694
  */
1694
- similar(a: any, b: any): Promise<number>;
1695
+ similar(a: any, b: any, options?: any): Promise<number>;
1695
1696
  /**
1696
1697
  * Get semantic clusters (shorthand for neural.clusters); callable as clusters(options) or clusters(items, options)
1697
1698
  */
1698
- clusters(options?: any): Promise<any[]>;
1699
+ clusters(items?: any, options?: any): Promise<any[]>;
1699
1700
  /**
1700
1701
  * Get related items (shorthand for neural.neighbors); options may be an options object or a bare numeric limit
1701
1702
  */
1702
- related(id: string, limit?: number): Promise<any[]>;
1703
- /**
1704
- * Get visualization data (shorthand for neural.visualize)
1705
- */
1706
- visualize(options?: any): Promise<any>;
1703
+ related(id: string, options?: any): Promise<any[]>;
1707
1704
  /**
1708
1705
  * 🚀 TRIPLE INTELLIGENCE SEARCH - Natural Language & Complex Queries
1709
1706
  * The revolutionary search that combines vector, graph, and metadata intelligence!
@@ -29,7 +29,7 @@ import { EntityRegistryAugmentation, AutoRegisterEntitiesAugmentation } from './
29
29
  import { createDefaultAugmentations } from './augmentations/defaultAugmentations.js';
30
30
  // import { RealtimeStreamingAugmentation } from './augmentations/realtimeStreamingAugmentation.js'
31
31
  import { IntelligentVerbScoringAugmentation } from './augmentations/intelligentVerbScoringAugmentation.js';
32
- import { NeuralAPI } from './neural/neuralAPI.js';
32
+ import { ImprovedNeuralAPI } from './neural/improvedNeuralAPI.js';
33
33
  import { TripleIntelligenceEngine } from './triple/TripleIntelligence.js';
34
34
  export class BrainyData {
35
35
  // REMOVED: HealthMonitor is now handled by MonitoringAugmentation
@@ -3011,9 +3011,16 @@ export class BrainyData {
3011
3011
  */
3012
3012
  async addVerbs(verbs) {
3013
3013
  const ids = [];
3014
- for (const verb of verbs) {
3015
- const id = await this.addVerb(verb.source, verb.target, verb.type, verb.metadata);
3016
- ids.push(id);
3014
+ const chunkSize = 10; // Upper bound on addVerb calls started together in one batch
3015
+ // Walk the input in slices of chunkSize; slices are handled one after another
3016
+ for (let i = 0; i < verbs.length; i += chunkSize) {
3017
+ const chunk = verbs.slice(i, i + chunkSize);
3018
+ // Start every addVerb call of this slice up front instead of awaiting each one in turn
3019
+ const chunkPromises = chunk.map(verb => this.addVerb(verb.source, verb.target, verb.type, verb.metadata));
3020
+ // Promise.all rejects as soon as one call rejects; the error propagates and later slices are never started
3021
+ const chunkIds = await Promise.all(chunkPromises);
3022
+ // Promise.all resolves in input order, so ids stay aligned with the verbs array
3023
+ ids.push(...chunkIds);
3017
3024
  }
3018
3025
  return ids;
3019
3026
  }
@@ -3024,8 +3031,16 @@ export class BrainyData {
3024
3031
  */
3025
3032
  async deleteVerbs(ids) {
3026
3033
  const results = [];
3027
- for (const id of ids) {
3028
- results.push(await this.deleteVerb(id));
3034
+ const chunkSize = 10; // Upper bound on deleteVerb calls started together in one batch
3035
+ // Walk the ids in slices of chunkSize; slices are handled one after another
3036
+ for (let i = 0; i < ids.length; i += chunkSize) {
3037
+ const chunk = ids.slice(i, i + chunkSize);
3038
+ // Start every deleteVerb call of this slice up front instead of awaiting each one in turn
3039
+ const chunkPromises = chunk.map(id => this.deleteVerb(id));
3040
+ // Promise.all rejects as soon as one call rejects; the error propagates and later slices are never started
3041
+ const chunkResults = await Promise.all(chunkPromises);
3042
+ // Promise.all resolves in input order, so results stay aligned with the ids array
3043
+ results.push(...chunkResults);
3029
3044
  }
3030
3045
  return results;
3031
3046
  }
@@ -5749,8 +5764,16 @@ export class BrainyData {
5749
5764
  */
5750
5765
  async deleteNouns(ids) {
5751
5766
  const results = [];
5752
- for (const id of ids) {
5753
- results.push(await this.deleteNoun(id));
5767
+ const chunkSize = 10; // Upper bound on deleteNoun calls started together in one batch
5768
+ // Walk the ids in slices of chunkSize; slices are handled one after another
5769
+ for (let i = 0; i < ids.length; i += chunkSize) {
5770
+ const chunk = ids.slice(i, i + chunkSize);
5771
+ // Start every deleteNoun call of this slice up front instead of awaiting each one in turn
5772
+ const chunkPromises = chunk.map(id => this.deleteNoun(id));
5773
+ // Promise.all rejects as soon as one call rejects; the error propagates and later slices are never started
5774
+ const chunkResults = await Promise.all(chunkPromises);
5775
+ // Promise.all resolves in input order, so results stay aligned with the ids array
5776
+ results.push(...chunkResults);
5754
5777
  }
5755
5778
  return results;
5756
5779
  }
@@ -5931,34 +5954,41 @@ export class BrainyData {
5931
5954
  get neural() {
5932
5955
  if (!this._neural) {
5933
5956
  // First access: build the Neural API instance once and cache it on this._neural for later reads
5934
- this._neural = new NeuralAPI(this);
5957
+ this._neural = new ImprovedNeuralAPI(this);
5935
5958
  }
5936
5959
  return this._neural;
5937
5960
  }
5938
5961
  /**
5939
5962
  * Simple similarity check (shorthand for neural.similar)
5940
5963
  */
5941
- async similar(a, b) {
5942
- return this.neural.similar(a, b);
5964
+ async similar(a, b, options) {
5965
+ const result = await this.neural.similar(a, b, options);
5966
+ // neural.similar() may resolve to a number or a result object; unwrap .score so this shortcut yields a number (NOTE(review): a null result would also take the object branch)
5967
+ return typeof result === 'object' ? result.score : result;
5943
5968
  }
5944
5969
  /**
5945
5970
  * Get semantic clusters (shorthand for neural.clusters)
5946
5971
  */
5947
- async clusters(options) {
5948
- return this.neural.clusters(options);
5972
+ async clusters(items, options) {
5973
+ // Accept either clusters(options) or clusters(items, options)
5974
+ if (typeof items === 'object' && !Array.isArray(items) && options === undefined) {
5975
+ // A lone non-array object is treated as the options object and forwarded unchanged
5976
+ return this.neural.clusters(items);
5977
+ }
5978
+ // Two-argument form: fold items into the single options object that neural.clusters receives
5979
+ if (options) {
5980
+ return this.neural.clusters({ ...options, items });
5981
+ }
5982
+ return this.neural.clusters(items);
5949
5983
  }
5950
5984
  /**
5951
5985
  * Get related items (shorthand for neural.neighbors); options may be an options object or a bare numeric limit, and a result without neighbors yields []
5952
5986
  */
5953
- async related(id, limit) {
5954
- const result = await this.neural.neighbors(id, { limit });
5955
- return result.neighbors;
5956
- }
5957
- /**
5958
- * Get visualization data (shorthand for neural.visualize)
5959
- */
5960
- async visualize(options) {
5961
- return this.neural.visualize(options);
5987
+ async related(id, options) {
5988
+ const limit = typeof options === 'number' ? options : options?.limit;
5989
+ const fullOptions = typeof options === 'number' ? { limit } : options;
5990
+ const result = await this.neural.neighbors(id, fullOptions);
5991
+ return result.neighbors || [];
5962
5992
  }
5963
5993
  /**
5964
5994
  * 🚀 TRIPLE INTELLIGENCE SEARCH - Natural Language & Complex Queries
@@ -0,0 +1,346 @@
1
+ /**
2
+ * Improved Neural API - Clean, Consistent, Performant
3
+ *
4
+ * Public API Surface:
5
+ * - brain.neural.similar(a, b, options?) // Similarity calculation
6
+ * - brain.neural.clusters(input?) // Semantic clustering (item ID, ID array, or options object)
7
+ * - brain.neural.neighbors(id, options?) // K-nearest neighbors
8
+ * - brain.neural.hierarchy(id, options?) // Semantic hierarchy
9
+ * - brain.neural.outliers(options?) // Anomaly detection
10
+ * - brain.neural.visualize(options?) // Visualization data
11
+ *
12
+ * Advanced Clustering:
13
+ * - brain.neural.clusterByDomain(field, options?) // Domain-aware clustering
14
+ * - brain.neural.clusterByTime(field, windows, options?) // Temporal clustering
15
+ * - brain.neural.clusterStream(options?) // AsyncIterator for streaming
16
+ * - brain.neural.updateClusters(items, options?) // Incremental clustering
17
+ *
18
+ * Private methods are prefixed with _ and not exposed in public API
19
+ */
20
+ import { Vector } from '../coreTypes.js';
21
+ import { SemanticCluster, DomainCluster, TemporalCluster, SimilarityOptions, SimilarityResult, NeighborOptions, NeighborsResult, SemanticHierarchy, HierarchyOptions, ClusteringOptions, DomainClusteringOptions, TemporalClusteringOptions, StreamClusteringOptions, VisualizationOptions, VisualizationResult, OutlierOptions, Outlier, StreamingBatch, TimeWindow, PerformanceMetrics, NeuralAPIConfig } from './types.js';
22
+ export declare class ImprovedNeuralAPI {
23
+ private brain;
24
+ private config;
25
+ private similarityCache;
26
+ private clusterCache;
27
+ private hierarchyCache;
28
+ private neighborsCache;
29
+ private performanceMetrics;
30
+ constructor(brain: any, config?: NeuralAPIConfig);
31
+ /**
32
+ * Calculate similarity between any two items (auto-detection)
33
+ * Supports: IDs, text strings, vectors, or mixed types
34
+ */
35
+ similar(a: string | Vector | any, b: string | Vector | any, options?: SimilarityOptions): Promise<number | SimilarityResult>;
36
+ /**
37
+ * Intelligent semantic clustering with auto-routing
38
+ * - No input: Cluster all data
39
+ * - Array: Cluster specific items
40
+ * - String: Find clusters near this item
41
+ * - Options object: Advanced configuration
42
+ */
43
+ clusters(input?: string | string[] | ClusteringOptions): Promise<SemanticCluster[]>;
44
+ /**
45
+ * Fast hierarchical clustering using HNSW levels
46
+ */
47
+ clusterFast(options?: {
48
+ level?: number;
49
+ maxClusters?: number;
50
+ }): Promise<SemanticCluster[]>;
51
+ /**
52
+ * Large-scale clustering with intelligent sampling
53
+ */
54
+ clusterLarge(options?: {
55
+ sampleSize?: number;
56
+ strategy?: 'random' | 'diverse' | 'recent';
57
+ }): Promise<SemanticCluster[]>;
58
+ /**
59
+ * Domain-aware clustering based on metadata fields
60
+ */
61
+ clusterByDomain(field: string, options?: DomainClusteringOptions): Promise<DomainCluster[]>;
62
+ /**
63
+ * Temporal clustering based on time windows
64
+ */
65
+ clusterByTime(timeField: string, windows: TimeWindow[], options?: TemporalClusteringOptions): Promise<TemporalCluster[]>;
66
+ /**
67
+ * Streaming clustering with real-time updates
68
+ */
69
+ clusterStream(options?: StreamClusteringOptions): AsyncIterableIterator<StreamingBatch>;
70
+ /**
71
+ * Incremental clustering - add new items to existing clusters
72
+ */
73
+ updateClusters(newItems: string[], options?: ClusteringOptions): Promise<SemanticCluster[]>;
74
+ /**
75
+ * Find K-nearest semantic neighbors
76
+ */
77
+ neighbors(id: string, options?: NeighborOptions): Promise<NeighborsResult>;
78
+ /**
79
+ * Build semantic hierarchy around an item
80
+ */
81
+ hierarchy(id: string, options?: HierarchyOptions): Promise<SemanticHierarchy>;
82
+ /**
83
+ * Detect outliers and anomalous items
84
+ */
85
+ outliers(options?: OutlierOptions): Promise<Outlier[]>;
86
+ /**
87
+ * Generate visualization data for graph libraries
88
+ */
89
+ visualize(options?: VisualizationOptions): Promise<VisualizationResult>;
90
+ private _routeClusteringAlgorithm;
91
+ private _performClustering;
92
+ /**
93
+ * SEMANTIC-AWARE CLUSTERING: Uses existing NounType/VerbType taxonomy + HNSW
94
+ */
95
+ private _performSemanticClustering;
96
+ /**
97
+ * HIERARCHICAL CLUSTERING: Uses existing HNSW levels for O(n) clustering
98
+ */
99
+ private _performHierarchicalClustering;
100
+ /**
101
+ * K-MEANS CLUSTERING: Real implementation using existing distance functions
102
+ */
103
+ private _performKMeansClustering;
104
+ /**
105
+ * DBSCAN CLUSTERING: Density-based clustering with adaptive parameters using HNSW
106
+ */
107
+ private _performDBSCANClustering;
108
+ /**
109
+ * GRAPH COMMUNITY DETECTION: Uses existing verb relationships for clustering
110
+ */
111
+ private _performGraphClustering;
112
+ /**
113
+ * MULTI-MODAL FUSION: Combines vector + graph + semantic + Triple Intelligence
114
+ */
115
+ private _performMultiModalClustering;
116
+ /**
117
+ * SAMPLED CLUSTERING: For very large datasets using intelligent sampling
118
+ */
119
+ private _performSampledClustering;
120
+ private _similarityById;
121
+ private _similarityByVector;
122
+ private _similarityByText;
123
+ private _isId;
124
+ private _isVector;
125
+ private _convertToVector;
126
+ private _createSimilarityKey;
127
+ private _createClusteringKey;
128
+ private _cacheResult;
129
+ private _trackPerformance;
130
+ private _createPerformanceMetrics;
131
+ private _initializeCleanupTimer;
132
+ /**
133
+ * Build graph structure from existing verb relationships
134
+ */
135
+ private _buildGraphFromVerbs;
136
+ /**
137
+ * Detect communities using Louvain modularity optimization
138
+ */
139
+ private _detectCommunities;
140
+ /**
141
+ * Refine community boundaries using vector similarity
142
+ */
143
+ private _refineCommunitiesWithVectors;
144
+ /**
145
+ * Get items with their metadata including noun types
146
+ */
147
+ private _getItemsWithMetadata;
148
+ /**
149
+ * Group items by their semantic noun types
150
+ */
151
+ private _groupBySemanticType;
152
+ private _getAllItemIds;
153
+ private _getTotalItemCount;
154
+ private _calculateTotalWeight;
155
+ private _getNeighborCommunities;
156
+ private _calculateModularityGain;
157
+ private _getNodeDegree;
158
+ private _getEdgesToCommunity;
159
+ private _getCommunityWeight;
160
+ private _calculateCommunityModularity;
161
+ private _calculateCommunityDensity;
162
+ private _findStrongestConnections;
163
+ /**
164
+ * Get items with their vector representations
165
+ */
166
+ private _getItemsWithVectors;
167
+ /**
168
+ * Calculate centroid from items using existing distance functions
169
+ */
170
+ private _calculateCentroidFromItems;
171
+ /**
172
+ * Initialize centroids using k-means++ algorithm for better convergence
173
+ */
174
+ private _initializeCentroidsKMeansPlusPlus;
175
+ /**
176
+ * Assign points to nearest centroids using existing distance functions
177
+ */
178
+ private _assignPointsToCentroids;
179
+ /**
180
+ * Update centroids based on current assignments
181
+ */
182
+ private _updateCentroids;
183
+ /**
184
+ * Calculate how much assignments have changed between iterations
185
+ */
186
+ private _calculateAssignmentChangeRate;
187
+ /**
188
+ * Calculate cluster confidence for k-means clusters
189
+ */
190
+ private _calculateKMeansClusterConfidence;
191
+ /**
192
+ * Estimate optimal eps parameter using k-nearest neighbor distances
193
+ */
194
+ private _estimateOptimalEps;
195
+ /**
196
+ * Find neighbors within epsilon distance using efficient vector operations
197
+ */
198
+ private _findNeighborsWithinEps;
199
+ /**
200
+ * Expand DBSCAN cluster by adding density-reachable points
201
+ */
202
+ private _expandCluster;
203
+ /**
204
+ * Calculate DBSCAN cluster confidence based on density
205
+ */
206
+ private _calculateDBSCANClusterConfidence;
207
+ /**
208
+ * Calculate squared Euclidean distance (more efficient than sqrt)
209
+ */
210
+ private _calculateSquaredDistance;
211
+ /**
212
+ * Calculate vector coherence for community refinement
213
+ */
214
+ private _calculateVectorCoherence;
215
+ private _getItemsByField;
216
+ /**
217
+ * Generate intelligent cluster labels using Triple Intelligence
218
+ */
219
+ private _generateIntelligentClusterLabel;
220
+ /**
221
+ * Generate simple cluster labels based on semantic analysis
222
+ */
223
+ private _generateClusterLabel;
224
+ /**
225
+ * Fuse clustering results using Triple Intelligence consensus
226
+ */
227
+ private _fuseClusteringResultsWithTripleIntelligence;
228
+ /**
229
+ * Get items in a specific cluster from cluster sets
230
+ */
231
+ private _getItemsInCluster;
232
+ /**
233
+ * Count co-occurrences between two sets of assignments
234
+ */
235
+ private _countCoOccurrences;
236
+ /**
237
+ * Calculate fusion confidence based on algorithm agreement
238
+ */
239
+ private _calculateFusionConfidence;
240
+ /**
241
+ * Generate empty clustering result for edge cases
242
+ */
243
+ private _createEmptyResult;
244
+ /**
245
+ * Get sample using specified strategy for large dataset clustering
246
+ */
247
+ private _getSampleUsingStrategy;
248
+ /**
249
+ * Random sampling
250
+ */
251
+ private _getRandomSample;
252
+ /**
253
+ * Diverse sampling using vector space distribution
254
+ */
255
+ private _getDiverseSample;
256
+ /**
257
+ * Recent sampling based on creation time
258
+ */
259
+ private _getRecentSample;
260
+ /**
261
+ * Important sampling based on connection count and metadata
262
+ */
263
+ private _getImportantSample;
264
+ /**
265
+ * Project clusters back to full dataset using HNSW neighbors
266
+ */
267
+ private _projectClustersToFullDataset;
268
+ private _groupByDomain;
269
+ private _calculateDomainConfidence;
270
+ private _findCrossDomainMembers;
271
+ private _findCrossDomainClusters;
272
+ private _getItemsByTimeWindow;
273
+ private _calculateTemporalMetrics;
274
+ private _mergeOverlappingTemporalClusters;
275
+ private _adjustThresholdAdaptively;
276
+ private _calculateItemToClusterSimilarity;
277
+ private _recalculateClusterCentroid;
278
+ private _calculateSimilarity;
279
+ private _sortNeighbors;
280
+ private _buildSemanticHierarchy;
281
+ private _detectOutliersClusterBased;
282
+ private _detectOutliersIsolation;
283
+ private _detectOutliersStatistical;
284
+ private _generateVisualizationNodes;
285
+ private _generateVisualizationEdges;
286
+ private _generateVisualizationClusters;
287
+ private _applyLayoutAlgorithm;
288
+ private _manhattanDistance;
289
+ private _calculateConfidence;
290
+ private _generateSimilarityExplanation;
291
+ /**
292
+ * Get performance metrics for monitoring
293
+ */
294
+ getPerformanceMetrics(operation?: string): Map<string, PerformanceMetrics[]> | PerformanceMetrics[];
295
+ /**
296
+ * Clear all caches
297
+ */
298
+ clearCaches(): void;
299
+ /**
300
+ * Get cache statistics
301
+ */
302
+ getCacheStats(): Record<string, {
303
+ size: number;
304
+ maxSize: number;
305
+ }>;
306
+ /**
307
+ * Analyze data characteristics for algorithm selection
308
+ */
309
+ private _analyzeDataCharacteristics;
310
+ /**
311
+ * Calculate centroid for a group of items
312
+ */
313
+ private _calculateGroupCentroid;
314
+ /**
315
+ * Cluster within semantic type using vector similarity
316
+ */
317
+ private _clusterWithinSemanticType;
318
+ /**
319
+ * Find cross-type connections via verbs
320
+ */
321
+ private _findCrossTypeConnections;
322
+ /**
323
+ * Merge semantic clusters based on connections
324
+ */
325
+ private _mergeSemanticClusters;
326
+ /**
327
+ * Get optimal clustering level for HNSW
328
+ */
329
+ private _getOptimalClusteringLevel;
330
+ /**
331
+ * Get nodes at HNSW level
332
+ */
333
+ private _getHNSWLevelNodes;
334
+ /**
335
+ * Find cluster members using HNSW neighbors
336
+ */
337
+ private _findClusterMembers;
338
+ /**
339
+ * Calculate hierarchical clustering confidence
340
+ */
341
+ private _calculateHierarchicalConfidence;
342
+ /**
343
+ * Assign unassigned items to nearest clusters
344
+ */
345
+ private _assignUnassignedItems;
346
+ }