@soulcraft/brainy 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260)
  1. package/CHANGELOG.md +53 -3
  2. package/README.md +353 -110
  3. package/bin/brainy.js +340 -62
  4. package/dist/api/ConfigAPI.d.ts +67 -0
  5. package/dist/api/ConfigAPI.js +166 -0
  6. package/dist/api/DataAPI.d.ts +123 -0
  7. package/dist/api/DataAPI.js +391 -0
  8. package/dist/api/SecurityAPI.d.ts +50 -0
  9. package/dist/api/SecurityAPI.js +139 -0
  10. package/dist/api/UniversalImportAPI.d.ts +134 -0
  11. package/dist/api/UniversalImportAPI.js +615 -0
  12. package/dist/augmentationManager.js +12 -7
  13. package/dist/augmentationPipeline.d.ts +0 -61
  14. package/dist/augmentationPipeline.js +0 -87
  15. package/dist/augmentationRegistry.d.ts +1 -1
  16. package/dist/augmentationRegistry.js +1 -1
  17. package/dist/augmentations/apiServerAugmentation.d.ts +27 -1
  18. package/dist/augmentations/apiServerAugmentation.js +290 -9
  19. package/dist/augmentations/auditLogAugmentation.d.ts +109 -0
  20. package/dist/augmentations/auditLogAugmentation.js +358 -0
  21. package/dist/augmentations/batchProcessingAugmentation.d.ts +3 -2
  22. package/dist/augmentations/batchProcessingAugmentation.js +123 -22
  23. package/dist/augmentations/brainyAugmentation.d.ts +142 -8
  24. package/dist/augmentations/brainyAugmentation.js +179 -2
  25. package/dist/augmentations/cacheAugmentation.d.ts +8 -5
  26. package/dist/augmentations/cacheAugmentation.js +116 -17
  27. package/dist/augmentations/conduitAugmentations.d.ts +2 -2
  28. package/dist/augmentations/conduitAugmentations.js +2 -2
  29. package/dist/augmentations/configResolver.d.ts +122 -0
  30. package/dist/augmentations/configResolver.js +440 -0
  31. package/dist/augmentations/connectionPoolAugmentation.d.ts +3 -1
  32. package/dist/augmentations/connectionPoolAugmentation.js +37 -12
  33. package/dist/augmentations/defaultAugmentations.d.ts +14 -10
  34. package/dist/augmentations/defaultAugmentations.js +16 -11
  35. package/dist/augmentations/discovery/catalogDiscovery.d.ts +142 -0
  36. package/dist/augmentations/discovery/catalogDiscovery.js +249 -0
  37. package/dist/augmentations/discovery/localDiscovery.d.ts +84 -0
  38. package/dist/augmentations/discovery/localDiscovery.js +246 -0
  39. package/dist/augmentations/discovery/runtimeLoader.d.ts +97 -0
  40. package/dist/augmentations/discovery/runtimeLoader.js +337 -0
  41. package/dist/augmentations/discovery.d.ts +152 -0
  42. package/dist/augmentations/discovery.js +441 -0
  43. package/dist/augmentations/display/cache.d.ts +130 -0
  44. package/dist/augmentations/display/cache.js +319 -0
  45. package/dist/augmentations/display/fieldPatterns.d.ts +52 -0
  46. package/dist/augmentations/display/fieldPatterns.js +393 -0
  47. package/dist/augmentations/display/iconMappings.d.ts +57 -0
  48. package/dist/augmentations/display/iconMappings.js +68 -0
  49. package/dist/augmentations/display/intelligentComputation.d.ts +109 -0
  50. package/dist/augmentations/display/intelligentComputation.js +462 -0
  51. package/dist/augmentations/display/types.d.ts +203 -0
  52. package/dist/augmentations/display/types.js +7 -0
  53. package/dist/augmentations/entityRegistryAugmentation.d.ts +3 -1
  54. package/dist/augmentations/entityRegistryAugmentation.js +5 -1
  55. package/dist/augmentations/indexAugmentation.d.ts +5 -3
  56. package/dist/augmentations/indexAugmentation.js +5 -2
  57. package/dist/augmentations/intelligentVerbScoringAugmentation.d.ts +24 -7
  58. package/dist/augmentations/intelligentVerbScoringAugmentation.js +111 -27
  59. package/dist/augmentations/manifest.d.ts +176 -0
  60. package/dist/augmentations/manifest.js +8 -0
  61. package/dist/augmentations/marketplace/AugmentationMarketplace.d.ts +168 -0
  62. package/dist/augmentations/marketplace/AugmentationMarketplace.js +329 -0
  63. package/dist/augmentations/marketplace/cli.d.ts +47 -0
  64. package/dist/augmentations/marketplace/cli.js +265 -0
  65. package/dist/augmentations/metricsAugmentation.d.ts +3 -3
  66. package/dist/augmentations/metricsAugmentation.js +2 -2
  67. package/dist/augmentations/monitoringAugmentation.d.ts +3 -3
  68. package/dist/augmentations/monitoringAugmentation.js +2 -2
  69. package/dist/augmentations/neuralImport.d.ts +1 -1
  70. package/dist/augmentations/neuralImport.js +4 -4
  71. package/dist/augmentations/rateLimitAugmentation.d.ts +82 -0
  72. package/dist/augmentations/rateLimitAugmentation.js +321 -0
  73. package/dist/augmentations/requestDeduplicatorAugmentation.d.ts +2 -2
  74. package/dist/augmentations/requestDeduplicatorAugmentation.js +1 -1
  75. package/dist/augmentations/storageAugmentation.d.ts +1 -1
  76. package/dist/augmentations/storageAugmentation.js +2 -2
  77. package/dist/augmentations/storageAugmentations.d.ts +37 -8
  78. package/dist/augmentations/storageAugmentations.js +204 -15
  79. package/dist/augmentations/synapseAugmentation.d.ts +1 -1
  80. package/dist/augmentations/synapseAugmentation.js +35 -16
  81. package/dist/augmentations/typeMatching/brainyTypes.d.ts +83 -0
  82. package/dist/augmentations/typeMatching/brainyTypes.js +425 -0
  83. package/dist/augmentations/typeMatching/intelligentTypeMatcher.d.ts +39 -59
  84. package/dist/augmentations/typeMatching/intelligentTypeMatcher.js +103 -389
  85. package/dist/augmentations/universalDisplayAugmentation.d.ts +191 -0
  86. package/dist/augmentations/universalDisplayAugmentation.js +371 -0
  87. package/dist/brainy-unified.d.ts +106 -0
  88. package/dist/brainy-unified.js +327 -0
  89. package/dist/brainy.d.ts +273 -0
  90. package/dist/brainy.js +1181 -0
  91. package/dist/brainyData.d.ts +56 -111
  92. package/dist/brainyData.js +912 -756
  93. package/dist/brainyDataV3.d.ts +186 -0
  94. package/dist/brainyDataV3.js +337 -0
  95. package/dist/browserFramework.d.ts +6 -6
  96. package/dist/browserFramework.js +11 -8
  97. package/dist/browserFramework.minimal.d.ts +5 -5
  98. package/dist/browserFramework.minimal.js +11 -8
  99. package/dist/config/distributedPresets-new.d.ts +118 -0
  100. package/dist/config/distributedPresets-new.js +318 -0
  101. package/dist/config/distributedPresets.d.ts +118 -0
  102. package/dist/config/distributedPresets.js +318 -0
  103. package/dist/config/extensibleConfig.d.ts +99 -0
  104. package/dist/config/extensibleConfig.js +268 -0
  105. package/dist/config/index.d.ts +17 -0
  106. package/dist/config/index.js +35 -0
  107. package/dist/config/modelAutoConfig.d.ts +32 -0
  108. package/dist/config/modelAutoConfig.js +139 -0
  109. package/dist/config/modelPrecisionManager.d.ts +42 -0
  110. package/dist/config/modelPrecisionManager.js +98 -0
  111. package/dist/config/sharedConfigManager.d.ts +67 -0
  112. package/dist/config/sharedConfigManager.js +215 -0
  113. package/dist/config/storageAutoConfig.d.ts +41 -0
  114. package/dist/config/storageAutoConfig.js +328 -0
  115. package/dist/config/zeroConfig.d.ts +68 -0
  116. package/dist/config/zeroConfig.js +301 -0
  117. package/dist/cortex/backupRestore.d.ts +2 -2
  118. package/dist/cortex/backupRestore.js +85 -27
  119. package/dist/cortex/healthCheck.d.ts +2 -2
  120. package/dist/cortex/neuralImport.d.ts +2 -2
  121. package/dist/cortex/neuralImport.js +18 -13
  122. package/dist/cortex/performanceMonitor.d.ts +2 -2
  123. package/dist/critical/model-guardian.d.ts +4 -0
  124. package/dist/critical/model-guardian.js +31 -11
  125. package/dist/demo.d.ts +4 -4
  126. package/dist/demo.js +7 -7
  127. package/dist/distributed/cacheSync.d.ts +112 -0
  128. package/dist/distributed/cacheSync.js +265 -0
  129. package/dist/distributed/coordinator.d.ts +193 -0
  130. package/dist/distributed/coordinator.js +548 -0
  131. package/dist/distributed/httpTransport.d.ts +120 -0
  132. package/dist/distributed/httpTransport.js +446 -0
  133. package/dist/distributed/index.d.ts +8 -0
  134. package/dist/distributed/index.js +5 -0
  135. package/dist/distributed/networkTransport.d.ts +132 -0
  136. package/dist/distributed/networkTransport.js +633 -0
  137. package/dist/distributed/queryPlanner.d.ts +104 -0
  138. package/dist/distributed/queryPlanner.js +327 -0
  139. package/dist/distributed/readWriteSeparation.d.ts +134 -0
  140. package/dist/distributed/readWriteSeparation.js +350 -0
  141. package/dist/distributed/shardManager.d.ts +114 -0
  142. package/dist/distributed/shardManager.js +357 -0
  143. package/dist/distributed/shardMigration.d.ts +110 -0
  144. package/dist/distributed/shardMigration.js +289 -0
  145. package/dist/distributed/storageDiscovery.d.ts +160 -0
  146. package/dist/distributed/storageDiscovery.js +551 -0
  147. package/dist/embeddings/CachedEmbeddings.d.ts +40 -0
  148. package/dist/embeddings/CachedEmbeddings.js +146 -0
  149. package/dist/embeddings/EmbeddingManager.d.ts +102 -0
  150. package/dist/embeddings/EmbeddingManager.js +291 -0
  151. package/dist/embeddings/SingletonModelManager.d.ts +95 -0
  152. package/dist/embeddings/SingletonModelManager.js +220 -0
  153. package/dist/embeddings/index.d.ts +12 -0
  154. package/dist/embeddings/index.js +16 -0
  155. package/dist/embeddings/lightweight-embedder.d.ts +0 -1
  156. package/dist/embeddings/lightweight-embedder.js +4 -12
  157. package/dist/embeddings/model-manager.d.ts +11 -0
  158. package/dist/embeddings/model-manager.js +43 -7
  159. package/dist/embeddings/universal-memory-manager.d.ts +1 -1
  160. package/dist/embeddings/universal-memory-manager.js +27 -67
  161. package/dist/embeddings/worker-embedding.js +4 -8
  162. package/dist/errors/brainyError.d.ts +5 -1
  163. package/dist/errors/brainyError.js +12 -0
  164. package/dist/examples/basicUsage.js +7 -4
  165. package/dist/graph/graphAdjacencyIndex.d.ts +96 -0
  166. package/dist/graph/graphAdjacencyIndex.js +288 -0
  167. package/dist/graph/pathfinding.js +4 -2
  168. package/dist/hnsw/scaledHNSWSystem.js +11 -2
  169. package/dist/importManager.js +8 -5
  170. package/dist/index.d.ts +17 -22
  171. package/dist/index.js +37 -23
  172. package/dist/mcp/brainyMCPAdapter.d.ts +4 -4
  173. package/dist/mcp/brainyMCPAdapter.js +5 -5
  174. package/dist/mcp/brainyMCPService.d.ts +3 -3
  175. package/dist/mcp/brainyMCPService.js +3 -11
  176. package/dist/mcp/mcpAugmentationToolset.js +20 -30
  177. package/dist/neural/embeddedPatterns.d.ts +1 -1
  178. package/dist/neural/embeddedPatterns.js +2 -2
  179. package/dist/neural/entityExtractor.d.ts +65 -0
  180. package/dist/neural/entityExtractor.js +316 -0
  181. package/dist/neural/improvedNeuralAPI.d.ts +357 -0
  182. package/dist/neural/improvedNeuralAPI.js +2628 -0
  183. package/dist/neural/naturalLanguageProcessor.d.ts +155 -10
  184. package/dist/neural/naturalLanguageProcessor.js +941 -66
  185. package/dist/neural/naturalLanguageProcessorStatic.d.ts +2 -2
  186. package/dist/neural/naturalLanguageProcessorStatic.js +3 -3
  187. package/dist/neural/neuralAPI.js +8 -2
  188. package/dist/neural/patternLibrary.d.ts +57 -3
  189. package/dist/neural/patternLibrary.js +348 -13
  190. package/dist/neural/staticPatternMatcher.d.ts +2 -2
  191. package/dist/neural/staticPatternMatcher.js +2 -2
  192. package/dist/neural/types.d.ts +287 -0
  193. package/dist/neural/types.js +24 -0
  194. package/dist/shared/default-augmentations.d.ts +3 -3
  195. package/dist/shared/default-augmentations.js +5 -5
  196. package/dist/storage/adapters/baseStorageAdapter.d.ts +42 -0
  197. package/dist/storage/adapters/fileSystemStorage.d.ts +26 -2
  198. package/dist/storage/adapters/fileSystemStorage.js +218 -15
  199. package/dist/storage/adapters/memoryStorage.d.ts +4 -4
  200. package/dist/storage/adapters/memoryStorage.js +17 -12
  201. package/dist/storage/adapters/opfsStorage.d.ts +2 -2
  202. package/dist/storage/adapters/opfsStorage.js +2 -2
  203. package/dist/storage/adapters/s3CompatibleStorage.d.ts +2 -2
  204. package/dist/storage/adapters/s3CompatibleStorage.js +2 -2
  205. package/dist/storage/backwardCompatibility.d.ts +10 -78
  206. package/dist/storage/backwardCompatibility.js +17 -132
  207. package/dist/storage/baseStorage.d.ts +18 -2
  208. package/dist/storage/baseStorage.js +74 -3
  209. package/dist/storage/cacheManager.js +2 -2
  210. package/dist/storage/readOnlyOptimizations.js +8 -3
  211. package/dist/streaming/pipeline.d.ts +154 -0
  212. package/dist/streaming/pipeline.js +551 -0
  213. package/dist/triple/TripleIntelligence.d.ts +25 -110
  214. package/dist/triple/TripleIntelligence.js +4 -574
  215. package/dist/triple/TripleIntelligenceSystem.d.ts +159 -0
  216. package/dist/triple/TripleIntelligenceSystem.js +519 -0
  217. package/dist/types/apiTypes.d.ts +278 -0
  218. package/dist/types/apiTypes.js +33 -0
  219. package/dist/types/brainy.types.d.ts +308 -0
  220. package/dist/types/brainy.types.js +8 -0
  221. package/dist/types/brainyDataInterface.d.ts +5 -8
  222. package/dist/types/brainyDataInterface.js +2 -2
  223. package/dist/types/graphTypes.js +2 -2
  224. package/dist/utils/brainyTypes.d.ts +217 -0
  225. package/dist/utils/brainyTypes.js +261 -0
  226. package/dist/utils/cacheAutoConfig.d.ts +3 -3
  227. package/dist/utils/embedding.d.ts +9 -4
  228. package/dist/utils/embedding.js +89 -26
  229. package/dist/utils/enhancedLogger.d.ts +104 -0
  230. package/dist/utils/enhancedLogger.js +232 -0
  231. package/dist/utils/hybridModelManager.d.ts +19 -28
  232. package/dist/utils/hybridModelManager.js +36 -200
  233. package/dist/utils/index.d.ts +1 -1
  234. package/dist/utils/index.js +1 -1
  235. package/dist/utils/intelligentTypeMapper.d.ts +60 -0
  236. package/dist/utils/intelligentTypeMapper.js +349 -0
  237. package/dist/utils/metadataIndex.d.ts +118 -1
  238. package/dist/utils/metadataIndex.js +539 -16
  239. package/dist/utils/nodeVersionCheck.d.ts +24 -0
  240. package/dist/utils/nodeVersionCheck.js +65 -0
  241. package/dist/utils/paramValidation.d.ts +39 -0
  242. package/dist/utils/paramValidation.js +192 -0
  243. package/dist/utils/rateLimiter.d.ts +160 -0
  244. package/dist/utils/rateLimiter.js +271 -0
  245. package/dist/utils/statistics.d.ts +4 -4
  246. package/dist/utils/statistics.js +3 -3
  247. package/dist/utils/structuredLogger.d.ts +146 -0
  248. package/dist/utils/structuredLogger.js +394 -0
  249. package/dist/utils/textEncoding.js +2 -1
  250. package/dist/utils/typeValidation.d.ts +59 -0
  251. package/dist/utils/typeValidation.js +374 -0
  252. package/dist/utils/version.js +19 -3
  253. package/package.json +15 -4
  254. package/scripts/download-models.cjs +94 -20
  255. package/dist/augmentations/walAugmentation.d.ts +0 -109
  256. package/dist/augmentations/walAugmentation.js +0 -516
  257. package/dist/chat/BrainyChat.d.ts +0 -121
  258. package/dist/chat/BrainyChat.js +0 -396
  259. package/dist/chat/ChatCLI.d.ts +0 -61
  260. package/dist/chat/ChatCLI.js +0 -351
@@ -0,0 +1,2628 @@
1
+ /**
2
+ * Improved Neural API - Clean, Consistent, Performant
3
+ *
4
+ * Public API Surface:
5
+ * - brain.neural.similar(a, b, options?) // Similarity calculation
6
+ * - brain.neural.clusters(items?, options?) // Semantic clustering
7
+ * - brain.neural.neighbors(id, options?) // K-nearest neighbors
8
+ * - brain.neural.hierarchy(id, options?) // Semantic hierarchy
9
+ * - brain.neural.outliers(options?) // Anomaly detection
10
+ * - brain.neural.visualize(options?) // Visualization data
11
+ *
12
+ * Advanced Clustering:
13
+ * - brain.neural.clusterByDomain(field, options?) // Domain-aware clustering
14
+ * - brain.neural.clusterByTime(field, windows, options?) // Temporal clustering
15
+ * - brain.neural.clusterStream(options?) // AsyncIterator for streaming
16
+ * - brain.neural.updateClusters(items, options?) // Incremental clustering
17
+ *
18
+ * Private methods are prefixed with _ and not exposed in public API
19
+ */
20
+ import { cosineDistance, euclideanDistance } from '../utils/distance.js';
21
+ import { NeuralAPIError, ClusteringError, SimilarityError } from './types.js';
22
+ export class ImprovedNeuralAPI {
23
+ constructor(brain, config = {}) {
24
+ // Caching for performance
25
+ this.similarityCache = new Map();
26
+ this.clusterCache = new Map();
27
+ this.hierarchyCache = new Map();
28
+ this.neighborsCache = new Map();
29
+ // Performance tracking
30
+ this.performanceMetrics = new Map();
31
+ this.brain = brain;
32
+ this.config = {
33
+ cacheSize: 1000,
34
+ defaultAlgorithm: 'auto',
35
+ similarityMetric: 'cosine',
36
+ performanceTracking: true,
37
+ maxMemoryUsage: '1GB',
38
+ parallelProcessing: true,
39
+ streamingBatchSize: 100,
40
+ ...config
41
+ };
42
+ this._initializeCleanupTimer();
43
+ }
44
+ // ===== PUBLIC API: SIMILARITY =====
45
+ /**
46
+ * Calculate similarity between any two items (auto-detection)
47
+ * Supports: IDs, text strings, vectors, or mixed types
48
+ */
49
+ async similar(a, b, options = {}) {
50
+ const startTime = performance.now();
51
+ try {
52
+ // Create cache key
53
+ const cacheKey = this._createSimilarityKey(a, b, options);
54
+ if (this.similarityCache.has(cacheKey)) {
55
+ return this.similarityCache.get(cacheKey);
56
+ }
57
+ let result;
58
+ // Auto-detect input types and route accordingly
59
+ if (this._isId(a) && this._isId(b)) {
60
+ result = await this._similarityById(a, b, options);
61
+ }
62
+ else if (this._isVector(a) && this._isVector(b)) {
63
+ result = await this._similarityByVector(a, b, options);
64
+ }
65
+ else if (typeof a === 'string' && typeof b === 'string') {
66
+ result = await this._similarityByText(a, b, options);
67
+ }
68
+ else {
69
+ // Mixed types - convert to vectors
70
+ const vectorA = await this._convertToVector(a);
71
+ const vectorB = await this._convertToVector(b);
72
+ result = await this._similarityByVector(vectorA, vectorB, options);
73
+ }
74
+ // Cache result
75
+ this._cacheResult(cacheKey, result, this.similarityCache);
76
+ // Track performance
77
+ this._trackPerformance('similarity', startTime, 2, 'mixed');
78
+ return result;
79
+ }
80
+ catch (error) {
81
+ const errorMessage = error instanceof Error ? error.message : String(error);
82
+ throw new SimilarityError(`Failed to calculate similarity: ${errorMessage}`, {
83
+ inputA: typeof a === 'object' ? 'vector' : String(a).substring(0, 50),
84
+ inputB: typeof b === 'object' ? 'vector' : String(b).substring(0, 50),
85
+ options
86
+ });
87
+ }
88
+ }
89
+ // ===== PUBLIC API: CLUSTERING =====
90
+ /**
91
+ * Intelligent semantic clustering with auto-routing
92
+ * - No input: Cluster all data
93
+ * - Array: Cluster specific items
94
+ * - String: Find clusters near this item
95
+ * - Options object: Advanced configuration
96
+ */
97
+ async clusters(input) {
98
+ const startTime = performance.now();
99
+ try {
100
+ let options = {};
101
+ let items;
102
+ // Parse input
103
+ if (!input) {
104
+ // Cluster all data
105
+ items = undefined;
106
+ options = { algorithm: 'auto' };
107
+ }
108
+ else if (Array.isArray(input)) {
109
+ // Cluster specific items
110
+ items = input;
111
+ options = { algorithm: 'auto' };
112
+ }
113
+ else if (typeof input === 'string') {
114
+ // Find clusters near this item
115
+ const nearbyResult = await this.neighbors(input, { limit: 100 });
116
+ items = nearbyResult.neighbors.map(n => n.id);
117
+ options = { algorithm: 'auto' };
118
+ }
119
+ else if (typeof input === 'object') {
120
+ // Configuration object
121
+ options = input;
122
+ items = undefined;
123
+ }
124
+ else {
125
+ throw new ClusteringError('Invalid input for clustering', { input });
126
+ }
127
+ // Check cache
128
+ const cacheKey = this._createClusteringKey(items, options);
129
+ if (this.clusterCache.has(cacheKey)) {
130
+ const cached = this.clusterCache.get(cacheKey);
131
+ return cached.clusters;
132
+ }
133
+ // Route to optimal algorithm
134
+ const result = await this._routeClusteringAlgorithm(items, options);
135
+ // Cache result
136
+ this._cacheResult(cacheKey, result, this.clusterCache);
137
+ // Track performance
138
+ this._trackPerformance('clustering', startTime, items?.length || 0, options.algorithm || 'auto');
139
+ return result.clusters;
140
+ }
141
+ catch (error) {
142
+ const errorMessage = error instanceof Error ? error.message : String(error);
143
+ throw new ClusteringError(`Failed to perform clustering: ${errorMessage}`, {
144
+ input: typeof input === 'object' ? JSON.stringify(input) : input,
145
+ });
146
+ }
147
+ }
148
+ /**
149
+ * Fast hierarchical clustering using HNSW levels
150
+ */
151
+ async clusterFast(options = {}) {
152
+ const fullOptions = {
153
+ algorithm: 'hierarchical',
154
+ maxClusters: options.maxClusters,
155
+ ...options
156
+ };
157
+ const result = await this._performHierarchicalClustering(undefined, fullOptions);
158
+ return result.clusters;
159
+ }
160
+ /**
161
+ * Large-scale clustering with intelligent sampling
162
+ */
163
+ async clusterLarge(options = {}) {
164
+ const fullOptions = {
165
+ algorithm: 'auto',
166
+ sampleSize: options.sampleSize || 1000,
167
+ strategy: options.strategy || 'diverse',
168
+ ...options
169
+ };
170
+ const result = await this._performSampledClustering(undefined, fullOptions);
171
+ return result.clusters;
172
+ }
173
+ // ===== PUBLIC API: ADVANCED CLUSTERING =====
174
+ /**
175
+ * Domain-aware clustering based on metadata fields
176
+ */
177
+ async clusterByDomain(field, options = {}) {
178
+ const startTime = performance.now();
179
+ try {
180
+ // Get all items with the specified field
181
+ const items = await this._getItemsByField(field);
182
+ if (items.length === 0) {
183
+ return [];
184
+ }
185
+ // Group by domain values
186
+ const domainGroups = this._groupByDomain(items, field);
187
+ const domainClusters = [];
188
+ // Cluster within each domain
189
+ for (const [domain, domainItems] of domainGroups) {
190
+ const domainOptions = {
191
+ ...options,
192
+ algorithm: 'auto',
193
+ maxClusters: Math.min(options.maxClusters || 10, Math.ceil(domainItems.length / 3))
194
+ };
195
+ const clusters = await this._performClustering(domainItems.map(item => item.id), domainOptions);
196
+ // Convert to domain clusters
197
+ for (const cluster of clusters.clusters) {
198
+ domainClusters.push({
199
+ ...cluster,
200
+ domain,
201
+ domainConfidence: this._calculateDomainConfidence(cluster, domainItems),
202
+ crossDomainMembers: options.crossDomainThreshold
203
+ ? await this._findCrossDomainMembers(cluster, options.crossDomainThreshold)
204
+ : undefined
205
+ });
206
+ }
207
+ }
208
+ // Handle cross-domain clustering if enabled
209
+ if (!options.preserveDomainBoundaries) {
210
+ const crossDomainClusters = await this._findCrossDomainClusters(domainClusters, options.crossDomainThreshold || 0.8);
211
+ domainClusters.push(...crossDomainClusters);
212
+ }
213
+ this._trackPerformance('domainClustering', startTime, items.length, field);
214
+ return domainClusters;
215
+ }
216
+ catch (error) {
217
+ const errorMessage = error instanceof Error ? error.message : String(error);
218
+ throw new ClusteringError(`Failed to cluster by domain: ${errorMessage}`, { field, options });
219
+ }
220
+ }
221
+ /**
222
+ * Temporal clustering based on time windows
223
+ */
224
+ async clusterByTime(timeField, windows, options = { timeField, windows }) {
225
+ const startTime = performance.now();
226
+ try {
227
+ const temporalClusters = [];
228
+ for (const window of windows) {
229
+ // Get items in this time window
230
+ const windowItems = await this._getItemsByTimeWindow(timeField, window);
231
+ if (windowItems.length === 0)
232
+ continue;
233
+ // Cluster items in this window
234
+ const clusteringOptions = {
235
+ ...options,
236
+ algorithm: 'auto'
237
+ };
238
+ const clusters = await this._performClustering(windowItems.map(item => item.id), clusteringOptions);
239
+ // Convert to temporal clusters
240
+ for (const cluster of clusters.clusters) {
241
+ const temporal = await this._calculateTemporalMetrics(cluster, windowItems, timeField);
242
+ temporalClusters.push({
243
+ ...cluster,
244
+ timeWindow: window,
245
+ trend: temporal.trend,
246
+ temporal: temporal.metrics
247
+ });
248
+ }
249
+ }
250
+ // Handle overlapping windows
251
+ if (options.overlapStrategy === 'merge') {
252
+ return this._mergeOverlappingTemporalClusters(temporalClusters);
253
+ }
254
+ this._trackPerformance('temporalClustering', startTime, temporalClusters.length, 'temporal');
255
+ return temporalClusters;
256
+ }
257
+ catch (error) {
258
+ const errorMessage = error instanceof Error ? error.message : String(error);
259
+ throw new ClusteringError(`Failed to cluster by time: ${errorMessage}`, { timeField, windows, options });
260
+ }
261
+ }
262
+ /**
263
+ * Streaming clustering with real-time updates
264
+ */
265
+ async *clusterStream(options = {}) {
266
+ const batchSize = options.batchSize || this.config.streamingBatchSize || 100;
267
+ let batchNumber = 0;
268
+ let processedCount = 0;
269
+ try {
270
+ // Get all items for processing
271
+ const allItems = await this._getAllItemIds();
272
+ const totalItems = allItems.length;
273
+ // Process in batches
274
+ for (let i = 0; i < allItems.length; i += batchSize) {
275
+ const startTime = performance.now();
276
+ const batch = allItems.slice(i, i + batchSize);
277
+ // Perform clustering on this batch
278
+ const result = await this._performClustering(batch, {
279
+ ...options,
280
+ algorithm: 'auto',
281
+ cacheResults: false // Don't cache streaming results
282
+ });
283
+ processedCount += batch.length;
284
+ const isComplete = processedCount >= totalItems;
285
+ yield {
286
+ clusters: result.clusters,
287
+ batchNumber: ++batchNumber,
288
+ isComplete,
289
+ progress: {
290
+ processed: processedCount,
291
+ total: totalItems,
292
+ percentage: (processedCount / totalItems) * 100
293
+ },
294
+ metrics: {
295
+ ...result.metrics,
296
+ executionTime: performance.now() - startTime
297
+ }
298
+ };
299
+ // Adaptive threshold adjustment
300
+ if (options.adaptiveThreshold && batchNumber > 1) {
301
+ options.threshold = this._adjustThresholdAdaptively(result.clusters, options.threshold);
302
+ }
303
+ }
304
+ }
305
+ catch (error) {
306
+ const errorMessage = error instanceof Error ? error.message : String(error);
307
+ throw new ClusteringError(`Failed in streaming clustering: ${errorMessage}`, { options, batchNumber });
308
+ }
309
+ }
310
+ /**
311
+ * Incremental clustering - add new items to existing clusters
312
+ */
313
+ async updateClusters(newItems, options = {}) {
314
+ const startTime = performance.now();
315
+ try {
316
+ // Get existing clusters
317
+ const existingClusters = await this.clusters({ ...options, algorithm: 'auto' });
318
+ // For each new item, find best cluster or create new one
319
+ const updatedClusters = [...existingClusters];
320
+ const unassignedItems = [];
321
+ for (const itemId of newItems) {
322
+ let bestCluster = null;
323
+ let bestSimilarity = 0;
324
+ // Find most similar existing cluster
325
+ for (const cluster of updatedClusters) {
326
+ const similarity = await this._calculateItemToClusterSimilarity(itemId, cluster);
327
+ if (similarity > bestSimilarity && similarity > (options.threshold || 0.6)) {
328
+ bestSimilarity = similarity;
329
+ bestCluster = cluster;
330
+ }
331
+ }
332
+ if (bestCluster) {
333
+ // Add to existing cluster
334
+ bestCluster.members.push(itemId);
335
+ bestCluster.size = bestCluster.members.length;
336
+ // Recalculate centroid
337
+ bestCluster.centroid = await this._recalculateClusterCentroid(bestCluster);
338
+ }
339
+ else {
340
+ // Item doesn't fit existing clusters
341
+ unassignedItems.push(itemId);
342
+ }
343
+ }
344
+ // Create new clusters for unassigned items
345
+ if (unassignedItems.length > 0) {
346
+ const newClusters = await this._performClustering(unassignedItems, options);
347
+ updatedClusters.push(...newClusters.clusters);
348
+ }
349
+ this._trackPerformance('incrementalClustering', startTime, newItems.length, 'incremental');
350
+ return updatedClusters;
351
+ }
352
+ catch (error) {
353
+ const errorMessage = error instanceof Error ? error.message : String(error);
354
+ throw new ClusteringError(`Failed to update clusters: ${errorMessage}`, { newItems, options });
355
+ }
356
+ }
357
+ /**
358
+ * Enhanced clustering with relationship analysis using verbs
359
+ * Returns clusters with intra-cluster and inter-cluster relationship information
360
+ *
361
+ * Scalable for millions of nodes - uses efficient batching and filtering
362
+ */
363
+ async clustersWithRelationships(input, options) {
364
+ const startTime = performance.now();
365
+ const batchSize = options?.batchSize || 1000;
366
+ const maxRelationships = options?.maxRelationships || 10000;
367
+ let processedCount = 0;
368
+ try {
369
+ // Get basic clusters first
370
+ const basicClusters = await this.clusters(input);
371
+ if (basicClusters.length === 0) {
372
+ return [];
373
+ }
374
+ // Build member lookup for O(1) cluster membership checking
375
+ const memberToClusterMap = new Map();
376
+ const clusterMap = new Map();
377
+ for (const cluster of basicClusters) {
378
+ clusterMap.set(cluster.id, cluster);
379
+ for (const memberId of cluster.members) {
380
+ memberToClusterMap.set(memberId, cluster.id);
381
+ }
382
+ }
383
+ // Initialize cluster edge collections
384
+ const clusterEdges = new Map();
385
+ for (const cluster of basicClusters) {
386
+ clusterEdges.set(cluster.id, {
387
+ intra: [],
388
+ inter: [],
389
+ edgeTypes: {}
390
+ });
391
+ }
392
+ // Process verbs in batches to handle millions of relationships efficiently
393
+ let hasMoreVerbs = true;
394
+ let offset = 0;
395
+ while (hasMoreVerbs && processedCount < maxRelationships) {
396
+ // Get batch of verbs using proper pagination API
397
+ // Get all items and process in chunks (simplified approach)
398
+ const allItems = await this.brain.find({ query: '', limit: Math.min(1000, maxRelationships) });
399
+ const verbBatch = allItems.slice(offset, offset + batchSize);
400
+ if (verbBatch.length === 0) {
401
+ hasMoreVerbs = false;
402
+ break;
403
+ }
404
+ // Process this batch
405
+ for (const verb of verbBatch) {
406
+ if (processedCount >= maxRelationships)
407
+ break;
408
+ const sourceClusterId = memberToClusterMap.get(verb.sourceId);
409
+ const targetClusterId = memberToClusterMap.get(verb.targetId);
410
+ // Skip verbs that don't involve any clustered nodes
411
+ if (!sourceClusterId && !targetClusterId)
412
+ continue;
413
+ const edgeWeight = this._calculateEdgeWeight(verb);
414
+ const edgeType = verb.verb || verb.type || 'relationship';
415
+ if (sourceClusterId && targetClusterId) {
416
+ if (sourceClusterId === targetClusterId) {
417
+ // Intra-cluster relationship
418
+ const edges = clusterEdges.get(sourceClusterId);
419
+ edges.intra.push({
420
+ id: verb.id,
421
+ source: verb.sourceId,
422
+ target: verb.targetId,
423
+ type: edgeType,
424
+ weight: edgeWeight,
425
+ isInterCluster: false,
426
+ sourceCluster: sourceClusterId,
427
+ targetCluster: sourceClusterId
428
+ });
429
+ edges.edgeTypes[edgeType] = (edges.edgeTypes[edgeType] || 0) + 1;
430
+ }
431
+ else {
432
+ // Inter-cluster relationship
433
+ const sourceEdges = clusterEdges.get(sourceClusterId);
434
+ const targetEdges = clusterEdges.get(targetClusterId);
435
+ const edge = {
436
+ id: verb.id,
437
+ source: verb.sourceId,
438
+ target: verb.targetId,
439
+ type: edgeType,
440
+ weight: edgeWeight,
441
+ isInterCluster: true,
442
+ sourceCluster: sourceClusterId,
443
+ targetCluster: targetClusterId
444
+ };
445
+ sourceEdges.inter.push(edge);
446
+ // Don't duplicate - target cluster will see this as incoming
447
+ sourceEdges.edgeTypes[edgeType] = (sourceEdges.edgeTypes[edgeType] || 0) + 1;
448
+ }
449
+ }
450
+ else {
451
+ // One-way relationship to/from cluster
452
+ const clusterId = sourceClusterId || targetClusterId;
453
+ const edges = clusterEdges.get(clusterId);
454
+ edges.inter.push({
455
+ id: verb.id,
456
+ source: verb.sourceId,
457
+ target: verb.targetId,
458
+ type: edgeType,
459
+ weight: edgeWeight,
460
+ isInterCluster: true,
461
+ sourceCluster: sourceClusterId || 'external',
462
+ targetCluster: targetClusterId || 'external'
463
+ });
464
+ edges.edgeTypes[edgeType] = (edges.edgeTypes[edgeType] || 0) + 1;
465
+ }
466
+ processedCount++;
467
+ }
468
+ offset += batchSize;
469
+ // Memory management: if we have too many edges, break early
470
+ const totalEdges = Array.from(clusterEdges.values())
471
+ .reduce((sum, edges) => sum + edges.intra.length + edges.inter.length, 0);
472
+ if (totalEdges >= maxRelationships) {
473
+ console.warn(`Relationship analysis stopped at ${totalEdges} edges to maintain performance`);
474
+ break;
475
+ }
476
+ // Check if we got fewer verbs than batch size (end of data)
477
+ if (verbBatch.length < batchSize) {
478
+ hasMoreVerbs = false;
479
+ }
480
+ }
481
+ // Build enhanced clusters
482
+ const enhancedClusters = [];
483
+ for (const cluster of basicClusters) {
484
+ const edges = clusterEdges.get(cluster.id);
485
+ enhancedClusters.push({
486
+ ...cluster,
487
+ intraClusterEdges: edges.intra,
488
+ interClusterEdges: edges.inter,
489
+ relationshipSummary: {
490
+ totalEdges: edges.intra.length + edges.inter.length,
491
+ intraClusterEdges: edges.intra.length,
492
+ interClusterEdges: edges.inter.length,
493
+ edgeTypes: edges.edgeTypes
494
+ }
495
+ });
496
+ }
497
+ this._trackPerformance('clustersWithRelationships', startTime, processedCount, 'enhanced-scalable');
498
+ return enhancedClusters;
499
+ }
500
+ catch (error) {
501
+ const errorMessage = error instanceof Error ? error.message : String(error);
502
+ throw new ClusteringError(`Failed to perform relationship-aware clustering: ${errorMessage}`, {
503
+ input: typeof input === 'object' ? JSON.stringify(input) : input,
504
+ processedCount: processedCount || 0
505
+ });
506
+ }
507
+ }
508
+ // ===== PUBLIC API: NEIGHBORS & HIERARCHY =====
509
+ /**
510
+ * Find K-nearest semantic neighbors
511
+ */
512
+ async neighbors(id, options = {}) {
513
+ const startTime = performance.now();
514
+ try {
515
+ const cacheKey = `neighbors:${id}:${JSON.stringify(options)}`;
516
+ if (this.neighborsCache.has(cacheKey)) {
517
+ return this.neighborsCache.get(cacheKey);
518
+ }
519
+ const limit = options.limit || 10;
520
+ const minSimilarity = options.minSimilarity || 0.1;
521
+ // Use HNSW index for efficient neighbor search
522
+ const searchResults = await this.brain.find({
523
+ query: '',
524
+ limit: limit * 2, // Get more than needed for filtering
525
+ where: options.includeMetadata ? {} : undefined
526
+ });
527
+ // Filter and sort neighbors
528
+ const neighbors = [];
529
+ for (const result of searchResults) {
530
+ if (result.id === id)
531
+ continue; // Skip self
532
+ const similarity = await this._calculateSimilarity(id, result.id);
533
+ if (similarity >= minSimilarity) {
534
+ neighbors.push({
535
+ id: result.id,
536
+ similarity,
537
+ data: result.content || result.data,
538
+ metadata: options.includeMetadata ? result.metadata : undefined,
539
+ distance: 1 - similarity
540
+ });
541
+ }
542
+ if (neighbors.length >= limit)
543
+ break;
544
+ }
545
+ // Sort by specified criteria
546
+ this._sortNeighbors(neighbors, options.sortBy || 'similarity');
547
+ const result = {
548
+ neighbors: neighbors.slice(0, limit),
549
+ queryId: id,
550
+ totalFound: neighbors.length,
551
+ averageSimilarity: neighbors.reduce((sum, n) => sum + n.similarity, 0) / neighbors.length
552
+ };
553
+ this._cacheResult(cacheKey, result, this.neighborsCache);
554
+ this._trackPerformance('neighbors', startTime, limit, 'knn');
555
+ return result;
556
+ }
557
+ catch (error) {
558
+ const errorMessage = error instanceof Error ? error.message : String(error);
559
+ throw new NeuralAPIError(`Failed to find neighbors: ${errorMessage}`, 'NEIGHBORS_ERROR', { id, options });
560
+ }
561
+ }
562
+ /**
563
+ * Build semantic hierarchy around an item
564
+ */
565
+ async hierarchy(id, options = {}) {
566
+ const startTime = performance.now();
567
+ try {
568
+ const cacheKey = `hierarchy:${id}:${JSON.stringify(options)}`;
569
+ if (this.hierarchyCache.has(cacheKey)) {
570
+ return this.hierarchyCache.get(cacheKey);
571
+ }
572
+ // Get item data
573
+ const item = await this.brain.get(id);
574
+ if (!item) {
575
+ throw new Error(`Item with ID ${id} not found`);
576
+ }
577
+ // Build hierarchy based on strategy
578
+ const hierarchy = await this._buildSemanticHierarchy(item, options);
579
+ this._cacheResult(cacheKey, hierarchy, this.hierarchyCache);
580
+ this._trackPerformance('hierarchy', startTime, 1, 'hierarchy');
581
+ return hierarchy;
582
+ }
583
+ catch (error) {
584
+ const errorMessage = error instanceof Error ? error.message : String(error);
585
+ throw new NeuralAPIError(`Failed to build hierarchy: ${errorMessage}`, 'HIERARCHY_ERROR', { id, options });
586
+ }
587
+ }
588
+ // ===== PUBLIC API: ANALYSIS =====
589
+ /**
590
+ * Detect outliers and anomalous items
591
+ */
592
+ async outliers(options = {}) {
593
+ const startTime = performance.now();
594
+ try {
595
+ const threshold = options.threshold || 0.3;
596
+ const method = options.method || 'cluster-based';
597
+ let outliers = [];
598
+ switch (method) {
599
+ case 'isolation':
600
+ outliers = await this._detectOutliersIsolation(threshold, options);
601
+ break;
602
+ case 'statistical':
603
+ outliers = await this._detectOutliersStatistical(threshold, options);
604
+ break;
605
+ case 'cluster-based':
606
+ default:
607
+ outliers = await this._detectOutliersClusterBased(threshold, options);
608
+ break;
609
+ }
610
+ this._trackPerformance('outlierDetection', startTime, outliers.length, method);
611
+ return outliers;
612
+ }
613
+ catch (error) {
614
+ const errorMessage = error instanceof Error ? error.message : String(error);
615
+ throw new NeuralAPIError(`Failed to detect outliers: ${errorMessage}`, 'OUTLIER_ERROR', { options });
616
+ }
617
+ }
618
+ /**
619
+ * Generate visualization data for graph libraries
620
+ */
621
+ async visualize(options = {}) {
622
+ const startTime = performance.now();
623
+ try {
624
+ const maxNodes = options.maxNodes || 100;
625
+ const dimensions = options.dimensions || 2;
626
+ const algorithm = options.algorithm || 'force';
627
+ // Get data for visualization
628
+ const nodes = await this._generateVisualizationNodes(maxNodes, options);
629
+ const edges = options.includeEdges ? await this._generateVisualizationEdges(nodes, options) : [];
630
+ const clusters = options.clusterColors ? await this._generateVisualizationClusters(nodes) : [];
631
+ // Apply layout algorithm
632
+ const positionedNodes = await this._applyLayoutAlgorithm(nodes, edges, algorithm, dimensions);
633
+ const result = {
634
+ nodes: positionedNodes,
635
+ edges,
636
+ clusters,
637
+ metadata: {
638
+ algorithm,
639
+ dimensions,
640
+ totalNodes: nodes.length,
641
+ totalEdges: edges.length,
642
+ generatedAt: new Date()
643
+ }
644
+ };
645
+ this._trackPerformance('visualization', startTime, nodes.length, algorithm);
646
+ return result;
647
+ }
648
+ catch (error) {
649
+ const errorMessage = error instanceof Error ? error.message : String(error);
650
+ throw new NeuralAPIError(`Failed to generate visualization: ${errorMessage}`, 'VISUALIZATION_ERROR', { options });
651
+ }
652
+ }
653
+ // ===== PRIVATE IMPLEMENTATION METHODS =====
654
+ async _routeClusteringAlgorithm(items, options) {
655
+ const algorithm = options.algorithm || 'auto';
656
+ const itemCount = items?.length || await this._getTotalItemCount();
657
+ // Auto-select optimal algorithm based on data size and characteristics
658
+ if (algorithm === 'auto') {
659
+ // Intelligent algorithm selection based on data characteristics
660
+ const itemIds = items || await this._getAllItemIds();
661
+ const dataCharacteristics = await this._analyzeDataCharacteristics(itemIds);
662
+ const hasRichGraph = dataCharacteristics.graphDensity > 0.05;
663
+ const hasSemanticTypes = Object.keys(dataCharacteristics.typeDistribution).length > 3;
664
+ if (hasRichGraph && hasSemanticTypes) {
665
+ // Best of all worlds for rich semantic graphs
666
+ return this._performMultiModalClustering(items, { ...options, algorithm: 'multimodal' });
667
+ }
668
+ else if (hasRichGraph) {
669
+ // Strong relationship network - use graph clustering
670
+ return this._performGraphClustering(items, { ...options, algorithm: 'graph' });
671
+ }
672
+ else if (hasSemanticTypes) {
673
+ // Rich semantic taxonomy - use semantic clustering
674
+ return this._performSemanticClustering(items, { ...options, algorithm: 'semantic' });
675
+ }
676
+ else if (itemCount > 10000) {
677
+ // Large dataset - use sampling
678
+ return this._performSampledClustering(items, { ...options, algorithm: 'sample' });
679
+ }
680
+ else if (itemCount > 1000) {
681
+ // Medium dataset - use hierarchical HNSW
682
+ return this._performHierarchicalClustering(items, { ...options, algorithm: 'hierarchical' });
683
+ }
684
+ else {
685
+ // Small dataset - use k-means for quality
686
+ return this._performKMeansClustering(items, { ...options, algorithm: 'kmeans' });
687
+ }
688
+ }
689
+ // Use specified algorithm
690
+ switch (algorithm) {
691
+ case 'hierarchical':
692
+ return this._performHierarchicalClustering(items, options);
693
+ case 'semantic':
694
+ return this._performSemanticClustering(items, options);
695
+ case 'graph':
696
+ return this._performGraphClustering(items, options);
697
+ case 'multimodal':
698
+ return this._performMultiModalClustering(items, options);
699
+ case 'kmeans':
700
+ return this._performKMeansClustering(items, options);
701
+ case 'dbscan':
702
+ return this._performDBSCANClustering(items, options);
703
+ case 'sample':
704
+ return this._performSampledClustering(items, options);
705
+ default:
706
+ throw new ClusteringError(`Unsupported algorithm: ${algorithm}`);
707
+ }
708
+ }
709
    /**
     * Main clustering entry point. Thin delegation to _routeClusteringAlgorithm,
     * which honors options.algorithm (default 'auto') and dispatches to the
     * concrete implementation.
     */
    async _performClustering(items, options) {
        // This is the main clustering dispatcher - routes to specific algorithms
        return this._routeClusteringAlgorithm(items, options);
    }
713
+ // ===== REAL CLUSTERING IMPLEMENTATIONS =====
714
    /**
     * SEMANTIC-AWARE CLUSTERING: Uses existing NounType/VerbType taxonomy + HNSW.
     *
     * Pipeline: (1) group items by their semantic noun type, (2) cluster inside
     * each type group in parallel, (3) discover cross-type verb connections,
     * (4) merge clusters joined by strong cross-type relationships.
     *
     * @param {string[]|undefined} items - Item IDs to cluster, or undefined for all items.
     * @param {object} options - `minClusterSize` (default 2), `maxClusters`, plus
     *   options forwarded to _clusterWithinSemanticType / _findCrossTypeConnections.
     * @returns {Promise<object>} { clusters, metrics, metadata }
     */
    async _performSemanticClustering(items, options) {
        const startTime = performance.now();
        // Get all items if not specified
        const itemIds = items || await this._getAllItemIds();
        if (itemIds.length === 0) {
            return this._createEmptyResult(startTime, 'semantic');
        }
        // 1. Group items by semantic type (NounType) - O(n) operation
        const itemsWithMetadata = await this._getItemsWithMetadata(itemIds);
        const typeGroups = this._groupBySemanticType(itemsWithMetadata);
        const allClusters = [];
        // 2. Cluster within each semantic type using HNSW - parallel processing
        const typeClusteringPromises = Array.from(typeGroups.entries()).map(async ([nounType, groupItems]) => {
            if (groupItems.length < (options.minClusterSize || 2)) {
                // Create single cluster for small groups — too few items to sub-cluster
                return [{
                        id: `semantic-${nounType}`,
                        centroid: await this._calculateGroupCentroid(groupItems),
                        members: groupItems.map(item => item.id),
                        size: groupItems.length,
                        confidence: 0.9, // High confidence for type-based clustering
                        label: `${nounType} cluster`,
                        metadata: { semanticType: nounType, clustering: 'semantic' }
                    }];
            }
            // Use HNSW hierarchical clustering within type
            return this._clusterWithinSemanticType(groupItems, options);
        });
        const typeClusterResults = await Promise.all(typeClusteringPromises);
        typeClusterResults.forEach(clusters => allClusters.push(...clusters));
        // 3. Find cross-type relationships using existing verb connections
        const crossTypeConnections = await this._findCrossTypeConnections(typeGroups, options);
        // 4. Merge clusters that have strong cross-type relationships
        const finalClusters = await this._mergeSemanticClusters(allClusters, crossTypeConnections);
        return {
            // maxClusters caps the output; otherwise every merged cluster is returned
            clusters: finalClusters.slice(0, options.maxClusters || finalClusters.length),
            metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'semantic'),
            metadata: {
                totalItems: itemIds.length,
                clustersFound: finalClusters.length,
                // `|| 0` guards the NaN that 0/0 would produce on an empty result
                averageClusterSize: finalClusters.reduce((sum, c) => sum + c.size, 0) / finalClusters.length || 0,
                semanticTypes: Array.from(typeGroups.keys()).length,
                timestamp: new Date()
            }
        };
    }
763
    /**
     * HIERARCHICAL CLUSTERING: Uses existing HNSW levels for O(n) clustering.
     *
     * Nodes that survive to an upper HNSW level act as natural cluster centers;
     * each center gathers nearby items, and anything left over is attached to
     * its nearest cluster afterwards.
     *
     * @param {string[]|undefined} items - Item IDs to cluster, or undefined for all items.
     * @param {object} options - `level` (HNSW level override), `maxClusters`.
     * @returns {Promise<object>} { clusters, metrics, metadata }
     */
    async _performHierarchicalClustering(items, options) {
        const startTime = performance.now();
        const itemIds = items || await this._getAllItemIds();
        if (itemIds.length === 0) {
            return this._createEmptyResult(startTime, 'hierarchical');
        }
        // Use existing HNSW level structure for natural clustering
        const level = options.level || this._getOptimalClusteringLevel(itemIds.length);
        // Default cap: roughly one cluster per 20 items, never more than 50
        const maxClusters = options.maxClusters || Math.min(50, Math.ceil(itemIds.length / 20));
        // Get HNSW level representatives - these are natural cluster centers
        const levelNodes = await this._getHNSWLevelNodes(level);
        const clusterCenters = levelNodes.slice(0, maxClusters);
        const clusters = [];
        // Create clusters around each level representative
        for (let i = 0; i < clusterCenters.length; i++) {
            const center = clusterCenters[i];
            // Find items that belong to this cluster using HNSW neighbors
            // (0.5 appears to be a fixed membership threshold — TODO confirm units)
            const members = await this._findClusterMembers(center, itemIds, 0.5);
            if (members.length > 0) {
                // Get actual node data for creating cluster
                const memberData = await this._getItemsWithMetadata(members);
                const centroid = await this._calculateCentroidFromItems(memberData);
                clusters.push({
                    id: `hierarchical-${i}`,
                    centroid,
                    members,
                    size: members.length,
                    confidence: await this._calculateHierarchicalConfidence(members),
                    label: await this._generateClusterLabel(memberData, 'hierarchical'),
                    metadata: { level, clusterCenter: center, clustering: 'hierarchical' }
                });
            }
        }
        // Assign remaining items to nearest clusters (mutates `clusters` in place)
        const assignedItems = new Set(clusters.flatMap(c => c.members));
        const unassignedItems = itemIds.filter(id => !assignedItems.has(id));
        if (unassignedItems.length > 0) {
            await this._assignUnassignedItems(unassignedItems, clusters);
        }
        return {
            clusters,
            metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'hierarchical'),
            metadata: {
                totalItems: itemIds.length,
                clustersFound: clusters.length,
                // `|| 0` guards the NaN that 0/0 would produce when no clusters formed
                averageClusterSize: clusters.reduce((sum, c) => sum + c.size, 0) / clusters.length || 0,
                hnswLevel: level,
                timestamp: new Date()
            }
        };
    }
817
+ /**
818
+ * K-MEANS CLUSTERING: Real implementation using existing distance functions
819
+ */
820
+ async _performKMeansClustering(items, options) {
821
+ const startTime = performance.now();
822
+ const itemIds = items || await this._getAllItemIds();
823
+ if (itemIds.length === 0) {
824
+ return this._createEmptyResult(startTime, 'kmeans');
825
+ }
826
+ // Get vectors for all items using existing infrastructure
827
+ const itemsWithVectors = await this._getItemsWithVectors(itemIds);
828
+ // Determine optimal k
829
+ const k = options.maxClusters || Math.min(Math.floor(Math.sqrt(itemsWithVectors.length / 2)), 50 // Maximum clusters for practical use
830
+ );
831
+ if (k <= 1) {
832
+ // Single cluster case
833
+ return {
834
+ clusters: [{
835
+ id: 'kmeans-single',
836
+ centroid: await this._calculateCentroidFromItems(itemsWithVectors),
837
+ members: itemIds,
838
+ size: itemIds.length,
839
+ confidence: 1.0,
840
+ label: 'Single cluster',
841
+ metadata: { clustering: 'kmeans', k: 1 }
842
+ }],
843
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'kmeans'),
844
+ metadata: {
845
+ totalItems: itemIds.length,
846
+ clustersFound: 1,
847
+ averageClusterSize: itemIds.length,
848
+ kValue: 1,
849
+ timestamp: new Date()
850
+ }
851
+ };
852
+ }
853
+ // Initialize centroids using k-means++ for better convergence
854
+ const centroids = await this._initializeCentroidsKMeansPlusPlus(itemsWithVectors, k);
855
+ let assignments = new Array(itemsWithVectors.length).fill(0);
856
+ let hasConverged = false;
857
+ const maxIterations = options.maxIterations || 100;
858
+ const tolerance = options.tolerance || 1e-4;
859
+ // K-means iteration loop
860
+ for (let iteration = 0; iteration < maxIterations && !hasConverged; iteration++) {
861
+ // Assignment step: assign each point to nearest centroid
862
+ const newAssignments = await this._assignPointsToCentroids(itemsWithVectors, centroids);
863
+ // Update step: recalculate centroids
864
+ const newCentroids = await this._updateCentroids(itemsWithVectors, newAssignments, k);
865
+ // Check convergence: has assignment changed significantly?
866
+ const changeRate = this._calculateAssignmentChangeRate(assignments, newAssignments);
867
+ hasConverged = changeRate < tolerance;
868
+ assignments = newAssignments;
869
+ // Update centroids for next iteration
870
+ for (let i = 0; i < centroids.length; i++) {
871
+ centroids[i] = newCentroids[i];
872
+ }
873
+ }
874
+ // Create semantic clusters from k-means results
875
+ const clusters = [];
876
+ for (let clusterIndex = 0; clusterIndex < k; clusterIndex++) {
877
+ const clusterMembers = itemsWithVectors.filter((_, i) => assignments[i] === clusterIndex);
878
+ if (clusterMembers.length > 0) {
879
+ const memberIds = clusterMembers.map(item => item.id);
880
+ clusters.push({
881
+ id: `kmeans-${clusterIndex}`,
882
+ centroid: centroids[clusterIndex],
883
+ members: memberIds,
884
+ size: memberIds.length,
885
+ confidence: await this._calculateKMeansClusterConfidence(clusterMembers, centroids[clusterIndex]),
886
+ label: await this._generateClusterLabel(clusterMembers, 'kmeans'),
887
+ metadata: {
888
+ clustering: 'kmeans',
889
+ k,
890
+ clusterIndex,
891
+ convergenceIterations: maxIterations
892
+ }
893
+ });
894
+ }
895
+ }
896
+ return {
897
+ clusters,
898
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'kmeans'),
899
+ metadata: {
900
+ totalItems: itemIds.length,
901
+ clustersFound: clusters.length,
902
+ averageClusterSize: clusters.reduce((sum, c) => sum + c.size, 0) / clusters.length || 0,
903
+ kValue: k,
904
+ hasConverged,
905
+ timestamp: new Date()
906
+ }
907
+ };
908
+ }
909
+ /**
910
+ * DBSCAN CLUSTERING: Density-based clustering with adaptive parameters using HNSW
911
+ */
912
+ async _performDBSCANClustering(items, options) {
913
+ const startTime = performance.now();
914
+ const itemIds = items || await this._getAllItemIds();
915
+ if (itemIds.length === 0) {
916
+ return this._createEmptyResult(startTime, 'dbscan');
917
+ }
918
+ const itemsWithVectors = await this._getItemsWithVectors(itemIds);
919
+ // Adaptive parameter selection using HNSW neighbors
920
+ const minPts = options.minClusterSize || Math.max(4, Math.floor(Math.log2(itemsWithVectors.length)));
921
+ const eps = options.threshold || await this._estimateOptimalEps(itemsWithVectors, minPts);
922
+ // DBSCAN state tracking
923
+ const NOISE = -1;
924
+ const UNVISITED = 0;
925
+ const visited = new Map();
926
+ const clusterAssignments = new Map();
927
+ let currentClusterId = 1;
928
+ // Process each point
929
+ for (const item of itemsWithVectors) {
930
+ if (visited.get(item.id))
931
+ continue;
932
+ visited.set(item.id, true);
933
+ // Find neighbors using existing HNSW infrastructure for efficiency
934
+ const neighbors = await this._findNeighborsWithinEps(item, itemsWithVectors, eps);
935
+ if (neighbors.length < minPts) {
936
+ // Mark as noise (outlier)
937
+ clusterAssignments.set(item.id, NOISE);
938
+ }
939
+ else {
940
+ // Start new cluster
941
+ await this._expandCluster(item, neighbors, currentClusterId, eps, minPts, itemsWithVectors, visited, clusterAssignments);
942
+ currentClusterId++;
943
+ }
944
+ }
945
+ // Convert DBSCAN results to SemanticCluster format
946
+ const clusters = [];
947
+ const clusterGroups = new Map();
948
+ const outliers = [];
949
+ // Group items by cluster assignment
950
+ for (const [itemId, clusterId] of clusterAssignments) {
951
+ if (clusterId === NOISE) {
952
+ outliers.push(itemId);
953
+ }
954
+ else {
955
+ if (!clusterGroups.has(clusterId)) {
956
+ clusterGroups.set(clusterId, []);
957
+ }
958
+ clusterGroups.get(clusterId).push(itemId);
959
+ }
960
+ }
961
+ // Create SemanticCluster objects
962
+ for (const [clusterId, memberIds] of clusterGroups) {
963
+ if (memberIds.length > 0) {
964
+ const members = itemsWithVectors.filter(item => memberIds.includes(item.id));
965
+ clusters.push({
966
+ id: `dbscan-${clusterId}`,
967
+ centroid: await this._calculateCentroidFromItems(members),
968
+ members: memberIds,
969
+ size: memberIds.length,
970
+ confidence: await this._calculateDBSCANClusterConfidence(members, eps),
971
+ label: await this._generateClusterLabel(members, 'dbscan'),
972
+ metadata: {
973
+ clustering: 'dbscan',
974
+ clusterId,
975
+ eps,
976
+ minPts,
977
+ isDensityBased: true
978
+ }
979
+ });
980
+ }
981
+ }
982
+ // Handle outliers - optionally create outlier cluster or assign to nearest
983
+ if (outliers.length > 0 && options.includeOutliers) {
984
+ const outlierMembers = itemsWithVectors.filter(item => outliers.includes(item.id));
985
+ clusters.push({
986
+ id: 'dbscan-outliers',
987
+ centroid: await this._calculateCentroidFromItems(outlierMembers),
988
+ members: outliers,
989
+ size: outliers.length,
990
+ confidence: 0.1, // Low confidence for outliers
991
+ label: 'Outliers',
992
+ metadata: {
993
+ clustering: 'dbscan',
994
+ isOutlierCluster: true,
995
+ eps,
996
+ minPts
997
+ }
998
+ });
999
+ }
1000
+ return {
1001
+ clusters,
1002
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'dbscan'),
1003
+ metadata: {
1004
+ totalItems: itemIds.length,
1005
+ clustersFound: clusters.length,
1006
+ averageClusterSize: clusters.reduce((sum, c) => sum + c.size, 0) / clusters.length || 0,
1007
+ outlierCount: outliers.length,
1008
+ eps,
1009
+ minPts,
1010
+ timestamp: new Date()
1011
+ }
1012
+ };
1013
+ }
1014
    /**
     * GRAPH COMMUNITY DETECTION: Uses existing verb relationships for clustering.
     *
     * Pipeline: build a graph from verb edges, detect communities via modularity
     * optimization, refine community boundaries with vector similarity, then
     * convert each non-empty community into a SemanticCluster with an
     * intelligently generated label.
     *
     * @param {string[]|undefined} items - Item IDs to cluster, or undefined for all items.
     * @param {object} options - Forwarded to graph building / community detection helpers.
     * @returns {Promise<object>} { clusters, metrics, metadata }
     */
    async _performGraphClustering(items, options) {
        const startTime = performance.now();
        const itemIds = items || await this._getAllItemIds();
        if (itemIds.length === 0) {
            return this._createEmptyResult(startTime, 'graph');
        }
        // Build graph from existing verb relationships
        const graph = await this._buildGraphFromVerbs(itemIds, options);
        // Detect communities using modularity optimization
        const communities = await this._detectCommunities(graph, options);
        // Enhance communities with vector similarity for boundary refinement
        const refinedCommunities = await this._refineCommunitiesWithVectors(communities, options);
        // Convert to SemanticCluster format with Triple Intelligence labeling
        const clusters = [];
        for (let i = 0; i < refinedCommunities.length; i++) {
            const community = refinedCommunities[i];
            if (community.members.length > 0) {
                const members = await this._getItemsWithMetadata(community.members);
                // Use Triple Intelligence for intelligent cluster labeling
                const clusterLabel = await this._generateIntelligentClusterLabel(members, 'graph');
                const clusterCentroid = await this._calculateCentroidFromItems(members);
                clusters.push({
                    id: `graph-${i}`,
                    centroid: clusterCentroid,
                    members: community.members,
                    size: community.members.length,
                    // Modularity doubles as confidence; 0.7 when it is absent/zero
                    confidence: community.modularity || 0.7,
                    label: clusterLabel,
                    metadata: {
                        clustering: 'graph',
                        communityId: i,
                        modularity: community.modularity,
                        graphDensity: community.density,
                        strongestConnections: community.strongestConnections
                    }
                });
            }
        }
        return {
            clusters,
            metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'graph'),
            metadata: {
                totalItems: itemIds.length,
                clustersFound: clusters.length,
                // `|| 0` guards the NaN that 0/0 would produce on an empty result
                averageClusterSize: clusters.reduce((sum, c) => sum + c.size, 0) / clusters.length || 0,
                averageModularity: clusters.reduce((sum, c) => sum + (c.metadata?.modularity || 0), 0) / clusters.length || 0,
                timestamp: new Date()
            }
        };
    }
1067
+ /**
1068
+ * MULTI-MODAL FUSION: Combines vector + graph + semantic + Triple Intelligence
1069
+ */
1070
+ async _performMultiModalClustering(items, options) {
1071
+ const startTime = performance.now();
1072
+ const itemIds = items || await this._getAllItemIds();
1073
+ if (itemIds.length === 0) {
1074
+ return this._createEmptyResult(startTime, 'multimodal');
1075
+ }
1076
+ // Run multiple clustering algorithms in parallel
1077
+ const [vectorClusters, graphClusters, semanticClusters] = await Promise.all([
1078
+ this._performHierarchicalClustering(itemIds, { ...options, algorithm: 'hierarchical' }),
1079
+ this._performGraphClustering(itemIds, { ...options, algorithm: 'graph' }),
1080
+ this._performSemanticClustering(itemIds, { ...options, algorithm: 'semantic' })
1081
+ ]);
1082
+ // Fuse results using intelligent consensus with Triple Intelligence
1083
+ const fusedClusters = await this._fuseClusteringResultsWithTripleIntelligence([vectorClusters.clusters, graphClusters.clusters, semanticClusters.clusters], options);
1084
+ return {
1085
+ clusters: fusedClusters,
1086
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'multimodal'),
1087
+ metadata: {
1088
+ totalItems: itemIds.length,
1089
+ clustersFound: fusedClusters.length,
1090
+ averageClusterSize: fusedClusters.reduce((sum, c) => sum + c.size, 0) / fusedClusters.length || 0,
1091
+ fusionMethod: 'triple_intelligence_consensus',
1092
+ componentAlgorithms: ['hierarchical', 'graph', 'semantic'],
1093
+ timestamp: new Date()
1094
+ }
1095
+ };
1096
+ }
1097
+ /**
1098
+ * SAMPLED CLUSTERING: For very large datasets using intelligent sampling
1099
+ */
1100
+ async _performSampledClustering(items, options) {
1101
+ const startTime = performance.now();
1102
+ const itemIds = items || await this._getAllItemIds();
1103
+ if (itemIds.length === 0) {
1104
+ return this._createEmptyResult(startTime, 'sampled');
1105
+ }
1106
+ const sampleSize = Math.min(options.sampleSize || 1000, itemIds.length);
1107
+ const strategy = options.strategy || 'diverse';
1108
+ // Intelligent sampling using existing infrastructure
1109
+ const sample = await this._getSampleUsingStrategy(itemIds, sampleSize, strategy);
1110
+ // Cluster the sample using the best algorithm for the sample size
1111
+ const sampleResult = await this._performHierarchicalClustering(sample, {
1112
+ ...options,
1113
+ maxClusters: Math.min(options.maxClusters || 50, Math.ceil(sample.length / 10))
1114
+ });
1115
+ // Project clusters back to full dataset using HNSW neighbors
1116
+ const projectedClusters = await this._projectClustersToFullDataset(sampleResult.clusters, itemIds, sample);
1117
+ return {
1118
+ clusters: projectedClusters,
1119
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'sampled'),
1120
+ metadata: {
1121
+ totalItems: itemIds.length,
1122
+ sampleSize: sample.length,
1123
+ samplingStrategy: strategy,
1124
+ clustersFound: projectedClusters.length,
1125
+ averageClusterSize: projectedClusters.reduce((sum, c) => sum + c.size, 0) / projectedClusters.length || 0,
1126
+ timestamp: new Date()
1127
+ }
1128
+ };
1129
+ }
1130
+ // Similarity implementation methods
1131
+ async _similarityById(id1, id2, options) {
1132
+ // Get vectors for both items
1133
+ const item1 = await this.brain.get(id1);
1134
+ const item2 = await this.brain.get(id2);
1135
+ if (!item1 || !item2) {
1136
+ return 0;
1137
+ }
1138
+ return this._similarityByVector(item1.vector, item2.vector, options);
1139
+ }
1140
+ async _similarityByVector(v1, v2, options) {
1141
+ const metric = options.metric || this.config.similarityMetric || 'cosine';
1142
+ let score = 0;
1143
+ switch (metric) {
1144
+ case 'cosine':
1145
+ score = 1 - cosineDistance(v1, v2);
1146
+ break;
1147
+ case 'euclidean':
1148
+ score = 1 / (1 + euclideanDistance(v1, v2));
1149
+ break;
1150
+ case 'manhattan':
1151
+ score = 1 / (1 + this._manhattanDistance(v1, v2));
1152
+ break;
1153
+ default:
1154
+ score = 1 - cosineDistance(v1, v2);
1155
+ }
1156
+ if (options.detailed) {
1157
+ return {
1158
+ score: options.normalized !== false ? Math.max(0, Math.min(1, score)) : score,
1159
+ confidence: this._calculateConfidence(score, v1, v2),
1160
+ explanation: this._generateSimilarityExplanation(score, metric),
1161
+ metric
1162
+ };
1163
+ }
1164
+ return options.normalized !== false ? Math.max(0, Math.min(1, score)) : score;
1165
+ }
1166
+ async _similarityByText(text1, text2, options) {
1167
+ // Convert text to vectors using brain's embedding function
1168
+ const vector1 = await this.brain.embed(text1);
1169
+ const vector2 = await this.brain.embed(text2);
1170
+ return this._similarityByVector(vector1, vector2, options);
1171
+ }
1172
+ // Utility methods for internal operations
1173
+ _isId(value) {
1174
+ return typeof value === 'string' &&
1175
+ (value.length === 36 && value.includes('-')) || // UUID-like
1176
+ (value.length > 10 && !value.includes(' ')); // ID-like string
1177
+ }
1178
+ _isVector(value) {
1179
+ return Array.isArray(value) &&
1180
+ value.length > 0 &&
1181
+ typeof value[0] === 'number';
1182
+ }
1183
+ async _convertToVector(input) {
1184
+ if (this._isVector(input)) {
1185
+ return input;
1186
+ }
1187
+ else if (this._isId(input)) {
1188
+ const item = await this.brain.get(input);
1189
+ return item?.vector || [];
1190
+ }
1191
+ else if (typeof input === 'string') {
1192
+ return await this.brain.embed(input);
1193
+ }
1194
+ else {
1195
+ throw new Error(`Cannot convert input to vector: ${typeof input}`);
1196
+ }
1197
+ }
1198
+ _createSimilarityKey(a, b, options) {
1199
+ const aKey = typeof a === 'object' ? JSON.stringify(a).substring(0, 50) : String(a);
1200
+ const bKey = typeof b === 'object' ? JSON.stringify(b).substring(0, 50) : String(b);
1201
+ return `${aKey}|${bKey}|${JSON.stringify(options)}`;
1202
+ }
1203
+ _createClusteringKey(items, options) {
1204
+ const itemsKey = items ? [...items].sort().join(',') : 'all';
1205
+ return `clustering:${itemsKey}:${JSON.stringify(options)}`;
1206
+ }
1207
+ _cacheResult(key, result, cache) {
1208
+ if (cache.size >= (this.config.cacheSize || 1000)) {
1209
+ // Remove oldest entries (simple LRU)
1210
+ const firstKey = cache.keys().next().value;
1211
+ if (firstKey)
1212
+ cache.delete(firstKey);
1213
+ }
1214
+ cache.set(key, result);
1215
+ }
1216
+ _trackPerformance(operation, startTime, itemCount, algorithm) {
1217
+ if (!this.config.performanceTracking)
1218
+ return;
1219
+ const metrics = {
1220
+ executionTime: performance.now() - startTime,
1221
+ memoryUsed: 0, // Would implement actual memory tracking
1222
+ itemsProcessed: itemCount,
1223
+ cacheHits: 0, // Would track actual cache hits
1224
+ cacheMisses: 0, // Would track actual cache misses
1225
+ algorithm
1226
+ };
1227
+ if (!this.performanceMetrics.has(operation)) {
1228
+ this.performanceMetrics.set(operation, []);
1229
+ }
1230
+ this.performanceMetrics.get(operation).push(metrics);
1231
+ }
1232
+ _createPerformanceMetrics(startTime, itemCount, algorithm) {
1233
+ return {
1234
+ executionTime: performance.now() - startTime,
1235
+ memoryUsed: 0,
1236
+ itemsProcessed: itemCount,
1237
+ cacheHits: 0,
1238
+ cacheMisses: 0,
1239
+ algorithm
1240
+ };
1241
+ }
1242
+ _initializeCleanupTimer() {
1243
+ // Periodically clean up caches to prevent memory leaks
1244
+ setInterval(() => {
1245
+ if (this.similarityCache.size > (this.config.cacheSize || 1000)) {
1246
+ this.similarityCache.clear();
1247
+ }
1248
+ if (this.clusterCache.size > (this.config.cacheSize || 1000)) {
1249
+ this.clusterCache.clear();
1250
+ }
1251
+ if (this.hierarchyCache.size > (this.config.cacheSize || 1000)) {
1252
+ this.hierarchyCache.clear();
1253
+ }
1254
+ if (this.neighborsCache.size > (this.config.cacheSize || 1000)) {
1255
+ this.neighborsCache.clear();
1256
+ }
1257
+ }, 300000); // Clean every 5 minutes
1258
+ }
1259
+ // ===== GRAPH COMMUNITY DETECTION UTILITIES =====
1260
+ /**
1261
+ * Build graph structure from existing verb relationships
1262
+ */
1263
+ async _buildGraphFromVerbs(itemIds, options) {
1264
+ const nodes = new Set(itemIds);
1265
+ const edges = new Map();
1266
+ const verbWeights = new Map();
1267
+ // Initialize verb relationship weights
1268
+ const relationshipWeights = {
1269
+ 'creates': 1.0,
1270
+ 'partOf': 0.9,
1271
+ 'contains': 0.9,
1272
+ 'relatedTo': 0.7,
1273
+ 'references': 0.6,
1274
+ 'causes': 0.8,
1275
+ 'dependsOn': 0.8,
1276
+ 'memberOf': 0.9,
1277
+ 'worksWith': 0.7,
1278
+ 'communicates': 0.6
1279
+ };
1280
+ // Get all verbs connecting the items
1281
+ for (const sourceId of itemIds) {
1282
+ const sourceVerbs = await this.brain.getRelations(sourceId);
1283
+ for (const verb of sourceVerbs) {
1284
+ const targetId = verb.target;
1285
+ if (nodes.has(targetId) && sourceId !== targetId) {
1286
+ // Initialize edge map if needed
1287
+ if (!edges.has(sourceId)) {
1288
+ edges.set(sourceId, new Map());
1289
+ }
1290
+ // Calculate edge weight from verb type and metadata
1291
+ const verbType = verb.verb;
1292
+ const baseWeight = relationshipWeights[verbType] || 0.5;
1293
+ const confidenceWeight = verb.confidence || 1.0;
1294
+ const weight = baseWeight * confidenceWeight;
1295
+ // Add or strengthen edge
1296
+ const currentWeight = edges.get(sourceId)?.get(targetId) || 0;
1297
+ edges.get(sourceId).set(targetId, Math.min(currentWeight + weight, 1.0));
1298
+ // Make graph undirected by adding reverse edge
1299
+ if (!edges.has(targetId)) {
1300
+ edges.set(targetId, new Map());
1301
+ }
1302
+ const reverseWeight = edges.get(targetId)?.get(sourceId) || 0;
1303
+ edges.get(targetId).set(sourceId, Math.min(reverseWeight + weight, 1.0));
1304
+ }
1305
+ }
1306
+ }
1307
+ return {
1308
+ nodes: Array.from(nodes),
1309
+ edges,
1310
+ nodeCount: nodes.size,
1311
+ edgeCount: Array.from(edges.values()).reduce((sum, edgeMap) => sum + edgeMap.size, 0) / 2 // Undirected
1312
+ };
1313
+ }
1314
+ /**
1315
+ * Detect communities using Louvain modularity optimization
1316
+ */
1317
+ async _detectCommunities(graph, options) {
1318
+ const { nodes, edges } = graph;
1319
+ // Initialize each node as its own community
1320
+ const communities = new Map();
1321
+ nodes.forEach((node, index) => communities.set(node, index));
1322
+ const totalWeight = this._calculateTotalWeight(edges);
1323
+ let improved = true;
1324
+ let iteration = 0;
1325
+ const maxIterations = 50;
1326
+ // Louvain algorithm: iteratively move nodes to communities that maximize modularity
1327
+ while (improved && iteration < maxIterations) {
1328
+ improved = false;
1329
+ iteration++;
1330
+ for (const node of nodes) {
1331
+ const currentCommunity = communities.get(node);
1332
+ let bestCommunity = currentCommunity;
1333
+ let bestGain = 0;
1334
+ // Consider neighboring communities
1335
+ const neighborCommunities = this._getNeighborCommunities(node, edges, communities);
1336
+ for (const neighborCommunity of neighborCommunities) {
1337
+ if (neighborCommunity !== currentCommunity) {
1338
+ const gain = this._calculateModularityGain(node, currentCommunity, neighborCommunity, edges, communities, totalWeight);
1339
+ if (gain > bestGain) {
1340
+ bestGain = gain;
1341
+ bestCommunity = neighborCommunity;
1342
+ }
1343
+ }
1344
+ }
1345
+ // Move node if beneficial
1346
+ if (bestCommunity !== currentCommunity) {
1347
+ communities.set(node, bestCommunity);
1348
+ improved = true;
1349
+ }
1350
+ }
1351
+ }
1352
+ // Group nodes by final community assignment
1353
+ const communityGroups = new Map();
1354
+ for (const [node, communityId] of communities) {
1355
+ if (!communityGroups.has(communityId)) {
1356
+ communityGroups.set(communityId, []);
1357
+ }
1358
+ communityGroups.get(communityId).push(node);
1359
+ }
1360
+ // Convert to Community objects with metadata
1361
+ const result = [];
1362
+ for (const [communityId, members] of communityGroups) {
1363
+ if (members.length >= (options.minClusterSize || 2)) {
1364
+ const modularity = this._calculateCommunityModularity(members, edges, totalWeight);
1365
+ const density = this._calculateCommunityDensity(members, edges);
1366
+ const strongestConnections = this._findStrongestConnections(members, edges, 3);
1367
+ result.push({
1368
+ id: communityId,
1369
+ members,
1370
+ modularity,
1371
+ density,
1372
+ strongestConnections
1373
+ });
1374
+ }
1375
+ }
1376
+ return result;
1377
+ }
1378
+ /**
1379
+ * Refine community boundaries using vector similarity
1380
+ */
1381
+ async _refineCommunitiesWithVectors(communities, options) {
1382
+ const refined = [];
1383
+ for (const community of communities) {
1384
+ const membersWithVectors = await this._getItemsWithVectors(community.members);
1385
+ // Check if community is coherent in vector space
1386
+ const vectorCoherence = await this._calculateVectorCoherence(membersWithVectors);
1387
+ if (vectorCoherence > 0.3) {
1388
+ // Community is coherent, keep as is
1389
+ refined.push(community);
1390
+ }
1391
+ else {
1392
+ // Split community using vector-based sub-clustering
1393
+ const subClusters = await this._performHierarchicalClustering(community.members, { ...options, maxClusters: Math.ceil(community.members.length / 5) });
1394
+ // Convert sub-clusters to communities
1395
+ for (let i = 0; i < subClusters.clusters.length; i++) {
1396
+ const subCluster = subClusters.clusters[i];
1397
+ refined.push({
1398
+ id: community.id * 1000 + i, // Unique sub-community ID
1399
+ members: subCluster.members,
1400
+ modularity: community.modularity * 0.8, // Slightly lower modularity for sub-communities
1401
+ density: community.density,
1402
+ strongestConnections: []
1403
+ });
1404
+ }
1405
+ }
1406
+ }
1407
+ return refined;
1408
+ }
1409
+ // ===== SEMANTIC CLUSTERING UTILITIES =====
1410
+ /**
1411
+ * Get items with their metadata including noun types
1412
+ */
1413
+ async _getItemsWithMetadata(itemIds) {
1414
+ const items = await Promise.all(itemIds.map(async (id) => {
1415
+ const noun = await this.brain.get(id);
1416
+ if (!noun) {
1417
+ return null;
1418
+ }
1419
+ return {
1420
+ id,
1421
+ vector: noun.vector || [],
1422
+ metadata: noun.metadata || {},
1423
+ nounType: noun.metadata?.noun || noun.metadata?.nounType || 'content',
1424
+ label: noun.metadata?.label || noun.metadata?.data || id,
1425
+ data: noun.metadata
1426
+ };
1427
+ }));
1428
+ return items.filter((item) => item !== null);
1429
+ }
1430
+ /**
1431
+ * Group items by their semantic noun types
1432
+ */
1433
+ _groupBySemanticType(items) {
1434
+ const groups = new Map();
1435
+ for (const item of items) {
1436
+ const type = item.nounType;
1437
+ if (!groups.has(type)) {
1438
+ groups.set(type, []);
1439
+ }
1440
+ groups.get(type).push(item);
1441
+ }
1442
+ return groups;
1443
+ }
1444
+ // Placeholder implementations for complex operations
1445
+ async _getAllItemIds() {
1446
+ // Get all noun IDs from the brain
1447
+ // Get total item count using find with empty query
1448
+ const allItems = await this.brain.find({ query: '', limit: Number.MAX_SAFE_INTEGER });
1449
+ const stats = { totalNouns: allItems.length || 0 };
1450
+ if (!stats.totalNouns || stats.totalNouns === 0) {
1451
+ return [];
1452
+ }
1453
+ // Get nouns with pagination (limit to 10000 for performance)
1454
+ const limit = Math.min(stats.totalNouns, 10000);
1455
+ const result = await this.brain.find({
1456
+ query: '',
1457
+ limit
1458
+ });
1459
+ return result.map((item) => item.id).filter((id) => id);
1460
+ }
1461
+ async _getTotalItemCount() {
1462
+ // Get total item count using find with empty query
1463
+ const allItems = await this.brain.find({ query: '', limit: Number.MAX_SAFE_INTEGER });
1464
+ const stats = { totalNouns: allItems.length || 0 };
1465
+ return stats.totalNouns || 0;
1466
+ }
1467
+ // ===== GRAPH ALGORITHM SUPPORTING METHODS =====
1468
+ _calculateTotalWeight(edges) {
1469
+ let total = 0;
1470
+ for (const edgeMap of edges.values()) {
1471
+ for (const weight of edgeMap.values()) {
1472
+ total += weight;
1473
+ }
1474
+ }
1475
+ return total / 2; // Undirected graph, so divide by 2
1476
+ }
1477
+ _getNeighborCommunities(node, edges, communities) {
1478
+ const neighborCommunities = new Set();
1479
+ const nodeEdges = edges.get(node);
1480
+ if (nodeEdges) {
1481
+ for (const neighbor of nodeEdges.keys()) {
1482
+ const neighborCommunity = communities.get(neighbor);
1483
+ if (neighborCommunity !== undefined) {
1484
+ neighborCommunities.add(neighborCommunity);
1485
+ }
1486
+ }
1487
+ }
1488
+ return neighborCommunities;
1489
+ }
1490
+ _calculateModularityGain(node, oldCommunity, newCommunity, edges, communities, totalWeight) {
1491
+ // Calculate the degree of the node
1492
+ const nodeDegree = this._getNodeDegree(node, edges);
1493
+ // Calculate edges to old and new communities
1494
+ const edgesToOld = this._getEdgesToCommunity(node, oldCommunity, edges, communities);
1495
+ const edgesToNew = this._getEdgesToCommunity(node, newCommunity, edges, communities);
1496
+ // Calculate community weights
1497
+ const oldCommunityWeight = this._getCommunityWeight(oldCommunity, edges, communities);
1498
+ const newCommunityWeight = this._getCommunityWeight(newCommunity, edges, communities);
1499
+ // Modularity gain calculation (simplified)
1500
+ const oldContrib = edgesToOld - (nodeDegree * oldCommunityWeight) / (2 * totalWeight);
1501
+ const newContrib = edgesToNew - (nodeDegree * newCommunityWeight) / (2 * totalWeight);
1502
+ return newContrib - oldContrib;
1503
+ }
1504
+ _getNodeDegree(node, edges) {
1505
+ const nodeEdges = edges.get(node);
1506
+ if (!nodeEdges)
1507
+ return 0;
1508
+ return Array.from(nodeEdges.values()).reduce((sum, weight) => sum + weight, 0);
1509
+ }
1510
+ _getEdgesToCommunity(node, community, edges, communities) {
1511
+ const nodeEdges = edges.get(node);
1512
+ if (!nodeEdges)
1513
+ return 0;
1514
+ let total = 0;
1515
+ for (const [neighbor, weight] of nodeEdges) {
1516
+ if (communities.get(neighbor) === community) {
1517
+ total += weight;
1518
+ }
1519
+ }
1520
+ return total;
1521
+ }
1522
+ _getCommunityWeight(community, edges, communities) {
1523
+ let total = 0;
1524
+ for (const [node, nodeCommunity] of communities) {
1525
+ if (nodeCommunity === community) {
1526
+ total += this._getNodeDegree(node, edges);
1527
+ }
1528
+ }
1529
+ return total;
1530
+ }
1531
+ _calculateCommunityModularity(members, edges, totalWeight) {
1532
+ if (members.length < 2)
1533
+ return 0;
1534
+ let internalWeight = 0;
1535
+ let totalDegree = 0;
1536
+ for (const member of members) {
1537
+ const memberEdges = edges.get(member);
1538
+ if (memberEdges) {
1539
+ totalDegree += Array.from(memberEdges.values()).reduce((sum, w) => sum + w, 0);
1540
+ // Count internal edges
1541
+ for (const [neighbor, weight] of memberEdges) {
1542
+ if (members.includes(neighbor)) {
1543
+ internalWeight += weight;
1544
+ }
1545
+ }
1546
+ }
1547
+ }
1548
+ internalWeight /= 2; // Undirected graph
1549
+ const expectedInternal = (totalDegree * totalDegree) / (4 * totalWeight);
1550
+ return (internalWeight / totalWeight) - expectedInternal / totalWeight;
1551
+ }
1552
+ _calculateCommunityDensity(members, edges) {
1553
+ if (members.length < 2)
1554
+ return 0;
1555
+ let actualEdges = 0;
1556
+ const maxPossibleEdges = (members.length * (members.length - 1)) / 2;
1557
+ for (const member of members) {
1558
+ const memberEdges = edges.get(member);
1559
+ if (memberEdges) {
1560
+ for (const neighbor of memberEdges.keys()) {
1561
+ if (members.includes(neighbor) && member < neighbor) { // Avoid double counting
1562
+ actualEdges++;
1563
+ }
1564
+ }
1565
+ }
1566
+ }
1567
+ return actualEdges / maxPossibleEdges;
1568
+ }
1569
+ _findStrongestConnections(members, edges, limit) {
1570
+ const connections = [];
1571
+ for (const member of members) {
1572
+ const memberEdges = edges.get(member);
1573
+ if (memberEdges) {
1574
+ for (const [neighbor, weight] of memberEdges) {
1575
+ if (members.includes(neighbor) && member < neighbor) { // Avoid duplicates
1576
+ connections.push({ from: member, to: neighbor, weight });
1577
+ }
1578
+ }
1579
+ }
1580
+ }
1581
+ return connections
1582
+ .sort((a, b) => b.weight - a.weight)
1583
+ .slice(0, limit);
1584
+ }
1585
+ // ===== K-MEANS UTILITIES =====
1586
+ /**
1587
+ * Get items with their vector representations
1588
+ */
1589
+ async _getItemsWithVectors(itemIds) {
1590
+ const items = await Promise.all(itemIds.map(async (id) => {
1591
+ const noun = await this.brain.get(id);
1592
+ return {
1593
+ id,
1594
+ vector: noun?.vector || []
1595
+ };
1596
+ }));
1597
+ return items.filter((item) => item !== null && item.vector.length > 0);
1598
+ }
1599
+ /**
1600
+ * Calculate centroid from items using existing distance functions
1601
+ */
1602
+ async _calculateCentroidFromItems(items) {
1603
+ if (items.length === 0)
1604
+ return [];
1605
+ if (items.length === 1)
1606
+ return [...items[0].vector];
1607
+ const dimensions = items[0].vector.length;
1608
+ const centroid = new Array(dimensions).fill(0);
1609
+ for (const item of items) {
1610
+ for (let i = 0; i < dimensions; i++) {
1611
+ centroid[i] += item.vector[i];
1612
+ }
1613
+ }
1614
+ for (let i = 0; i < dimensions; i++) {
1615
+ centroid[i] /= items.length;
1616
+ }
1617
+ return centroid;
1618
+ }
1619
+ /**
1620
+ * Initialize centroids using k-means++ algorithm for better convergence
1621
+ */
1622
+ async _initializeCentroidsKMeansPlusPlus(items, k) {
1623
+ const centroids = [];
1624
+ // Choose first centroid randomly
1625
+ const firstIdx = Math.floor(Math.random() * items.length);
1626
+ centroids.push([...items[firstIdx].vector]);
1627
+ // Choose remaining centroids using k-means++ probability
1628
+ for (let i = 1; i < k; i++) {
1629
+ const distances = items.map(item => {
1630
+ // Find distance to closest existing centroid
1631
+ let minDist = Infinity;
1632
+ for (const centroid of centroids) {
1633
+ const dist = this._calculateSquaredDistance(item.vector, centroid);
1634
+ minDist = Math.min(minDist, dist);
1635
+ }
1636
+ return minDist;
1637
+ });
1638
+ // Choose next centroid with probability proportional to squared distance
1639
+ const totalDistance = distances.reduce((sum, d) => sum + d, 0);
1640
+ const target = Math.random() * totalDistance;
1641
+ let cumulative = 0;
1642
+ for (let j = 0; j < distances.length; j++) {
1643
+ cumulative += distances[j];
1644
+ if (cumulative >= target) {
1645
+ centroids.push([...items[j].vector]);
1646
+ break;
1647
+ }
1648
+ }
1649
+ }
1650
+ return centroids;
1651
+ }
1652
+ /**
1653
+ * Assign points to nearest centroids using existing distance functions
1654
+ */
1655
+ async _assignPointsToCentroids(items, centroids) {
1656
+ const assignments = [];
1657
+ for (const item of items) {
1658
+ let bestCentroid = 0;
1659
+ let minDistance = Infinity;
1660
+ for (let i = 0; i < centroids.length; i++) {
1661
+ const distance = this._calculateSquaredDistance(item.vector, centroids[i]);
1662
+ if (distance < minDistance) {
1663
+ minDistance = distance;
1664
+ bestCentroid = i;
1665
+ }
1666
+ }
1667
+ assignments.push(bestCentroid);
1668
+ }
1669
+ return assignments;
1670
+ }
1671
+ /**
1672
+ * Update centroids based on current assignments
1673
+ */
1674
+ async _updateCentroids(items, assignments, k) {
1675
+ const newCentroids = [];
1676
+ for (let i = 0; i < k; i++) {
1677
+ const clusterItems = items.filter((_, idx) => assignments[idx] === i);
1678
+ if (clusterItems.length > 0) {
1679
+ newCentroids.push(await this._calculateCentroidFromItems(clusterItems));
1680
+ }
1681
+ else {
1682
+ // Keep old centroid if no items assigned
1683
+ newCentroids.push(new Array(items[0].vector.length).fill(0));
1684
+ }
1685
+ }
1686
+ return newCentroids;
1687
+ }
1688
+ /**
1689
+ * Calculate how much assignments have changed between iterations
1690
+ */
1691
+ _calculateAssignmentChangeRate(oldAssignments, newAssignments) {
1692
+ if (oldAssignments.length !== newAssignments.length)
1693
+ return 1.0;
1694
+ let changes = 0;
1695
+ for (let i = 0; i < oldAssignments.length; i++) {
1696
+ if (oldAssignments[i] !== newAssignments[i]) {
1697
+ changes++;
1698
+ }
1699
+ }
1700
+ return changes / oldAssignments.length;
1701
+ }
1702
+ /**
1703
+ * Calculate cluster confidence for k-means clusters
1704
+ */
1705
+ async _calculateKMeansClusterConfidence(clusterItems, centroid) {
1706
+ if (clusterItems.length <= 1)
1707
+ return 1.0;
1708
+ // Calculate average distance to centroid
1709
+ const distances = clusterItems.map(item => this._calculateSquaredDistance(item.vector, centroid));
1710
+ const avgDistance = distances.reduce((sum, d) => sum + d, 0) / distances.length;
1711
+ // Calculate standard deviation
1712
+ const variance = distances.reduce((sum, d) => sum + Math.pow(d - avgDistance, 2), 0) / distances.length;
1713
+ const stdDev = Math.sqrt(variance);
1714
+ // Higher confidence for tighter clusters
1715
+ const tightness = avgDistance > 0 ? Math.max(0, 1 - (stdDev / avgDistance)) : 1.0;
1716
+ return Math.min(1.0, tightness);
1717
+ }
1718
+ // ===== DBSCAN UTILITIES =====
1719
+ /**
1720
+ * Estimate optimal eps parameter using k-nearest neighbor distances
1721
+ */
1722
+ async _estimateOptimalEps(items, minPts) {
1723
+ if (items.length < minPts)
1724
+ return 0.5;
1725
+ // Calculate k-nearest neighbor distances for each point
1726
+ const kDistances = [];
1727
+ for (const item of items) {
1728
+ const distances = [];
1729
+ for (const otherItem of items) {
1730
+ if (item.id !== otherItem.id) {
1731
+ const distance = Math.sqrt(this._calculateSquaredDistance(item.vector, otherItem.vector));
1732
+ distances.push(distance);
1733
+ }
1734
+ }
1735
+ distances.sort((a, b) => a - b);
1736
+ // Get k-th nearest neighbor distance (minPts-1 because we exclude self)
1737
+ const kthDistance = distances[Math.min(minPts - 1, distances.length - 1)];
1738
+ kDistances.push(kthDistance);
1739
+ }
1740
+ kDistances.sort((a, b) => a - b);
1741
+ // Use knee point detection - find point with maximum curvature
1742
+ // Simplified approach: use 90th percentile of k-distances
1743
+ const percentileIndex = Math.floor(kDistances.length * 0.9);
1744
+ return kDistances[percentileIndex] || 0.5;
1745
+ }
1746
+ /**
1747
+ * Find neighbors within epsilon distance using efficient vector operations
1748
+ */
1749
+ async _findNeighborsWithinEps(item, allItems, eps) {
1750
+ const neighbors = [];
1751
+ const epsSquared = eps * eps;
1752
+ for (const otherItem of allItems) {
1753
+ if (item.id !== otherItem.id) {
1754
+ const distanceSquared = this._calculateSquaredDistance(item.vector, otherItem.vector);
1755
+ if (distanceSquared <= epsSquared) {
1756
+ neighbors.push(otherItem);
1757
+ }
1758
+ }
1759
+ }
1760
+ return neighbors;
1761
+ }
1762
+ /**
1763
+ * Expand DBSCAN cluster by adding density-reachable points
1764
+ */
1765
+ async _expandCluster(seedPoint, neighbors, clusterId, eps, minPts, allItems, visited, clusterAssignments) {
1766
+ clusterAssignments.set(seedPoint.id, clusterId);
1767
+ let i = 0;
1768
+ while (i < neighbors.length) {
1769
+ const neighbor = neighbors[i];
1770
+ if (!visited.get(neighbor.id)) {
1771
+ visited.set(neighbor.id, true);
1772
+ const neighborNeighbors = await this._findNeighborsWithinEps(neighbor, allItems, eps);
1773
+ if (neighborNeighbors.length >= minPts) {
1774
+ // Add new neighbors to the list (union operation)
1775
+ for (const newNeighbor of neighborNeighbors) {
1776
+ if (!neighbors.some(n => n.id === newNeighbor.id)) {
1777
+ neighbors.push(newNeighbor);
1778
+ }
1779
+ }
1780
+ }
1781
+ }
1782
+ // If neighbor is not assigned to any cluster, assign to current cluster
1783
+ if (!clusterAssignments.has(neighbor.id)) {
1784
+ clusterAssignments.set(neighbor.id, clusterId);
1785
+ }
1786
+ i++;
1787
+ }
1788
+ }
1789
+ /**
1790
+ * Calculate DBSCAN cluster confidence based on density
1791
+ */
1792
+ async _calculateDBSCANClusterConfidence(clusterItems, eps) {
1793
+ if (clusterItems.length <= 1)
1794
+ return 1.0;
1795
+ // Calculate average density within the cluster
1796
+ let totalNeighborCount = 0;
1797
+ const epsSquared = eps * eps;
1798
+ for (const item of clusterItems) {
1799
+ let neighborCount = 0;
1800
+ for (const otherItem of clusterItems) {
1801
+ if (item !== otherItem) {
1802
+ const distanceSquared = this._calculateSquaredDistance(item.vector, otherItem.vector);
1803
+ if (distanceSquared <= epsSquared) {
1804
+ neighborCount++;
1805
+ }
1806
+ }
1807
+ }
1808
+ totalNeighborCount += neighborCount;
1809
+ }
1810
+ const avgDensity = totalNeighborCount / clusterItems.length;
1811
+ const maxPossibleDensity = clusterItems.length - 1;
1812
+ return maxPossibleDensity > 0 ? avgDensity / maxPossibleDensity : 1.0;
1813
+ }
1814
+ // ===== VECTOR UTILITIES =====
1815
+ /**
1816
+ * Calculate squared Euclidean distance (more efficient than sqrt)
1817
+ */
1818
+ _calculateSquaredDistance(vec1, vec2) {
1819
+ if (vec1.length !== vec2.length)
1820
+ return Infinity;
1821
+ let sum = 0;
1822
+ for (let i = 0; i < vec1.length; i++) {
1823
+ const diff = vec1[i] - vec2[i];
1824
+ sum += diff * diff;
1825
+ }
1826
+ return sum;
1827
+ }
1828
+ /**
1829
+ * Calculate vector coherence for community refinement
1830
+ */
1831
+ async _calculateVectorCoherence(items) {
1832
+ if (items.length <= 1)
1833
+ return 1.0;
1834
+ const centroid = await this._calculateCentroidFromItems(items);
1835
+ // Calculate average distance to centroid
1836
+ const distances = items.map(item => Math.sqrt(this._calculateSquaredDistance(item.vector, centroid)));
1837
+ const avgDistance = distances.reduce((sum, d) => sum + d, 0) / distances.length;
1838
+ // Calculate cohesion as inverse of average distance (normalized)
1839
+ const maxDistance = Math.sqrt(centroid.length); // Rough normalization
1840
+ return Math.max(0, 1 - (avgDistance / maxDistance));
1841
+ }
1842
+ async _getItemsByField(field) {
1843
+ // Implementation would query items by metadata field
1844
+ return [];
1845
+ }
1846
+ // ===== TRIPLE INTELLIGENCE INTEGRATION =====
1847
+ /**
1848
+ * Generate intelligent cluster labels using Triple Intelligence
1849
+ */
1850
+ async _generateIntelligentClusterLabel(members, algorithm) {
1851
+ if (members.length === 0)
1852
+ return `${algorithm}-cluster`;
1853
+ // Use simple labeling - Triple Intelligence doesn't generate labels from prompts
1854
+ return this._generateClusterLabel(members, algorithm);
1855
+ }
1856
+ /**
1857
+ * Generate simple cluster labels based on semantic analysis
1858
+ */
1859
+ async _generateClusterLabel(members, algorithm) {
1860
+ if (members.length === 0)
1861
+ return `${algorithm}-cluster`;
1862
+ // Analyze member types and create descriptive label
1863
+ const typeCount = new Map();
1864
+ for (const member of members) {
1865
+ const type = member.nounType || 'unknown';
1866
+ typeCount.set(type, (typeCount.get(type) || 0) + 1);
1867
+ }
1868
+ // Find most common type
1869
+ let dominantType = 'mixed';
1870
+ let maxCount = 0;
1871
+ for (const [type, count] of typeCount) {
1872
+ if (count > maxCount) {
1873
+ maxCount = count;
1874
+ dominantType = type;
1875
+ }
1876
+ }
1877
+ // Generate label based on dominant type and size
1878
+ const size = members.length;
1879
+ const typePercent = Math.round((maxCount / size) * 100);
1880
+ if (typePercent >= 80) {
1881
+ return `${dominantType} group (${size})`;
1882
+ }
1883
+ else if (typePercent >= 60) {
1884
+ return `mostly ${dominantType} (${size})`;
1885
+ }
1886
+ else {
1887
+ const topTypes = Array.from(typeCount.entries())
1888
+ .sort((a, b) => b[1] - a[1])
1889
+ .slice(0, 2)
1890
+ .map(([type]) => type)
1891
+ .join(' & ');
1892
+ return `${topTypes} cluster (${size})`;
1893
+ }
1894
+ }
1895
/**
 * Fuse the results of up to three clustering passes (vector, graph,
 * semantic — in that fixed positional order) into consensus clusters.
 *
 * Despite the name, no Triple Intelligence call is made here: fusion is a
 * simple co-occurrence consensus. Two items end up in the same fused
 * cluster when they were clustered together by at least two of the
 * algorithms. Note the iteration order of `itemClusterMapping` determines
 * which item seeds each consensus group and hence the `fusion-<n>` ids,
 * so results are deterministic only for a fixed input order.
 *
 * @param {Array<Array>} clusterSets - [vectorClusters, graphClusters, semanticClusters].
 * @param {object} options - Uses `minClusterSize` (default 2).
 * @returns {Promise<Array>} Fused SemanticCluster objects.
 */
async _fuseClusteringResultsWithTripleIntelligence(clusterSets, options) {
    if (clusterSets.length === 0)
        return [];
    if (clusterSets.length === 1)
        return clusterSets[0]; // nothing to fuse
    // Positional destructuring: callers supply the sets in this order.
    const [vectorClusters, graphClusters, semanticClusters] = clusterSets;
    // item id -> list of {algorithm, clusterId, confidence} assignments.
    const itemClusterMapping = new Map();
    const allAlgorithms = ['vector', 'graph', 'semantic'];
    const algorithmClusters = [vectorClusters, graphClusters, semanticClusters];
    // Step 1: index every item's cluster assignment per algorithm.
    for (let i = 0; i < algorithmClusters.length; i++) {
        const algorithm = allAlgorithms[i];
        const clusters = algorithmClusters[i] || [];
        for (const cluster of clusters) {
            for (const memberId of cluster.members) {
                if (!itemClusterMapping.has(memberId)) {
                    itemClusterMapping.set(memberId, []);
                }
                itemClusterMapping.get(memberId).push({
                    algorithm,
                    clusterId: cluster.id,
                    confidence: cluster.confidence
                });
            }
        }
    }
    // Step 2: greedy consensus grouping. Each unprocessed item seeds a
    // group; items that share clusters with it in >= 2 algorithms join.
    const consensusClusters = new Map();
    const processedItems = new Set();
    for (const [itemId, assignments] of itemClusterMapping) {
        if (processedItems.has(itemId))
            continue;
        const consensusGroup = new Set([itemId]);
        for (const assignment of assignments) {
            const sameClusterItems = this._getItemsInCluster(assignment.clusterId, clusterSets);
            for (const otherItem of sameClusterItems) {
                if (!processedItems.has(otherItem) && otherItem !== itemId) {
                    const otherAssignments = itemClusterMapping.get(otherItem) || [];
                    // Count (algorithm, clusterId) matches between the two items.
                    const coOccurrences = this._countCoOccurrences(assignments, otherAssignments);
                    if (coOccurrences >= 2) { // together in at least 2 algorithms
                        consensusGroup.add(otherItem);
                    }
                }
            }
        }
        // Mark the whole group processed so members can't seed new groups.
        for (const groupItem of consensusGroup) {
            processedItems.add(groupItem);
        }
        if (consensusGroup.size >= (options.minClusterSize || 2)) {
            const consensusId = `fusion-${consensusClusters.size}`;
            consensusClusters.set(consensusId, consensusGroup);
        }
    }
    // Step 3: materialize SemanticCluster objects with centroid + label.
    const fusedClusters = [];
    for (const [clusterId, memberSet] of consensusClusters) {
        const members = Array.from(memberSet);
        const membersWithMetadata = await this._getItemsWithMetadata(members);
        if (membersWithMetadata.length > 0) {
            const centroid = await this._calculateCentroidFromItems(membersWithMetadata);
            const label = await this._generateIntelligentClusterLabel(membersWithMetadata, 'multimodal');
            // Fusion confidence = mean confidence across all contributing assignments.
            const avgConfidence = this._calculateFusionConfidence(members, itemClusterMapping);
            fusedClusters.push({
                id: clusterId,
                centroid,
                members,
                size: members.length,
                confidence: avgConfidence,
                label,
                metadata: {
                    clustering: 'multimodal_fusion',
                    algorithms: allAlgorithms,
                    fusionMethod: 'consensus',
                    agreementLevel: avgConfidence
                }
            });
        }
    }
    return fusedClusters;
}
1985
+ /**
1986
+ * Get items in a specific cluster from cluster sets
1987
+ */
1988
+ _getItemsInCluster(clusterId, clusterSets) {
1989
+ for (const clusterSet of clusterSets) {
1990
+ for (const cluster of clusterSet) {
1991
+ if (cluster.id === clusterId) {
1992
+ return cluster.members;
1993
+ }
1994
+ }
1995
+ }
1996
+ return [];
1997
+ }
1998
+ /**
1999
+ * Count co-occurrences between two sets of assignments
2000
+ */
2001
+ _countCoOccurrences(assignments1, assignments2) {
2002
+ let count = 0;
2003
+ for (const assignment1 of assignments1) {
2004
+ for (const assignment2 of assignments2) {
2005
+ if (assignment1.algorithm === assignment2.algorithm &&
2006
+ assignment1.clusterId === assignment2.clusterId) {
2007
+ count++;
2008
+ }
2009
+ }
2010
+ }
2011
+ return count;
2012
+ }
2013
+ /**
2014
+ * Calculate fusion confidence based on algorithm agreement
2015
+ */
2016
+ _calculateFusionConfidence(members, itemClusterMapping) {
2017
+ let totalConfidence = 0;
2018
+ let totalAssignments = 0;
2019
+ for (const member of members) {
2020
+ const assignments = itemClusterMapping.get(member) || [];
2021
+ for (const assignment of assignments) {
2022
+ totalConfidence += assignment.confidence;
2023
+ totalAssignments++;
2024
+ }
2025
+ }
2026
+ return totalAssignments > 0 ? totalConfidence / totalAssignments : 0.5;
2027
+ }
2028
+ // ===== ADDITIONAL UTILITIES =====
2029
+ /**
2030
+ * Generate empty clustering result for edge cases
2031
+ */
2032
+ _createEmptyResult(startTime, algorithm) {
2033
+ return {
2034
+ clusters: [],
2035
+ metrics: this._createPerformanceMetrics(startTime, 0, algorithm),
2036
+ metadata: {
2037
+ totalItems: 0,
2038
+ clustersFound: 0,
2039
+ averageClusterSize: 0,
2040
+ timestamp: new Date()
2041
+ }
2042
+ };
2043
+ }
2044
+ // ===== SAMPLING AND PROJECTION UTILITIES =====
2045
+ /**
2046
+ * Get sample using specified strategy for large dataset clustering
2047
+ */
2048
+ async _getSampleUsingStrategy(itemIds, sampleSize, strategy) {
2049
+ if (itemIds.length <= sampleSize)
2050
+ return itemIds;
2051
+ switch (strategy) {
2052
+ case 'random':
2053
+ return this._getRandomSample(itemIds, sampleSize);
2054
+ case 'diverse':
2055
+ return await this._getDiverseSample(itemIds, sampleSize);
2056
+ case 'recent':
2057
+ return await this._getRecentSample(itemIds, sampleSize);
2058
+ case 'important':
2059
+ return await this._getImportantSample(itemIds, sampleSize);
2060
+ default:
2061
+ return this._getRandomSample(itemIds, sampleSize);
2062
+ }
2063
+ }
2064
+ /**
2065
+ * Random sampling
2066
+ */
2067
+ _getRandomSample(itemIds, sampleSize) {
2068
+ const shuffled = [...itemIds].sort(() => Math.random() - 0.5);
2069
+ return shuffled.slice(0, sampleSize);
2070
+ }
2071
+ /**
2072
+ * Diverse sampling using vector space distribution
2073
+ */
2074
+ async _getDiverseSample(itemIds, sampleSize) {
2075
+ // Get vectors for all items
2076
+ const itemsWithVectors = await this._getItemsWithVectors(itemIds);
2077
+ if (itemsWithVectors.length <= sampleSize) {
2078
+ return itemIds;
2079
+ }
2080
+ // Use k-means++ style selection for diversity
2081
+ const sample = [];
2082
+ // Select first item randomly
2083
+ let remainingItems = [...itemsWithVectors];
2084
+ const firstIdx = Math.floor(Math.random() * remainingItems.length);
2085
+ sample.push(remainingItems[firstIdx].id);
2086
+ remainingItems.splice(firstIdx, 1);
2087
+ // Select remaining items based on maximum distance to already selected items
2088
+ while (sample.length < sampleSize && remainingItems.length > 0) {
2089
+ let maxDistance = -1;
2090
+ let bestIdx = 0;
2091
+ for (let i = 0; i < remainingItems.length; i++) {
2092
+ const item = remainingItems[i];
2093
+ // Find minimum distance to any selected item
2094
+ let minDistanceToSelected = Infinity;
2095
+ for (const selectedId of sample) {
2096
+ const selectedItem = itemsWithVectors.find(it => it.id === selectedId);
2097
+ if (selectedItem) {
2098
+ const distance = Math.sqrt(this._calculateSquaredDistance(item.vector, selectedItem.vector));
2099
+ minDistanceToSelected = Math.min(minDistanceToSelected, distance);
2100
+ }
2101
+ }
2102
+ // Select item with maximum minimum distance (most diverse)
2103
+ if (minDistanceToSelected > maxDistance) {
2104
+ maxDistance = minDistanceToSelected;
2105
+ bestIdx = i;
2106
+ }
2107
+ }
2108
+ sample.push(remainingItems[bestIdx].id);
2109
+ remainingItems.splice(bestIdx, 1);
2110
+ }
2111
+ return sample;
2112
+ }
2113
+ /**
2114
+ * Recent sampling based on creation time
2115
+ */
2116
+ async _getRecentSample(itemIds, sampleSize) {
2117
+ const items = await Promise.all(itemIds.map(async (id) => {
2118
+ const noun = await this.brain.get(id);
2119
+ return {
2120
+ id,
2121
+ createdAt: noun?.createdAt || new Date(0)
2122
+ };
2123
+ }));
2124
+ // Sort by creation time (most recent first)
2125
+ items.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime());
2126
+ return items.slice(0, sampleSize).map(item => item.id);
2127
+ }
2128
+ /**
2129
+ * Important sampling based on connection count and metadata
2130
+ */
2131
+ async _getImportantSample(itemIds, sampleSize) {
2132
+ const items = await Promise.all(itemIds.map(async (id) => {
2133
+ const verbs = await this.brain.getRelations(id);
2134
+ const noun = await this.brain.get(id);
2135
+ // Calculate importance score
2136
+ const connectionScore = verbs.length;
2137
+ const dataScore = noun?.data ? Object.keys(noun.data).length : 0;
2138
+ const importanceScore = connectionScore * 2 + dataScore;
2139
+ return {
2140
+ id,
2141
+ importance: importanceScore
2142
+ };
2143
+ }));
2144
+ // Sort by importance (highest first)
2145
+ items.sort((a, b) => b.importance - a.importance);
2146
+ return items.slice(0, sampleSize).map(item => item.id);
2147
+ }
2148
+ /**
2149
+ * Project clusters back to full dataset using HNSW neighbors
2150
+ */
2151
+ async _projectClustersToFullDataset(sampleClusters, fullItemIds, sampleIds) {
2152
+ const projectedClusters = [];
2153
+ // Create mapping of items not in sample
2154
+ const remainingItems = fullItemIds.filter(id => !sampleIds.includes(id));
2155
+ // For each sample cluster, find which remaining items should belong to it
2156
+ for (const sampleCluster of sampleClusters) {
2157
+ const projectedMembers = [...sampleCluster.members];
2158
+ // For each remaining item, find its nearest neighbors in the sample
2159
+ for (const itemId of remainingItems) {
2160
+ try {
2161
+ const neighbors = await this.brain.neural.neighbors(itemId, {
2162
+ limit: 3,
2163
+ includeMetadata: false
2164
+ });
2165
+ // Check if any of the nearest neighbors belong to this cluster
2166
+ let belongsToCluster = false;
2167
+ for (const neighbor of neighbors.neighbors) {
2168
+ if (sampleCluster.members.includes(neighbor.id) && neighbor.similarity > 0.7) {
2169
+ belongsToCluster = true;
2170
+ break;
2171
+ }
2172
+ }
2173
+ if (belongsToCluster) {
2174
+ projectedMembers.push(itemId);
2175
+ }
2176
+ }
2177
+ catch (error) {
2178
+ // Skip items that can't be processed
2179
+ continue;
2180
+ }
2181
+ }
2182
+ // Create projected cluster
2183
+ if (projectedMembers.length > 0) {
2184
+ const membersWithVectors = await this._getItemsWithVectors(projectedMembers);
2185
+ projectedClusters.push({
2186
+ ...sampleCluster,
2187
+ id: `projected-${sampleCluster.id}`,
2188
+ members: projectedMembers,
2189
+ size: projectedMembers.length,
2190
+ centroid: await this._calculateCentroidFromItems(membersWithVectors),
2191
+ confidence: sampleCluster.confidence * 0.9, // Slightly lower confidence for projection
2192
+ metadata: {
2193
+ ...sampleCluster.metadata,
2194
+ isProjected: true,
2195
+ originalSampleSize: sampleCluster.size,
2196
+ projectedSize: projectedMembers.length
2197
+ }
2198
+ });
2199
+ }
2200
+ }
2201
+ return projectedClusters;
2202
+ }
2203
+ _groupByDomain(items, field) {
2204
+ const groups = new Map();
2205
+ for (const item of items) {
2206
+ const domain = item.metadata?.[field] || 'unknown';
2207
+ if (!groups.has(domain)) {
2208
+ groups.set(domain, []);
2209
+ }
2210
+ groups.get(domain).push(item);
2211
+ }
2212
+ return groups;
2213
+ }
2214
+ _calculateDomainConfidence(cluster, domainItems) {
2215
+ // Calculate how well this cluster represents the domain
2216
+ // Based on cluster density and coherence
2217
+ const density = cluster.members.length / (cluster.members.length + 10); // Normalize
2218
+ const coherence = cluster.cohesion || 0.5; // Use cluster's cohesion if available
2219
+ // Domain relevance: what fraction of cluster members are from this domain
2220
+ const domainMemberCount = cluster.members.filter(id => domainItems.some(item => item.id === id)).length;
2221
+ const domainRelevance = cluster.members.length > 0
2222
+ ? domainMemberCount / cluster.members.length
2223
+ : 0;
2224
+ return (density * 0.3 + coherence * 0.3 + domainRelevance * 0.4); // Weighted average
2225
+ }
2226
/**
 * Find cluster members that plausibly belong to more than one domain.
 * TODO: not yet implemented — always returns an empty list; cluster and
 * threshold are currently ignored.
 */
async _findCrossDomainMembers(cluster, threshold) {
    // Find members that might belong to multiple domains
    return [];
}
/**
 * Find clusters whose membership spans multiple domains.
 * TODO: not yet implemented — always returns an empty list; clusters and
 * threshold are currently ignored.
 */
async _findCrossDomainClusters(clusters, threshold) {
    // Find clusters that span multiple domains
    return [];
}
/**
 * Fetch items whose timeField value falls inside the given window.
 * TODO: not yet implemented — always returns an empty list.
 */
async _getItemsByTimeWindow(timeField, window) {
    // Implementation would query items within time window
    return [];
}
2238
/**
 * Describe a cluster's behavior over time.
 * Placeholder: reports a 'stable' trend and uses the current time for
 * start/end/peak with frequency 1 until real temporal analysis exists.
 * NOTE(review): cluster, items, and timeField are currently unused.
 */
async _calculateTemporalMetrics(cluster, items, timeField) {
    // Calculate temporal characteristics of the cluster
    return {
        trend: 'stable',
        metrics: {
            startTime: new Date(),
            endTime: new Date(),
            peakTime: new Date(),
            frequency: 1
        }
    };
}
2250
/**
 * Merge clusters produced from overlapping time windows.
 * TODO: not yet implemented — returns the clusters unchanged.
 */
_mergeOverlappingTemporalClusters(clusters) {
    // Merge clusters from overlapping time windows
    return clusters;
}
2254
+ _adjustThresholdAdaptively(clusters, currentThreshold) {
2255
+ // Adjust clustering threshold based on results
2256
+ return currentThreshold || 0.6;
2257
+ }
2258
+ async _calculateItemToClusterSimilarity(itemId, cluster) {
2259
+ // Calculate similarity between an item and a cluster centroid
2260
+ const item = await this.brain.get(itemId);
2261
+ if (!item || !item.vector || !cluster.centroid) {
2262
+ return 0; // No similarity if vectors missing
2263
+ }
2264
+ // Calculate cosine similarity
2265
+ const dotProduct = item.vector.reduce((sum, val, i) => sum + val * cluster.centroid[i], 0);
2266
+ const itemMagnitude = Math.sqrt(item.vector.reduce((sum, val) => sum + val * val, 0));
2267
+ const centroidMagnitude = Math.sqrt(cluster.centroid.reduce((sum, val) => sum + val * val, 0));
2268
+ if (itemMagnitude === 0 || centroidMagnitude === 0) {
2269
+ return 0;
2270
+ }
2271
+ return dotProduct / (itemMagnitude * centroidMagnitude);
2272
+ }
2273
+ async _recalculateClusterCentroid(cluster) {
2274
+ // Recalculate centroid after adding new members
2275
+ if (cluster.members.length === 0) {
2276
+ return cluster.centroid; // Keep existing if no members
2277
+ }
2278
+ // Get all member vectors
2279
+ const memberVectors = [];
2280
+ for (const memberId of cluster.members) {
2281
+ const member = await this.brain.get(memberId);
2282
+ if (member && member.vector) {
2283
+ memberVectors.push(member.vector);
2284
+ }
2285
+ }
2286
+ if (memberVectors.length === 0) {
2287
+ return cluster.centroid; // Keep existing if no valid vectors
2288
+ }
2289
+ // Calculate mean vector (centroid)
2290
+ const dimensions = memberVectors[0].length;
2291
+ const newCentroid = new Array(dimensions).fill(0);
2292
+ for (const vector of memberVectors) {
2293
+ for (let i = 0; i < dimensions; i++) {
2294
+ newCentroid[i] += vector[i];
2295
+ }
2296
+ }
2297
+ for (let i = 0; i < dimensions; i++) {
2298
+ newCentroid[i] /= memberVectors.length;
2299
+ }
2300
+ return newCentroid;
2301
+ }
2302
+ async _calculateSimilarity(id1, id2) {
2303
+ return await this.similar(id1, id2);
2304
+ }
2305
+ _calculateEdgeWeight(verb) {
2306
+ // Calculate edge weight based on verb properties
2307
+ let weight = 1.0;
2308
+ // Factor in connection strength if available
2309
+ if (verb.connections && verb.connections instanceof Map) {
2310
+ const connectionCount = verb.connections.size;
2311
+ weight += Math.log(connectionCount + 1) * 0.1;
2312
+ }
2313
+ // Factor in verb type significance
2314
+ const significantVerbs = ['caused', 'created', 'contains', 'implements', 'extends'];
2315
+ if (verb.verb && significantVerbs.includes(verb.verb.toLowerCase())) {
2316
+ weight += 0.3;
2317
+ }
2318
+ // Factor in recency if available
2319
+ if (verb.metadata?.createdAt) {
2320
+ const now = Date.now();
2321
+ const created = new Date(verb.metadata.createdAt).getTime();
2322
+ const daysSinceCreated = (now - created) / (1000 * 60 * 60 * 24);
2323
+ // Newer relationships get slight boost
2324
+ weight += Math.max(0, (30 - daysSinceCreated) / 100);
2325
+ }
2326
+ return Math.min(weight, 3.0); // Cap at 3.0
2327
+ }
2328
+ _sortNeighbors(neighbors, sortBy) {
2329
+ switch (sortBy) {
2330
+ case 'similarity':
2331
+ neighbors.sort((a, b) => b.similarity - a.similarity);
2332
+ break;
2333
+ case 'importance':
2334
+ neighbors.sort((a, b) => (b.metadata?.importance || 0) - (a.metadata?.importance || 0));
2335
+ break;
2336
+ case 'recency':
2337
+ neighbors.sort((a, b) => {
2338
+ const aTime = new Date(a.metadata?.createdAt || 0).getTime();
2339
+ const bTime = new Date(b.metadata?.createdAt || 0).getTime();
2340
+ return bTime - aTime;
2341
+ });
2342
+ break;
2343
+ }
2344
+ }
2345
+ async _buildSemanticHierarchy(item, options) {
2346
+ // Build semantic hierarchy around an item
2347
+ return {
2348
+ self: { id: item.id, vector: item.vector, metadata: item.metadata }
2349
+ };
2350
+ }
2351
/**
 * Detect outliers via cluster-distance analysis.
 * TODO: not yet implemented — always returns an empty list.
 */
async _detectOutliersClusterBased(threshold, options) {
    // Detect outliers using cluster-based method
    return [];
}
/**
 * Detect outliers via an isolation-forest approach.
 * TODO: not yet implemented — always returns an empty list.
 */
async _detectOutliersIsolation(threshold, options) {
    // Detect outliers using isolation forest method
    return [];
}
/**
 * Detect outliers via statistical tests.
 * TODO: not yet implemented — always returns an empty list.
 */
async _detectOutliersStatistical(threshold, options) {
    // Detect outliers using statistical methods
    return [];
}
/**
 * Produce up to maxNodes nodes for graph visualization.
 * TODO: not yet implemented — always returns an empty list.
 */
async _generateVisualizationNodes(maxNodes, options) {
    // Generate nodes for visualization
    return [];
}
/**
 * Produce edges connecting the given visualization nodes.
 * TODO: not yet implemented — always returns an empty list.
 */
async _generateVisualizationEdges(nodes, options) {
    // Generate edges for visualization
    return [];
}
/**
 * Produce cluster annotations for the given visualization nodes.
 * TODO: not yet implemented — always returns an empty list.
 */
async _generateVisualizationClusters(nodes) {
    // Generate cluster information for visualization
    return [];
}
2375
+ async _applyLayoutAlgorithm(nodes, edges, algorithm, dimensions) {
2376
+ // Apply layout algorithm to position nodes
2377
+ return nodes.map((node, i) => ({
2378
+ ...node,
2379
+ x: Math.random() * 100,
2380
+ y: Math.random() * 100,
2381
+ z: dimensions === 3 ? Math.random() * 100 : undefined
2382
+ }));
2383
+ }
2384
+ _manhattanDistance(v1, v2) {
2385
+ let sum = 0;
2386
+ for (let i = 0; i < v1.length; i++) {
2387
+ sum += Math.abs(v1[i] - v2[i]);
2388
+ }
2389
+ return sum;
2390
+ }
2391
+ _calculateConfidence(score, v1, v2) {
2392
+ // Calculate confidence based on vector magnitudes and score
2393
+ return Math.min(1, score + 0.1);
2394
+ }
2395
+ _generateSimilarityExplanation(score, metric) {
2396
+ if (score > 0.9)
2397
+ return `Very high similarity using ${metric} distance`;
2398
+ if (score > 0.7)
2399
+ return `High similarity using ${metric} distance`;
2400
+ if (score > 0.5)
2401
+ return `Moderate similarity using ${metric} distance`;
2402
+ if (score > 0.3)
2403
+ return `Low similarity using ${metric} distance`;
2404
+ return `Very low similarity using ${metric} distance`;
2405
+ }
2406
+ // ===== PUBLIC API: UTILITY & STATUS =====
2407
+ /**
2408
+ * Get performance metrics for monitoring
2409
+ */
2410
+ getPerformanceMetrics(operation) {
2411
+ if (operation) {
2412
+ return this.performanceMetrics.get(operation) || [];
2413
+ }
2414
+ return this.performanceMetrics;
2415
+ }
2416
+ /**
2417
+ * Clear all caches
2418
+ */
2419
+ clearCaches() {
2420
+ this.similarityCache.clear();
2421
+ this.clusterCache.clear();
2422
+ this.hierarchyCache.clear();
2423
+ this.neighborsCache.clear();
2424
+ }
2425
+ /**
2426
+ * Get cache statistics
2427
+ */
2428
+ getCacheStats() {
2429
+ const maxSize = this.config.cacheSize || 1000;
2430
+ return {
2431
+ similarity: { size: this.similarityCache.size, maxSize },
2432
+ clustering: { size: this.clusterCache.size, maxSize },
2433
+ hierarchy: { size: this.hierarchyCache.size, maxSize },
2434
+ neighbors: { size: this.neighborsCache.size, maxSize }
2435
+ };
2436
+ }
2437
+ // ===== MISSING HELPER METHODS =====
2438
+ /**
2439
+ * Analyze data characteristics for algorithm selection
2440
+ */
2441
+ async _analyzeDataCharacteristics(itemIds) {
2442
+ const size = itemIds.length;
2443
+ const items = await this._getItemsWithMetadata(itemIds.slice(0, Math.min(100, size)));
2444
+ const dimensionality = items.length > 0 ? items[0].vector.length : 0;
2445
+ // Calculate graph density by sampling verb relationships
2446
+ let connectionCount = 0;
2447
+ const sampleSize = Math.min(50, itemIds.length);
2448
+ for (let i = 0; i < sampleSize; i++) {
2449
+ try {
2450
+ const verbs = await this.brain.getVerbsForNoun(itemIds[i]);
2451
+ connectionCount += verbs.length;
2452
+ }
2453
+ catch (error) {
2454
+ // Skip items that can't be processed
2455
+ continue;
2456
+ }
2457
+ }
2458
+ const graphDensity = sampleSize > 0 ? connectionCount / (sampleSize * sampleSize) : 0;
2459
+ // Calculate type distribution
2460
+ const typeDistribution = {};
2461
+ for (const item of items) {
2462
+ const type = item.nounType;
2463
+ typeDistribution[type] = (typeDistribution[type] || 0) + 1;
2464
+ }
2465
+ return { size, dimensionality, graphDensity, typeDistribution };
2466
+ }
2467
/**
 * Calculate centroid for a group of items.
 * Thin alias over _calculateCentroidFromItems, kept for naming symmetry
 * with the other group-level helpers.
 */
async _calculateGroupCentroid(items) {
    return this._calculateCentroidFromItems(items);
}
2473
+ /**
2474
+ * Cluster within semantic type using vector similarity
2475
+ */
2476
+ async _clusterWithinSemanticType(items, options) {
2477
+ if (items.length <= 2) {
2478
+ return [{
2479
+ id: `semantic-single-${items[0]?.nounType || 'unknown'}`,
2480
+ centroid: await this._calculateCentroidFromItems(items),
2481
+ members: items.map(item => item.id),
2482
+ size: items.length,
2483
+ confidence: 1.0,
2484
+ label: `${items[0]?.nounType || 'unknown'} group`,
2485
+ metadata: { clustering: 'semantic', nounType: items[0]?.nounType }
2486
+ }];
2487
+ }
2488
+ // Use hierarchical clustering for within-type clustering
2489
+ const result = await this._performHierarchicalClustering(items.map(item => item.id), { ...options, maxClusters: Math.min(Math.ceil(items.length / 3), 10) });
2490
+ return result.clusters;
2491
+ }
2492
+ /**
2493
+ * Find cross-type connections via verbs
2494
+ */
2495
+ async _findCrossTypeConnections(typeGroups, _options) {
2496
+ const connections = [];
2497
+ // Convert Map to array for compatibility
2498
+ const typeGroupsArray = Array.from(typeGroups.entries());
2499
+ for (const [fromType, fromItems] of typeGroupsArray) {
2500
+ for (const [toType, toItems] of typeGroupsArray) {
2501
+ if (fromType !== toType) {
2502
+ for (const fromItem of fromItems.slice(0, 10)) { // Sample to avoid N^2
2503
+ try {
2504
+ const verbs = await this.brain.getVerbsForNoun(fromItem.id);
2505
+ for (const verb of verbs) {
2506
+ const toItem = toItems.find(item => item.id === verb.target);
2507
+ if (toItem) {
2508
+ connections.push({
2509
+ from: fromItem.id,
2510
+ to: toItem.id,
2511
+ strength: verb.confidence || 0.7
2512
+ });
2513
+ }
2514
+ }
2515
+ }
2516
+ catch (error) {
2517
+ // Skip items that can't be processed
2518
+ continue;
2519
+ }
2520
+ }
2521
+ }
2522
+ }
2523
+ }
2524
+ return connections.filter(conn => conn.strength > 0.5);
2525
+ }
2526
+ /**
2527
+ * Merge semantic clusters based on connections
2528
+ */
2529
+ async _mergeSemanticClusters(clusters, connections) {
2530
+ // Simple merging based on strong connections
2531
+ const merged = [...clusters];
2532
+ for (const connection of connections) {
2533
+ if (connection.strength > 0.8) {
2534
+ const fromCluster = merged.find(c => c.members.includes(connection.from));
2535
+ const toCluster = merged.find(c => c.members.includes(connection.to));
2536
+ if (fromCluster && toCluster && fromCluster !== toCluster) {
2537
+ // Merge clusters
2538
+ fromCluster.members = [...fromCluster.members, ...toCluster.members];
2539
+ fromCluster.size = fromCluster.members.length;
2540
+ fromCluster.label = `merged ${fromCluster.label}`;
2541
+ // Remove merged cluster
2542
+ const index = merged.indexOf(toCluster);
2543
+ if (index > -1)
2544
+ merged.splice(index, 1);
2545
+ }
2546
+ }
2547
+ }
2548
+ return merged;
2549
+ }
2550
+ /**
2551
+ * Get optimal clustering level for HNSW
2552
+ */
2553
+ _getOptimalClusteringLevel(totalItems) {
2554
+ if (totalItems < 100)
2555
+ return 0;
2556
+ if (totalItems < 1000)
2557
+ return 1;
2558
+ if (totalItems < 10000)
2559
+ return 2;
2560
+ return 3;
2561
+ }
2562
+ /**
2563
+ * Get nodes at HNSW level
2564
+ */
2565
+ async _getHNSWLevelNodes(level) {
2566
+ // This would use the HNSW index to get nodes at specified level
2567
+ // For now, return a sample of all items
2568
+ const allItems = await this._getAllItemIds();
2569
+ const sampleSize = Math.max(10, Math.floor(allItems.length / Math.pow(2, level + 1)));
2570
+ return this._getRandomSample(allItems, sampleSize);
2571
+ }
2572
+ /**
2573
+ * Find cluster members using HNSW neighbors
2574
+ */
2575
+ async _findClusterMembers(levelNode, _allItems, threshold) {
2576
+ try {
2577
+ const neighbors = await this.brain.neural.neighbors(levelNode, {
2578
+ limit: Math.min(50, Math.floor(_allItems.length / 10)),
2579
+ minSimilarity: threshold
2580
+ });
2581
+ return [levelNode, ...neighbors.neighbors.map((n) => n.id)];
2582
+ }
2583
+ catch (error) {
2584
+ return [levelNode];
2585
+ }
2586
+ }
2587
+ /**
2588
+ * Calculate hierarchical clustering confidence
2589
+ */
2590
+ async _calculateHierarchicalConfidence(members) {
2591
+ if (members.length <= 1)
2592
+ return 1.0;
2593
+ const items = await this._getItemsWithVectors(members);
2594
+ const coherence = await this._calculateVectorCoherence(items);
2595
+ return coherence;
2596
+ }
2597
+ /**
2598
+ * Assign unassigned items to nearest clusters
2599
+ */
2600
+ async _assignUnassignedItems(unassigned, clusters) {
2601
+ for (const itemId of unassigned) {
2602
+ if (clusters.length === 0)
2603
+ break;
2604
+ try {
2605
+ const noun = await this.brain.getNoun(itemId);
2606
+ const itemVector = noun?.vector || [];
2607
+ if (itemVector.length === 0)
2608
+ continue;
2609
+ let bestCluster = clusters[0];
2610
+ let minDistance = Infinity;
2611
+ for (const cluster of clusters) {
2612
+ const distance = Math.sqrt(this._calculateSquaredDistance(itemVector, cluster.centroid));
2613
+ if (distance < minDistance) {
2614
+ minDistance = distance;
2615
+ bestCluster = cluster;
2616
+ }
2617
+ }
2618
+ bestCluster.members.push(itemId);
2619
+ bestCluster.size++;
2620
+ }
2621
+ catch (error) {
2622
+ // Skip items that can't be processed
2623
+ continue;
2624
+ }
2625
+ }
2626
+ }
2627
+ }
2628
+ //# sourceMappingURL=improvedNeuralAPI.js.map