@soulcraft/brainy 2.11.0 → 2.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2439 @@
1
+ /**
2
+ * Improved Neural API - Clean, Consistent, Performant
3
+ *
4
+ * Public API Surface:
5
+ * - brain.neural.similar(a, b, options?) // Similarity calculation
6
+ * - brain.neural.clusters(items?, options?) // Semantic clustering
7
+ * - brain.neural.neighbors(id, options?) // K-nearest neighbors
8
+ * - brain.neural.hierarchy(id, options?) // Semantic hierarchy
9
+ * - brain.neural.outliers(options?) // Anomaly detection
10
+ * - brain.neural.visualize(options?) // Visualization data
11
+ *
12
+ * Advanced Clustering:
13
+ * - brain.neural.clusterByDomain(field, options?) // Domain-aware clustering
14
+ * - brain.neural.clusterByTime(field, windows, options?) // Temporal clustering
15
+ * - brain.neural.clusterStream(options?) // AsyncIterator for streaming
16
+ * - brain.neural.updateClusters(items, options?) // Incremental clustering
17
+ *
18
+ * Private methods are prefixed with _ and not exposed in public API
19
+ */
20
+ import { cosineDistance, euclideanDistance } from '../utils/distance.js';
21
+ import { NeuralAPIError, ClusteringError, SimilarityError } from './types.js';
22
+ export class ImprovedNeuralAPI {
23
+ constructor(brain, config = {}) {
24
+ // Caching for performance
25
+ this.similarityCache = new Map();
26
+ this.clusterCache = new Map();
27
+ this.hierarchyCache = new Map();
28
+ this.neighborsCache = new Map();
29
+ // Performance tracking
30
+ this.performanceMetrics = new Map();
31
+ this.brain = brain;
32
+ this.config = {
33
+ cacheSize: 1000,
34
+ defaultAlgorithm: 'auto',
35
+ similarityMetric: 'cosine',
36
+ performanceTracking: true,
37
+ maxMemoryUsage: '1GB',
38
+ parallelProcessing: true,
39
+ streamingBatchSize: 100,
40
+ ...config
41
+ };
42
+ this._initializeCleanupTimer();
43
+ }
44
+ // ===== PUBLIC API: SIMILARITY =====
45
+ /**
46
+ * Calculate similarity between any two items (auto-detection)
47
+ * Supports: IDs, text strings, vectors, or mixed types
48
+ */
49
+ async similar(a, b, options = {}) {
50
+ const startTime = performance.now();
51
+ try {
52
+ // Create cache key
53
+ const cacheKey = this._createSimilarityKey(a, b, options);
54
+ if (this.similarityCache.has(cacheKey)) {
55
+ return this.similarityCache.get(cacheKey);
56
+ }
57
+ let result;
58
+ // Auto-detect input types and route accordingly
59
+ if (this._isId(a) && this._isId(b)) {
60
+ result = await this._similarityById(a, b, options);
61
+ }
62
+ else if (this._isVector(a) && this._isVector(b)) {
63
+ result = await this._similarityByVector(a, b, options);
64
+ }
65
+ else if (typeof a === 'string' && typeof b === 'string') {
66
+ result = await this._similarityByText(a, b, options);
67
+ }
68
+ else {
69
+ // Mixed types - convert to vectors
70
+ const vectorA = await this._convertToVector(a);
71
+ const vectorB = await this._convertToVector(b);
72
+ result = await this._similarityByVector(vectorA, vectorB, options);
73
+ }
74
+ // Cache result
75
+ this._cacheResult(cacheKey, result, this.similarityCache);
76
+ // Track performance
77
+ this._trackPerformance('similarity', startTime, 2, 'mixed');
78
+ return result;
79
+ }
80
+ catch (error) {
81
+ const errorMessage = error instanceof Error ? error.message : String(error);
82
+ throw new SimilarityError(`Failed to calculate similarity: ${errorMessage}`, {
83
+ inputA: typeof a === 'object' ? 'vector' : String(a).substring(0, 50),
84
+ inputB: typeof b === 'object' ? 'vector' : String(b).substring(0, 50),
85
+ options
86
+ });
87
+ }
88
+ }
89
+ // ===== PUBLIC API: CLUSTERING =====
90
+ /**
91
+ * Intelligent semantic clustering with auto-routing
92
+ * - No input: Cluster all data
93
+ * - Array: Cluster specific items
94
+ * - String: Find clusters near this item
95
+ * - Options object: Advanced configuration
96
+ */
97
+ async clusters(input) {
98
+ const startTime = performance.now();
99
+ try {
100
+ let options = {};
101
+ let items;
102
+ // Parse input
103
+ if (!input) {
104
+ // Cluster all data
105
+ items = undefined;
106
+ options = { algorithm: 'auto' };
107
+ }
108
+ else if (Array.isArray(input)) {
109
+ // Cluster specific items
110
+ items = input;
111
+ options = { algorithm: 'auto' };
112
+ }
113
+ else if (typeof input === 'string') {
114
+ // Find clusters near this item
115
+ const nearbyResult = await this.neighbors(input, { limit: 100 });
116
+ items = nearbyResult.neighbors.map(n => n.id);
117
+ options = { algorithm: 'auto' };
118
+ }
119
+ else if (typeof input === 'object') {
120
+ // Configuration object
121
+ options = input;
122
+ items = undefined;
123
+ }
124
+ else {
125
+ throw new ClusteringError('Invalid input for clustering', { input });
126
+ }
127
+ // Check cache
128
+ const cacheKey = this._createClusteringKey(items, options);
129
+ if (this.clusterCache.has(cacheKey)) {
130
+ const cached = this.clusterCache.get(cacheKey);
131
+ return cached.clusters;
132
+ }
133
+ // Route to optimal algorithm
134
+ const result = await this._routeClusteringAlgorithm(items, options);
135
+ // Cache result
136
+ this._cacheResult(cacheKey, result, this.clusterCache);
137
+ // Track performance
138
+ this._trackPerformance('clustering', startTime, items?.length || 0, options.algorithm || 'auto');
139
+ return result.clusters;
140
+ }
141
+ catch (error) {
142
+ const errorMessage = error instanceof Error ? error.message : String(error);
143
+ throw new ClusteringError(`Failed to perform clustering: ${errorMessage}`, {
144
+ input: typeof input === 'object' ? JSON.stringify(input) : input,
145
+ });
146
+ }
147
+ }
148
+ /**
149
+ * Fast hierarchical clustering using HNSW levels
150
+ */
151
+ async clusterFast(options = {}) {
152
+ const fullOptions = {
153
+ algorithm: 'hierarchical',
154
+ maxClusters: options.maxClusters,
155
+ ...options
156
+ };
157
+ const result = await this._performHierarchicalClustering(undefined, fullOptions);
158
+ return result.clusters;
159
+ }
160
+ /**
161
+ * Large-scale clustering with intelligent sampling
162
+ */
163
+ async clusterLarge(options = {}) {
164
+ const fullOptions = {
165
+ algorithm: 'auto',
166
+ sampleSize: options.sampleSize || 1000,
167
+ strategy: options.strategy || 'diverse',
168
+ ...options
169
+ };
170
+ const result = await this._performSampledClustering(undefined, fullOptions);
171
+ return result.clusters;
172
+ }
173
+ // ===== PUBLIC API: ADVANCED CLUSTERING =====
174
+ /**
175
+ * Domain-aware clustering based on metadata fields
176
+ */
177
+ async clusterByDomain(field, options = {}) {
178
+ const startTime = performance.now();
179
+ try {
180
+ // Get all items with the specified field
181
+ const items = await this._getItemsByField(field);
182
+ if (items.length === 0) {
183
+ return [];
184
+ }
185
+ // Group by domain values
186
+ const domainGroups = this._groupByDomain(items, field);
187
+ const domainClusters = [];
188
+ // Cluster within each domain
189
+ for (const [domain, domainItems] of domainGroups) {
190
+ const domainOptions = {
191
+ ...options,
192
+ algorithm: 'auto',
193
+ maxClusters: Math.min(options.maxClusters || 10, Math.ceil(domainItems.length / 3))
194
+ };
195
+ const clusters = await this._performClustering(domainItems.map(item => item.id), domainOptions);
196
+ // Convert to domain clusters
197
+ for (const cluster of clusters.clusters) {
198
+ domainClusters.push({
199
+ ...cluster,
200
+ domain,
201
+ domainConfidence: this._calculateDomainConfidence(cluster, domainItems),
202
+ crossDomainMembers: options.crossDomainThreshold
203
+ ? await this._findCrossDomainMembers(cluster, options.crossDomainThreshold)
204
+ : undefined
205
+ });
206
+ }
207
+ }
208
+ // Handle cross-domain clustering if enabled
209
+ if (!options.preserveDomainBoundaries) {
210
+ const crossDomainClusters = await this._findCrossDomainClusters(domainClusters, options.crossDomainThreshold || 0.8);
211
+ domainClusters.push(...crossDomainClusters);
212
+ }
213
+ this._trackPerformance('domainClustering', startTime, items.length, field);
214
+ return domainClusters;
215
+ }
216
+ catch (error) {
217
+ const errorMessage = error instanceof Error ? error.message : String(error);
218
+ throw new ClusteringError(`Failed to cluster by domain: ${errorMessage}`, { field, options });
219
+ }
220
+ }
221
+ /**
222
+ * Temporal clustering based on time windows
223
+ */
224
+ async clusterByTime(timeField, windows, options = { timeField, windows }) {
225
+ const startTime = performance.now();
226
+ try {
227
+ const temporalClusters = [];
228
+ for (const window of windows) {
229
+ // Get items in this time window
230
+ const windowItems = await this._getItemsByTimeWindow(timeField, window);
231
+ if (windowItems.length === 0)
232
+ continue;
233
+ // Cluster items in this window
234
+ const clusteringOptions = {
235
+ ...options,
236
+ algorithm: 'auto'
237
+ };
238
+ const clusters = await this._performClustering(windowItems.map(item => item.id), clusteringOptions);
239
+ // Convert to temporal clusters
240
+ for (const cluster of clusters.clusters) {
241
+ const temporal = await this._calculateTemporalMetrics(cluster, windowItems, timeField);
242
+ temporalClusters.push({
243
+ ...cluster,
244
+ timeWindow: window,
245
+ trend: temporal.trend,
246
+ temporal: temporal.metrics
247
+ });
248
+ }
249
+ }
250
+ // Handle overlapping windows
251
+ if (options.overlapStrategy === 'merge') {
252
+ return this._mergeOverlappingTemporalClusters(temporalClusters);
253
+ }
254
+ this._trackPerformance('temporalClustering', startTime, temporalClusters.length, 'temporal');
255
+ return temporalClusters;
256
+ }
257
+ catch (error) {
258
+ const errorMessage = error instanceof Error ? error.message : String(error);
259
+ throw new ClusteringError(`Failed to cluster by time: ${errorMessage}`, { timeField, windows, options });
260
+ }
261
+ }
262
+ /**
263
+ * Streaming clustering with real-time updates
264
+ */
265
+ async *clusterStream(options = {}) {
266
+ const batchSize = options.batchSize || this.config.streamingBatchSize || 100;
267
+ let batchNumber = 0;
268
+ let processedCount = 0;
269
+ try {
270
+ // Get all items for processing
271
+ const allItems = await this._getAllItemIds();
272
+ const totalItems = allItems.length;
273
+ // Process in batches
274
+ for (let i = 0; i < allItems.length; i += batchSize) {
275
+ const startTime = performance.now();
276
+ const batch = allItems.slice(i, i + batchSize);
277
+ // Perform clustering on this batch
278
+ const result = await this._performClustering(batch, {
279
+ ...options,
280
+ algorithm: 'auto',
281
+ cacheResults: false // Don't cache streaming results
282
+ });
283
+ processedCount += batch.length;
284
+ const isComplete = processedCount >= totalItems;
285
+ yield {
286
+ clusters: result.clusters,
287
+ batchNumber: ++batchNumber,
288
+ isComplete,
289
+ progress: {
290
+ processed: processedCount,
291
+ total: totalItems,
292
+ percentage: (processedCount / totalItems) * 100
293
+ },
294
+ metrics: {
295
+ ...result.metrics,
296
+ executionTime: performance.now() - startTime
297
+ }
298
+ };
299
+ // Adaptive threshold adjustment
300
+ if (options.adaptiveThreshold && batchNumber > 1) {
301
+ options.threshold = this._adjustThresholdAdaptively(result.clusters, options.threshold);
302
+ }
303
+ }
304
+ }
305
+ catch (error) {
306
+ const errorMessage = error instanceof Error ? error.message : String(error);
307
+ throw new ClusteringError(`Failed in streaming clustering: ${errorMessage}`, { options, batchNumber });
308
+ }
309
+ }
310
+ /**
311
+ * Incremental clustering - add new items to existing clusters
312
+ */
313
+ async updateClusters(newItems, options = {}) {
314
+ const startTime = performance.now();
315
+ try {
316
+ // Get existing clusters
317
+ const existingClusters = await this.clusters({ ...options, algorithm: 'auto' });
318
+ // For each new item, find best cluster or create new one
319
+ const updatedClusters = [...existingClusters];
320
+ const unassignedItems = [];
321
+ for (const itemId of newItems) {
322
+ let bestCluster = null;
323
+ let bestSimilarity = 0;
324
+ // Find most similar existing cluster
325
+ for (const cluster of updatedClusters) {
326
+ const similarity = await this._calculateItemToClusterSimilarity(itemId, cluster);
327
+ if (similarity > bestSimilarity && similarity > (options.threshold || 0.6)) {
328
+ bestSimilarity = similarity;
329
+ bestCluster = cluster;
330
+ }
331
+ }
332
+ if (bestCluster) {
333
+ // Add to existing cluster
334
+ bestCluster.members.push(itemId);
335
+ bestCluster.size = bestCluster.members.length;
336
+ // Recalculate centroid
337
+ bestCluster.centroid = await this._recalculateClusterCentroid(bestCluster);
338
+ }
339
+ else {
340
+ // Item doesn't fit existing clusters
341
+ unassignedItems.push(itemId);
342
+ }
343
+ }
344
+ // Create new clusters for unassigned items
345
+ if (unassignedItems.length > 0) {
346
+ const newClusters = await this._performClustering(unassignedItems, options);
347
+ updatedClusters.push(...newClusters.clusters);
348
+ }
349
+ this._trackPerformance('incrementalClustering', startTime, newItems.length, 'incremental');
350
+ return updatedClusters;
351
+ }
352
+ catch (error) {
353
+ const errorMessage = error instanceof Error ? error.message : String(error);
354
+ throw new ClusteringError(`Failed to update clusters: ${errorMessage}`, { newItems, options });
355
+ }
356
+ }
357
+ // ===== PUBLIC API: NEIGHBORS & HIERARCHY =====
358
+ /**
359
+ * Find K-nearest semantic neighbors
360
+ */
361
+ async neighbors(id, options = {}) {
362
+ const startTime = performance.now();
363
+ try {
364
+ const cacheKey = `neighbors:${id}:${JSON.stringify(options)}`;
365
+ if (this.neighborsCache.has(cacheKey)) {
366
+ return this.neighborsCache.get(cacheKey);
367
+ }
368
+ const limit = options.limit || 10;
369
+ const minSimilarity = options.minSimilarity || 0.1;
370
+ // Use HNSW index for efficient neighbor search
371
+ const searchResults = await this.brain.search('', {
372
+ ...options,
373
+ limit: limit * 2, // Get more than needed for filtering
374
+ metadata: options.includeMetadata ? {} : undefined
375
+ });
376
+ // Filter and sort neighbors
377
+ const neighbors = [];
378
+ for (const result of searchResults) {
379
+ if (result.id === id)
380
+ continue; // Skip self
381
+ const similarity = await this._calculateSimilarity(id, result.id);
382
+ if (similarity >= minSimilarity) {
383
+ neighbors.push({
384
+ id: result.id,
385
+ similarity,
386
+ data: result.content || result.data,
387
+ metadata: options.includeMetadata ? result.metadata : undefined,
388
+ distance: 1 - similarity
389
+ });
390
+ }
391
+ if (neighbors.length >= limit)
392
+ break;
393
+ }
394
+ // Sort by specified criteria
395
+ this._sortNeighbors(neighbors, options.sortBy || 'similarity');
396
+ const result = {
397
+ neighbors: neighbors.slice(0, limit),
398
+ queryId: id,
399
+ totalFound: neighbors.length,
400
+ averageSimilarity: neighbors.reduce((sum, n) => sum + n.similarity, 0) / neighbors.length
401
+ };
402
+ this._cacheResult(cacheKey, result, this.neighborsCache);
403
+ this._trackPerformance('neighbors', startTime, limit, 'knn');
404
+ return result;
405
+ }
406
+ catch (error) {
407
+ const errorMessage = error instanceof Error ? error.message : String(error);
408
+ throw new NeuralAPIError(`Failed to find neighbors: ${errorMessage}`, 'NEIGHBORS_ERROR', { id, options });
409
+ }
410
+ }
411
+ /**
412
+ * Build semantic hierarchy around an item
413
+ */
414
+ async hierarchy(id, options = {}) {
415
+ const startTime = performance.now();
416
+ try {
417
+ const cacheKey = `hierarchy:${id}:${JSON.stringify(options)}`;
418
+ if (this.hierarchyCache.has(cacheKey)) {
419
+ return this.hierarchyCache.get(cacheKey);
420
+ }
421
+ // Get item data
422
+ const item = await this.brain.getNoun(id);
423
+ if (!item) {
424
+ throw new Error(`Item with ID ${id} not found`);
425
+ }
426
+ // Build hierarchy based on strategy
427
+ const hierarchy = await this._buildSemanticHierarchy(item, options);
428
+ this._cacheResult(cacheKey, hierarchy, this.hierarchyCache);
429
+ this._trackPerformance('hierarchy', startTime, 1, 'hierarchy');
430
+ return hierarchy;
431
+ }
432
+ catch (error) {
433
+ const errorMessage = error instanceof Error ? error.message : String(error);
434
+ throw new NeuralAPIError(`Failed to build hierarchy: ${errorMessage}`, 'HIERARCHY_ERROR', { id, options });
435
+ }
436
+ }
437
+ // ===== PUBLIC API: ANALYSIS =====
438
+ /**
439
+ * Detect outliers and anomalous items
440
+ */
441
+ async outliers(options = {}) {
442
+ const startTime = performance.now();
443
+ try {
444
+ const threshold = options.threshold || 0.3;
445
+ const method = options.method || 'cluster-based';
446
+ let outliers = [];
447
+ switch (method) {
448
+ case 'isolation':
449
+ outliers = await this._detectOutliersIsolation(threshold, options);
450
+ break;
451
+ case 'statistical':
452
+ outliers = await this._detectOutliersStatistical(threshold, options);
453
+ break;
454
+ case 'cluster-based':
455
+ default:
456
+ outliers = await this._detectOutliersClusterBased(threshold, options);
457
+ break;
458
+ }
459
+ this._trackPerformance('outlierDetection', startTime, outliers.length, method);
460
+ return outliers;
461
+ }
462
+ catch (error) {
463
+ const errorMessage = error instanceof Error ? error.message : String(error);
464
+ throw new NeuralAPIError(`Failed to detect outliers: ${errorMessage}`, 'OUTLIER_ERROR', { options });
465
+ }
466
+ }
467
+ /**
468
+ * Generate visualization data for graph libraries
469
+ */
470
+ async visualize(options = {}) {
471
+ const startTime = performance.now();
472
+ try {
473
+ const maxNodes = options.maxNodes || 100;
474
+ const dimensions = options.dimensions || 2;
475
+ const algorithm = options.algorithm || 'force';
476
+ // Get data for visualization
477
+ const nodes = await this._generateVisualizationNodes(maxNodes, options);
478
+ const edges = options.includeEdges ? await this._generateVisualizationEdges(nodes, options) : [];
479
+ const clusters = options.clusterColors ? await this._generateVisualizationClusters(nodes) : [];
480
+ // Apply layout algorithm
481
+ const positionedNodes = await this._applyLayoutAlgorithm(nodes, edges, algorithm, dimensions);
482
+ const result = {
483
+ nodes: positionedNodes,
484
+ edges,
485
+ clusters,
486
+ metadata: {
487
+ algorithm,
488
+ dimensions,
489
+ totalNodes: nodes.length,
490
+ totalEdges: edges.length,
491
+ generatedAt: new Date()
492
+ }
493
+ };
494
+ this._trackPerformance('visualization', startTime, nodes.length, algorithm);
495
+ return result;
496
+ }
497
+ catch (error) {
498
+ const errorMessage = error instanceof Error ? error.message : String(error);
499
+ throw new NeuralAPIError(`Failed to generate visualization: ${errorMessage}`, 'VISUALIZATION_ERROR', { options });
500
+ }
501
+ }
502
+ // ===== PRIVATE IMPLEMENTATION METHODS =====
503
+ async _routeClusteringAlgorithm(items, options) {
504
+ const algorithm = options.algorithm || 'auto';
505
+ const itemCount = items?.length || await this._getTotalItemCount();
506
+ // Auto-select optimal algorithm based on data size and characteristics
507
+ if (algorithm === 'auto') {
508
+ // Intelligent algorithm selection based on data characteristics
509
+ const itemIds = items || await this._getAllItemIds();
510
+ const dataCharacteristics = await this._analyzeDataCharacteristics(itemIds);
511
+ const hasRichGraph = dataCharacteristics.graphDensity > 0.05;
512
+ const hasSemanticTypes = Object.keys(dataCharacteristics.typeDistribution).length > 3;
513
+ if (hasRichGraph && hasSemanticTypes) {
514
+ // Best of all worlds for rich semantic graphs
515
+ return this._performMultiModalClustering(items, { ...options, algorithm: 'multimodal' });
516
+ }
517
+ else if (hasRichGraph) {
518
+ // Strong relationship network - use graph clustering
519
+ return this._performGraphClustering(items, { ...options, algorithm: 'graph' });
520
+ }
521
+ else if (hasSemanticTypes) {
522
+ // Rich semantic taxonomy - use semantic clustering
523
+ return this._performSemanticClustering(items, { ...options, algorithm: 'semantic' });
524
+ }
525
+ else if (itemCount > 10000) {
526
+ // Large dataset - use sampling
527
+ return this._performSampledClustering(items, { ...options, algorithm: 'sample' });
528
+ }
529
+ else if (itemCount > 1000) {
530
+ // Medium dataset - use hierarchical HNSW
531
+ return this._performHierarchicalClustering(items, { ...options, algorithm: 'hierarchical' });
532
+ }
533
+ else {
534
+ // Small dataset - use k-means for quality
535
+ return this._performKMeansClustering(items, { ...options, algorithm: 'kmeans' });
536
+ }
537
+ }
538
+ // Use specified algorithm
539
+ switch (algorithm) {
540
+ case 'hierarchical':
541
+ return this._performHierarchicalClustering(items, options);
542
+ case 'semantic':
543
+ return this._performSemanticClustering(items, options);
544
+ case 'graph':
545
+ return this._performGraphClustering(items, options);
546
+ case 'multimodal':
547
+ return this._performMultiModalClustering(items, options);
548
+ case 'kmeans':
549
+ return this._performKMeansClustering(items, options);
550
+ case 'dbscan':
551
+ return this._performDBSCANClustering(items, options);
552
+ case 'sample':
553
+ return this._performSampledClustering(items, options);
554
+ default:
555
+ throw new ClusteringError(`Unsupported algorithm: ${algorithm}`);
556
+ }
557
+ }
558
+ async _performClustering(items, options) {
559
+ // This is the main clustering dispatcher - routes to specific algorithms
560
+ return this._routeClusteringAlgorithm(items, options);
561
+ }
562
+ // ===== REAL CLUSTERING IMPLEMENTATIONS =====
563
+ /**
564
+ * SEMANTIC-AWARE CLUSTERING: Uses existing NounType/VerbType taxonomy + HNSW
565
+ */
566
+ async _performSemanticClustering(items, options) {
567
+ const startTime = performance.now();
568
+ // Get all items if not specified
569
+ const itemIds = items || await this._getAllItemIds();
570
+ if (itemIds.length === 0) {
571
+ return this._createEmptyResult(startTime, 'semantic');
572
+ }
573
+ // 1. Group items by semantic type (NounType) - O(n) operation
574
+ const itemsWithMetadata = await this._getItemsWithMetadata(itemIds);
575
+ const typeGroups = this._groupBySemanticType(itemsWithMetadata);
576
+ const allClusters = [];
577
+ // 2. Cluster within each semantic type using HNSW - parallel processing
578
+ const typeClusteringPromises = Array.from(typeGroups.entries()).map(async ([nounType, groupItems]) => {
579
+ if (groupItems.length < (options.minClusterSize || 2)) {
580
+ // Create single cluster for small groups
581
+ return [{
582
+ id: `semantic-${nounType}`,
583
+ centroid: await this._calculateGroupCentroid(groupItems),
584
+ members: groupItems.map(item => item.id),
585
+ size: groupItems.length,
586
+ confidence: 0.9, // High confidence for type-based clustering
587
+ label: `${nounType} cluster`,
588
+ metadata: { semanticType: nounType, clustering: 'semantic' }
589
+ }];
590
+ }
591
+ // Use HNSW hierarchical clustering within type
592
+ return this._clusterWithinSemanticType(groupItems, options);
593
+ });
594
+ const typeClusterResults = await Promise.all(typeClusteringPromises);
595
+ typeClusterResults.forEach(clusters => allClusters.push(...clusters));
596
+ // 3. Find cross-type relationships using existing verb connections
597
+ const crossTypeConnections = await this._findCrossTypeConnections(typeGroups, options);
598
+ // 4. Merge clusters that have strong cross-type relationships
599
+ const finalClusters = await this._mergeSemanticClusters(allClusters, crossTypeConnections);
600
+ return {
601
+ clusters: finalClusters.slice(0, options.maxClusters || finalClusters.length),
602
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'semantic'),
603
+ metadata: {
604
+ totalItems: itemIds.length,
605
+ clustersFound: finalClusters.length,
606
+ averageClusterSize: finalClusters.reduce((sum, c) => sum + c.size, 0) / finalClusters.length || 0,
607
+ semanticTypes: Array.from(typeGroups.keys()).length,
608
+ timestamp: new Date()
609
+ }
610
+ };
611
+ }
612
+ /**
613
+ * HIERARCHICAL CLUSTERING: Uses existing HNSW levels for O(n) clustering
614
+ */
615
+ async _performHierarchicalClustering(items, options) {
616
+ const startTime = performance.now();
617
+ const itemIds = items || await this._getAllItemIds();
618
+ if (itemIds.length === 0) {
619
+ return this._createEmptyResult(startTime, 'hierarchical');
620
+ }
621
+ // Use existing HNSW level structure for natural clustering
622
+ const level = options.level || this._getOptimalClusteringLevel(itemIds.length);
623
+ const maxClusters = options.maxClusters || Math.min(50, Math.ceil(itemIds.length / 20));
624
+ // Get HNSW level representatives - these are natural cluster centers
625
+ const levelNodes = await this._getHNSWLevelNodes(level);
626
+ const clusterCenters = levelNodes.slice(0, maxClusters);
627
+ const clusters = [];
628
+ // Create clusters around each level representative
629
+ for (let i = 0; i < clusterCenters.length; i++) {
630
+ const center = clusterCenters[i];
631
+ // Find items that belong to this cluster using HNSW neighbors
632
+ const members = await this._findClusterMembers(center, itemIds, 0.5);
633
+ if (members.length > 0) {
634
+ // Get actual node data for creating cluster
635
+ const memberData = await this._getItemsWithMetadata(members);
636
+ const centroid = await this._calculateCentroidFromItems(memberData);
637
+ clusters.push({
638
+ id: `hierarchical-${i}`,
639
+ centroid,
640
+ members,
641
+ size: members.length,
642
+ confidence: await this._calculateHierarchicalConfidence(members),
643
+ label: await this._generateClusterLabel(memberData, 'hierarchical'),
644
+ metadata: { level, clusterCenter: center, clustering: 'hierarchical' }
645
+ });
646
+ }
647
+ }
648
+ // Assign remaining items to nearest clusters
649
+ const assignedItems = new Set(clusters.flatMap(c => c.members));
650
+ const unassignedItems = itemIds.filter(id => !assignedItems.has(id));
651
+ if (unassignedItems.length > 0) {
652
+ await this._assignUnassignedItems(unassignedItems, clusters);
653
+ }
654
+ return {
655
+ clusters,
656
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'hierarchical'),
657
+ metadata: {
658
+ totalItems: itemIds.length,
659
+ clustersFound: clusters.length,
660
+ averageClusterSize: clusters.reduce((sum, c) => sum + c.size, 0) / clusters.length || 0,
661
+ hnswLevel: level,
662
+ timestamp: new Date()
663
+ }
664
+ };
665
+ }
666
+ /**
667
+ * K-MEANS CLUSTERING: Real implementation using existing distance functions
668
+ */
669
+ async _performKMeansClustering(items, options) {
670
+ const startTime = performance.now();
671
+ const itemIds = items || await this._getAllItemIds();
672
+ if (itemIds.length === 0) {
673
+ return this._createEmptyResult(startTime, 'kmeans');
674
+ }
675
+ // Get vectors for all items using existing infrastructure
676
+ const itemsWithVectors = await this._getItemsWithVectors(itemIds);
677
+ // Determine optimal k
678
+ const k = options.maxClusters || Math.min(Math.floor(Math.sqrt(itemsWithVectors.length / 2)), 50 // Maximum clusters for practical use
679
+ );
680
+ if (k <= 1) {
681
+ // Single cluster case
682
+ return {
683
+ clusters: [{
684
+ id: 'kmeans-single',
685
+ centroid: await this._calculateCentroidFromItems(itemsWithVectors),
686
+ members: itemIds,
687
+ size: itemIds.length,
688
+ confidence: 1.0,
689
+ label: 'Single cluster',
690
+ metadata: { clustering: 'kmeans', k: 1 }
691
+ }],
692
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'kmeans'),
693
+ metadata: {
694
+ totalItems: itemIds.length,
695
+ clustersFound: 1,
696
+ averageClusterSize: itemIds.length,
697
+ kValue: 1,
698
+ timestamp: new Date()
699
+ }
700
+ };
701
+ }
702
+ // Initialize centroids using k-means++ for better convergence
703
+ const centroids = await this._initializeCentroidsKMeansPlusPlus(itemsWithVectors, k);
704
+ let assignments = new Array(itemsWithVectors.length).fill(0);
705
+ let hasConverged = false;
706
+ const maxIterations = options.maxIterations || 100;
707
+ const tolerance = options.tolerance || 1e-4;
708
+ // K-means iteration loop
709
+ for (let iteration = 0; iteration < maxIterations && !hasConverged; iteration++) {
710
+ // Assignment step: assign each point to nearest centroid
711
+ const newAssignments = await this._assignPointsToCentroids(itemsWithVectors, centroids);
712
+ // Update step: recalculate centroids
713
+ const newCentroids = await this._updateCentroids(itemsWithVectors, newAssignments, k);
714
+ // Check convergence: has assignment changed significantly?
715
+ const changeRate = this._calculateAssignmentChangeRate(assignments, newAssignments);
716
+ hasConverged = changeRate < tolerance;
717
+ assignments = newAssignments;
718
+ // Update centroids for next iteration
719
+ for (let i = 0; i < centroids.length; i++) {
720
+ centroids[i] = newCentroids[i];
721
+ }
722
+ }
723
+ // Create semantic clusters from k-means results
724
+ const clusters = [];
725
+ for (let clusterIndex = 0; clusterIndex < k; clusterIndex++) {
726
+ const clusterMembers = itemsWithVectors.filter((_, i) => assignments[i] === clusterIndex);
727
+ if (clusterMembers.length > 0) {
728
+ const memberIds = clusterMembers.map(item => item.id);
729
+ clusters.push({
730
+ id: `kmeans-${clusterIndex}`,
731
+ centroid: centroids[clusterIndex],
732
+ members: memberIds,
733
+ size: memberIds.length,
734
+ confidence: await this._calculateKMeansClusterConfidence(clusterMembers, centroids[clusterIndex]),
735
+ label: await this._generateClusterLabel(clusterMembers, 'kmeans'),
736
+ metadata: {
737
+ clustering: 'kmeans',
738
+ k,
739
+ clusterIndex,
740
+ convergenceIterations: maxIterations
741
+ }
742
+ });
743
+ }
744
+ }
745
+ return {
746
+ clusters,
747
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'kmeans'),
748
+ metadata: {
749
+ totalItems: itemIds.length,
750
+ clustersFound: clusters.length,
751
+ averageClusterSize: clusters.reduce((sum, c) => sum + c.size, 0) / clusters.length || 0,
752
+ kValue: k,
753
+ hasConverged,
754
+ timestamp: new Date()
755
+ }
756
+ };
757
+ }
758
+ /**
759
+ * DBSCAN CLUSTERING: Density-based clustering with adaptive parameters using HNSW
760
+ */
761
+ async _performDBSCANClustering(items, options) {
762
+ const startTime = performance.now();
763
+ const itemIds = items || await this._getAllItemIds();
764
+ if (itemIds.length === 0) {
765
+ return this._createEmptyResult(startTime, 'dbscan');
766
+ }
767
+ const itemsWithVectors = await this._getItemsWithVectors(itemIds);
768
+ // Adaptive parameter selection using HNSW neighbors
769
+ const minPts = options.minClusterSize || Math.max(4, Math.floor(Math.log2(itemsWithVectors.length)));
770
+ const eps = options.threshold || await this._estimateOptimalEps(itemsWithVectors, minPts);
771
+ // DBSCAN state tracking
772
+ const NOISE = -1;
773
+ const UNVISITED = 0;
774
+ const visited = new Map();
775
+ const clusterAssignments = new Map();
776
+ let currentClusterId = 1;
777
+ // Process each point
778
+ for (const item of itemsWithVectors) {
779
+ if (visited.get(item.id))
780
+ continue;
781
+ visited.set(item.id, true);
782
+ // Find neighbors using existing HNSW infrastructure for efficiency
783
+ const neighbors = await this._findNeighborsWithinEps(item, itemsWithVectors, eps);
784
+ if (neighbors.length < minPts) {
785
+ // Mark as noise (outlier)
786
+ clusterAssignments.set(item.id, NOISE);
787
+ }
788
+ else {
789
+ // Start new cluster
790
+ await this._expandCluster(item, neighbors, currentClusterId, eps, minPts, itemsWithVectors, visited, clusterAssignments);
791
+ currentClusterId++;
792
+ }
793
+ }
794
+ // Convert DBSCAN results to SemanticCluster format
795
+ const clusters = [];
796
+ const clusterGroups = new Map();
797
+ const outliers = [];
798
+ // Group items by cluster assignment
799
+ for (const [itemId, clusterId] of clusterAssignments) {
800
+ if (clusterId === NOISE) {
801
+ outliers.push(itemId);
802
+ }
803
+ else {
804
+ if (!clusterGroups.has(clusterId)) {
805
+ clusterGroups.set(clusterId, []);
806
+ }
807
+ clusterGroups.get(clusterId).push(itemId);
808
+ }
809
+ }
810
+ // Create SemanticCluster objects
811
+ for (const [clusterId, memberIds] of clusterGroups) {
812
+ if (memberIds.length > 0) {
813
+ const members = itemsWithVectors.filter(item => memberIds.includes(item.id));
814
+ clusters.push({
815
+ id: `dbscan-${clusterId}`,
816
+ centroid: await this._calculateCentroidFromItems(members),
817
+ members: memberIds,
818
+ size: memberIds.length,
819
+ confidence: await this._calculateDBSCANClusterConfidence(members, eps),
820
+ label: await this._generateClusterLabel(members, 'dbscan'),
821
+ metadata: {
822
+ clustering: 'dbscan',
823
+ clusterId,
824
+ eps,
825
+ minPts,
826
+ isDensityBased: true
827
+ }
828
+ });
829
+ }
830
+ }
831
+ // Handle outliers - optionally create outlier cluster or assign to nearest
832
+ if (outliers.length > 0 && options.includeOutliers) {
833
+ const outlierMembers = itemsWithVectors.filter(item => outliers.includes(item.id));
834
+ clusters.push({
835
+ id: 'dbscan-outliers',
836
+ centroid: await this._calculateCentroidFromItems(outlierMembers),
837
+ members: outliers,
838
+ size: outliers.length,
839
+ confidence: 0.1, // Low confidence for outliers
840
+ label: 'Outliers',
841
+ metadata: {
842
+ clustering: 'dbscan',
843
+ isOutlierCluster: true,
844
+ eps,
845
+ minPts
846
+ }
847
+ });
848
+ }
849
+ return {
850
+ clusters,
851
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'dbscan'),
852
+ metadata: {
853
+ totalItems: itemIds.length,
854
+ clustersFound: clusters.length,
855
+ averageClusterSize: clusters.reduce((sum, c) => sum + c.size, 0) / clusters.length || 0,
856
+ outlierCount: outliers.length,
857
+ eps,
858
+ minPts,
859
+ timestamp: new Date()
860
+ }
861
+ };
862
+ }
863
+ /**
864
+ * GRAPH COMMUNITY DETECTION: Uses existing verb relationships for clustering
865
+ */
866
+ async _performGraphClustering(items, options) {
867
+ const startTime = performance.now();
868
+ const itemIds = items || await this._getAllItemIds();
869
+ if (itemIds.length === 0) {
870
+ return this._createEmptyResult(startTime, 'graph');
871
+ }
872
+ // Build graph from existing verb relationships
873
+ const graph = await this._buildGraphFromVerbs(itemIds, options);
874
+ // Detect communities using modularity optimization
875
+ const communities = await this._detectCommunities(graph, options);
876
+ // Enhance communities with vector similarity for boundary refinement
877
+ const refinedCommunities = await this._refineCommunitiesWithVectors(communities, options);
878
+ // Convert to SemanticCluster format with Triple Intelligence labeling
879
+ const clusters = [];
880
+ for (let i = 0; i < refinedCommunities.length; i++) {
881
+ const community = refinedCommunities[i];
882
+ if (community.members.length > 0) {
883
+ const members = await this._getItemsWithMetadata(community.members);
884
+ // Use Triple Intelligence for intelligent cluster labeling
885
+ const clusterLabel = await this._generateIntelligentClusterLabel(members, 'graph');
886
+ const clusterCentroid = await this._calculateCentroidFromItems(members);
887
+ clusters.push({
888
+ id: `graph-${i}`,
889
+ centroid: clusterCentroid,
890
+ members: community.members,
891
+ size: community.members.length,
892
+ confidence: community.modularity || 0.7,
893
+ label: clusterLabel,
894
+ metadata: {
895
+ clustering: 'graph',
896
+ communityId: i,
897
+ modularity: community.modularity,
898
+ graphDensity: community.density,
899
+ strongestConnections: community.strongestConnections
900
+ }
901
+ });
902
+ }
903
+ }
904
+ return {
905
+ clusters,
906
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'graph'),
907
+ metadata: {
908
+ totalItems: itemIds.length,
909
+ clustersFound: clusters.length,
910
+ averageClusterSize: clusters.reduce((sum, c) => sum + c.size, 0) / clusters.length || 0,
911
+ averageModularity: clusters.reduce((sum, c) => sum + (c.metadata?.modularity || 0), 0) / clusters.length || 0,
912
+ timestamp: new Date()
913
+ }
914
+ };
915
+ }
916
+ /**
917
+ * MULTI-MODAL FUSION: Combines vector + graph + semantic + Triple Intelligence
918
+ */
919
+ async _performMultiModalClustering(items, options) {
920
+ const startTime = performance.now();
921
+ const itemIds = items || await this._getAllItemIds();
922
+ if (itemIds.length === 0) {
923
+ return this._createEmptyResult(startTime, 'multimodal');
924
+ }
925
+ // Run multiple clustering algorithms in parallel
926
+ const [vectorClusters, graphClusters, semanticClusters] = await Promise.all([
927
+ this._performHierarchicalClustering(itemIds, { ...options, algorithm: 'hierarchical' }),
928
+ this._performGraphClustering(itemIds, { ...options, algorithm: 'graph' }),
929
+ this._performSemanticClustering(itemIds, { ...options, algorithm: 'semantic' })
930
+ ]);
931
+ // Fuse results using intelligent consensus with Triple Intelligence
932
+ const fusedClusters = await this._fuseClusteringResultsWithTripleIntelligence([vectorClusters.clusters, graphClusters.clusters, semanticClusters.clusters], options);
933
+ return {
934
+ clusters: fusedClusters,
935
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'multimodal'),
936
+ metadata: {
937
+ totalItems: itemIds.length,
938
+ clustersFound: fusedClusters.length,
939
+ averageClusterSize: fusedClusters.reduce((sum, c) => sum + c.size, 0) / fusedClusters.length || 0,
940
+ fusionMethod: 'triple_intelligence_consensus',
941
+ componentAlgorithms: ['hierarchical', 'graph', 'semantic'],
942
+ timestamp: new Date()
943
+ }
944
+ };
945
+ }
946
+ /**
947
+ * SAMPLED CLUSTERING: For very large datasets using intelligent sampling
948
+ */
949
+ async _performSampledClustering(items, options) {
950
+ const startTime = performance.now();
951
+ const itemIds = items || await this._getAllItemIds();
952
+ if (itemIds.length === 0) {
953
+ return this._createEmptyResult(startTime, 'sampled');
954
+ }
955
+ const sampleSize = Math.min(options.sampleSize || 1000, itemIds.length);
956
+ const strategy = options.strategy || 'diverse';
957
+ // Intelligent sampling using existing infrastructure
958
+ const sample = await this._getSampleUsingStrategy(itemIds, sampleSize, strategy);
959
+ // Cluster the sample using the best algorithm for the sample size
960
+ const sampleResult = await this._performHierarchicalClustering(sample, {
961
+ ...options,
962
+ maxClusters: Math.min(options.maxClusters || 50, Math.ceil(sample.length / 10))
963
+ });
964
+ // Project clusters back to full dataset using HNSW neighbors
965
+ const projectedClusters = await this._projectClustersToFullDataset(sampleResult.clusters, itemIds, sample);
966
+ return {
967
+ clusters: projectedClusters,
968
+ metrics: this._createPerformanceMetrics(startTime, itemIds.length, 'sampled'),
969
+ metadata: {
970
+ totalItems: itemIds.length,
971
+ sampleSize: sample.length,
972
+ samplingStrategy: strategy,
973
+ clustersFound: projectedClusters.length,
974
+ averageClusterSize: projectedClusters.reduce((sum, c) => sum + c.size, 0) / projectedClusters.length || 0,
975
+ timestamp: new Date()
976
+ }
977
+ };
978
+ }
979
+ // Similarity implementation methods
980
+ async _similarityById(id1, id2, options) {
981
+ // Get vectors for both items
982
+ const item1 = await this.brain.getNoun(id1);
983
+ const item2 = await this.brain.getNoun(id2);
984
+ if (!item1 || !item2) {
985
+ return 0;
986
+ }
987
+ return this._similarityByVector(item1.vector, item2.vector, options);
988
+ }
989
+ async _similarityByVector(v1, v2, options) {
990
+ const metric = options.metric || this.config.similarityMetric || 'cosine';
991
+ let score = 0;
992
+ switch (metric) {
993
+ case 'cosine':
994
+ score = 1 - cosineDistance(v1, v2);
995
+ break;
996
+ case 'euclidean':
997
+ score = 1 / (1 + euclideanDistance(v1, v2));
998
+ break;
999
+ case 'manhattan':
1000
+ score = 1 / (1 + this._manhattanDistance(v1, v2));
1001
+ break;
1002
+ default:
1003
+ score = 1 - cosineDistance(v1, v2);
1004
+ }
1005
+ if (options.detailed) {
1006
+ return {
1007
+ score: options.normalized !== false ? Math.max(0, Math.min(1, score)) : score,
1008
+ confidence: this._calculateConfidence(score, v1, v2),
1009
+ explanation: this._generateSimilarityExplanation(score, metric),
1010
+ metric
1011
+ };
1012
+ }
1013
+ return options.normalized !== false ? Math.max(0, Math.min(1, score)) : score;
1014
+ }
1015
+ async _similarityByText(text1, text2, options) {
1016
+ // Convert text to vectors using brain's embedding function
1017
+ const vector1 = await this.brain.embed(text1);
1018
+ const vector2 = await this.brain.embed(text2);
1019
+ return this._similarityByVector(vector1, vector2, options);
1020
+ }
1021
+ // Utility methods for internal operations
1022
+ _isId(value) {
1023
+ return typeof value === 'string' &&
1024
+ (value.length === 36 && value.includes('-')) || // UUID-like
1025
+ (value.length > 10 && !value.includes(' ')); // ID-like string
1026
+ }
1027
+ _isVector(value) {
1028
+ return Array.isArray(value) &&
1029
+ value.length > 0 &&
1030
+ typeof value[0] === 'number';
1031
+ }
1032
+ async _convertToVector(input) {
1033
+ if (this._isVector(input)) {
1034
+ return input;
1035
+ }
1036
+ else if (this._isId(input)) {
1037
+ const item = await this.brain.getNoun(input);
1038
+ return item?.vector || [];
1039
+ }
1040
+ else if (typeof input === 'string') {
1041
+ return await this.brain.embed(input);
1042
+ }
1043
+ else {
1044
+ throw new Error(`Cannot convert input to vector: ${typeof input}`);
1045
+ }
1046
+ }
1047
+ _createSimilarityKey(a, b, options) {
1048
+ const aKey = typeof a === 'object' ? JSON.stringify(a).substring(0, 50) : String(a);
1049
+ const bKey = typeof b === 'object' ? JSON.stringify(b).substring(0, 50) : String(b);
1050
+ return `${aKey}|${bKey}|${JSON.stringify(options)}`;
1051
+ }
1052
+ _createClusteringKey(items, options) {
1053
+ const itemsKey = items ? [...items].sort().join(',') : 'all';
1054
+ return `clustering:${itemsKey}:${JSON.stringify(options)}`;
1055
+ }
1056
+ _cacheResult(key, result, cache) {
1057
+ if (cache.size >= (this.config.cacheSize || 1000)) {
1058
+ // Remove oldest entries (simple LRU)
1059
+ const firstKey = cache.keys().next().value;
1060
+ if (firstKey)
1061
+ cache.delete(firstKey);
1062
+ }
1063
+ cache.set(key, result);
1064
+ }
1065
+ _trackPerformance(operation, startTime, itemCount, algorithm) {
1066
+ if (!this.config.performanceTracking)
1067
+ return;
1068
+ const metrics = {
1069
+ executionTime: performance.now() - startTime,
1070
+ memoryUsed: 0, // Would implement actual memory tracking
1071
+ itemsProcessed: itemCount,
1072
+ cacheHits: 0, // Would track actual cache hits
1073
+ cacheMisses: 0, // Would track actual cache misses
1074
+ algorithm
1075
+ };
1076
+ if (!this.performanceMetrics.has(operation)) {
1077
+ this.performanceMetrics.set(operation, []);
1078
+ }
1079
+ this.performanceMetrics.get(operation).push(metrics);
1080
+ }
1081
+ _createPerformanceMetrics(startTime, itemCount, algorithm) {
1082
+ return {
1083
+ executionTime: performance.now() - startTime,
1084
+ memoryUsed: 0,
1085
+ itemsProcessed: itemCount,
1086
+ cacheHits: 0,
1087
+ cacheMisses: 0,
1088
+ algorithm
1089
+ };
1090
+ }
1091
+ _initializeCleanupTimer() {
1092
+ // Periodically clean up caches to prevent memory leaks
1093
+ setInterval(() => {
1094
+ if (this.similarityCache.size > (this.config.cacheSize || 1000)) {
1095
+ this.similarityCache.clear();
1096
+ }
1097
+ if (this.clusterCache.size > (this.config.cacheSize || 1000)) {
1098
+ this.clusterCache.clear();
1099
+ }
1100
+ if (this.hierarchyCache.size > (this.config.cacheSize || 1000)) {
1101
+ this.hierarchyCache.clear();
1102
+ }
1103
+ if (this.neighborsCache.size > (this.config.cacheSize || 1000)) {
1104
+ this.neighborsCache.clear();
1105
+ }
1106
+ }, 300000); // Clean every 5 minutes
1107
+ }
1108
+ // ===== GRAPH COMMUNITY DETECTION UTILITIES =====
1109
+ /**
1110
+ * Build graph structure from existing verb relationships
1111
+ */
1112
+ async _buildGraphFromVerbs(itemIds, options) {
1113
+ const nodes = new Set(itemIds);
1114
+ const edges = new Map();
1115
+ const verbWeights = new Map();
1116
+ // Initialize verb relationship weights
1117
+ const relationshipWeights = {
1118
+ 'creates': 1.0,
1119
+ 'partOf': 0.9,
1120
+ 'contains': 0.9,
1121
+ 'relatedTo': 0.7,
1122
+ 'references': 0.6,
1123
+ 'causes': 0.8,
1124
+ 'dependsOn': 0.8,
1125
+ 'memberOf': 0.9,
1126
+ 'worksWith': 0.7,
1127
+ 'communicates': 0.6
1128
+ };
1129
+ // Get all verbs connecting the items
1130
+ for (const sourceId of itemIds) {
1131
+ const sourceVerbs = await this.brain.getVerbsForNoun(sourceId);
1132
+ for (const verb of sourceVerbs) {
1133
+ const targetId = verb.target;
1134
+ if (nodes.has(targetId) && sourceId !== targetId) {
1135
+ // Initialize edge map if needed
1136
+ if (!edges.has(sourceId)) {
1137
+ edges.set(sourceId, new Map());
1138
+ }
1139
+ // Calculate edge weight from verb type and metadata
1140
+ const verbType = verb.verb;
1141
+ const baseWeight = relationshipWeights[verbType] || 0.5;
1142
+ const confidenceWeight = verb.confidence || 1.0;
1143
+ const weight = baseWeight * confidenceWeight;
1144
+ // Add or strengthen edge
1145
+ const currentWeight = edges.get(sourceId)?.get(targetId) || 0;
1146
+ edges.get(sourceId).set(targetId, Math.min(currentWeight + weight, 1.0));
1147
+ // Make graph undirected by adding reverse edge
1148
+ if (!edges.has(targetId)) {
1149
+ edges.set(targetId, new Map());
1150
+ }
1151
+ const reverseWeight = edges.get(targetId)?.get(sourceId) || 0;
1152
+ edges.get(targetId).set(sourceId, Math.min(reverseWeight + weight, 1.0));
1153
+ }
1154
+ }
1155
+ }
1156
+ return {
1157
+ nodes: Array.from(nodes),
1158
+ edges,
1159
+ nodeCount: nodes.size,
1160
+ edgeCount: Array.from(edges.values()).reduce((sum, edgeMap) => sum + edgeMap.size, 0) / 2 // Undirected
1161
+ };
1162
+ }
1163
+ /**
1164
+ * Detect communities using Louvain modularity optimization
1165
+ */
1166
+ async _detectCommunities(graph, options) {
1167
+ const { nodes, edges } = graph;
1168
+ // Initialize each node as its own community
1169
+ const communities = new Map();
1170
+ nodes.forEach((node, index) => communities.set(node, index));
1171
+ const totalWeight = this._calculateTotalWeight(edges);
1172
+ let improved = true;
1173
+ let iteration = 0;
1174
+ const maxIterations = 50;
1175
+ // Louvain algorithm: iteratively move nodes to communities that maximize modularity
1176
+ while (improved && iteration < maxIterations) {
1177
+ improved = false;
1178
+ iteration++;
1179
+ for (const node of nodes) {
1180
+ const currentCommunity = communities.get(node);
1181
+ let bestCommunity = currentCommunity;
1182
+ let bestGain = 0;
1183
+ // Consider neighboring communities
1184
+ const neighborCommunities = this._getNeighborCommunities(node, edges, communities);
1185
+ for (const neighborCommunity of neighborCommunities) {
1186
+ if (neighborCommunity !== currentCommunity) {
1187
+ const gain = this._calculateModularityGain(node, currentCommunity, neighborCommunity, edges, communities, totalWeight);
1188
+ if (gain > bestGain) {
1189
+ bestGain = gain;
1190
+ bestCommunity = neighborCommunity;
1191
+ }
1192
+ }
1193
+ }
1194
+ // Move node if beneficial
1195
+ if (bestCommunity !== currentCommunity) {
1196
+ communities.set(node, bestCommunity);
1197
+ improved = true;
1198
+ }
1199
+ }
1200
+ }
1201
+ // Group nodes by final community assignment
1202
+ const communityGroups = new Map();
1203
+ for (const [node, communityId] of communities) {
1204
+ if (!communityGroups.has(communityId)) {
1205
+ communityGroups.set(communityId, []);
1206
+ }
1207
+ communityGroups.get(communityId).push(node);
1208
+ }
1209
+ // Convert to Community objects with metadata
1210
+ const result = [];
1211
+ for (const [communityId, members] of communityGroups) {
1212
+ if (members.length >= (options.minClusterSize || 2)) {
1213
+ const modularity = this._calculateCommunityModularity(members, edges, totalWeight);
1214
+ const density = this._calculateCommunityDensity(members, edges);
1215
+ const strongestConnections = this._findStrongestConnections(members, edges, 3);
1216
+ result.push({
1217
+ id: communityId,
1218
+ members,
1219
+ modularity,
1220
+ density,
1221
+ strongestConnections
1222
+ });
1223
+ }
1224
+ }
1225
+ return result;
1226
+ }
1227
+ /**
1228
+ * Refine community boundaries using vector similarity
1229
+ */
1230
+ async _refineCommunitiesWithVectors(communities, options) {
1231
+ const refined = [];
1232
+ for (const community of communities) {
1233
+ const membersWithVectors = await this._getItemsWithVectors(community.members);
1234
+ // Check if community is coherent in vector space
1235
+ const vectorCoherence = await this._calculateVectorCoherence(membersWithVectors);
1236
+ if (vectorCoherence > 0.3) {
1237
+ // Community is coherent, keep as is
1238
+ refined.push(community);
1239
+ }
1240
+ else {
1241
+ // Split community using vector-based sub-clustering
1242
+ const subClusters = await this._performHierarchicalClustering(community.members, { ...options, maxClusters: Math.ceil(community.members.length / 5) });
1243
+ // Convert sub-clusters to communities
1244
+ for (let i = 0; i < subClusters.clusters.length; i++) {
1245
+ const subCluster = subClusters.clusters[i];
1246
+ refined.push({
1247
+ id: community.id * 1000 + i, // Unique sub-community ID
1248
+ members: subCluster.members,
1249
+ modularity: community.modularity * 0.8, // Slightly lower modularity for sub-communities
1250
+ density: community.density,
1251
+ strongestConnections: []
1252
+ });
1253
+ }
1254
+ }
1255
+ }
1256
+ return refined;
1257
+ }
1258
+ // ===== SEMANTIC CLUSTERING UTILITIES =====
1259
+ /**
1260
+ * Get items with their metadata including noun types
1261
+ */
1262
+ async _getItemsWithMetadata(itemIds) {
1263
+ const items = await Promise.all(itemIds.map(async (id) => {
1264
+ const noun = await this.brain.getNoun(id);
1265
+ return {
1266
+ id,
1267
+ vector: noun?.vector || [],
1268
+ metadata: noun?.data || {},
1269
+ nounType: noun?.noun || 'concept',
1270
+ label: noun?.label || id,
1271
+ data: noun?.data
1272
+ };
1273
+ }));
1274
+ return items.filter(item => item.vector.length > 0);
1275
+ }
1276
+ /**
1277
+ * Group items by their semantic noun types
1278
+ */
1279
+ _groupBySemanticType(items) {
1280
+ const groups = new Map();
1281
+ for (const item of items) {
1282
+ const type = item.nounType;
1283
+ if (!groups.has(type)) {
1284
+ groups.set(type, []);
1285
+ }
1286
+ groups.get(type).push(item);
1287
+ }
1288
+ return groups;
1289
+ }
1290
+ // Placeholder implementations for complex operations
1291
+ async _getAllItemIds() {
1292
+ // Get all noun IDs from the brain
1293
+ const stats = await this.brain.getStatistics();
1294
+ if (!stats.totalNodes || stats.totalNodes === 0) {
1295
+ return [];
1296
+ }
1297
+ // Use a simple approach: get recent items or sample
1298
+ // In practice, this could be optimized with pagination
1299
+ const items = await this.brain.getRecent(Math.min(stats.totalNodes, 10000));
1300
+ return items.map((item) => item.id);
1301
+ }
1302
+ async _getTotalItemCount() {
1303
+ const stats = await this.brain.getStatistics();
1304
+ return stats.totalNodes || 0;
1305
+ }
1306
+ // ===== GRAPH ALGORITHM SUPPORTING METHODS =====
1307
+ _calculateTotalWeight(edges) {
1308
+ let total = 0;
1309
+ for (const edgeMap of edges.values()) {
1310
+ for (const weight of edgeMap.values()) {
1311
+ total += weight;
1312
+ }
1313
+ }
1314
+ return total / 2; // Undirected graph, so divide by 2
1315
+ }
1316
+ _getNeighborCommunities(node, edges, communities) {
1317
+ const neighborCommunities = new Set();
1318
+ const nodeEdges = edges.get(node);
1319
+ if (nodeEdges) {
1320
+ for (const neighbor of nodeEdges.keys()) {
1321
+ const neighborCommunity = communities.get(neighbor);
1322
+ if (neighborCommunity !== undefined) {
1323
+ neighborCommunities.add(neighborCommunity);
1324
+ }
1325
+ }
1326
+ }
1327
+ return neighborCommunities;
1328
+ }
1329
+ _calculateModularityGain(node, oldCommunity, newCommunity, edges, communities, totalWeight) {
1330
+ // Calculate the degree of the node
1331
+ const nodeDegree = this._getNodeDegree(node, edges);
1332
+ // Calculate edges to old and new communities
1333
+ const edgesToOld = this._getEdgesToCommunity(node, oldCommunity, edges, communities);
1334
+ const edgesToNew = this._getEdgesToCommunity(node, newCommunity, edges, communities);
1335
+ // Calculate community weights
1336
+ const oldCommunityWeight = this._getCommunityWeight(oldCommunity, edges, communities);
1337
+ const newCommunityWeight = this._getCommunityWeight(newCommunity, edges, communities);
1338
+ // Modularity gain calculation (simplified)
1339
+ const oldContrib = edgesToOld - (nodeDegree * oldCommunityWeight) / (2 * totalWeight);
1340
+ const newContrib = edgesToNew - (nodeDegree * newCommunityWeight) / (2 * totalWeight);
1341
+ return newContrib - oldContrib;
1342
+ }
1343
+ _getNodeDegree(node, edges) {
1344
+ const nodeEdges = edges.get(node);
1345
+ if (!nodeEdges)
1346
+ return 0;
1347
+ return Array.from(nodeEdges.values()).reduce((sum, weight) => sum + weight, 0);
1348
+ }
1349
+ _getEdgesToCommunity(node, community, edges, communities) {
1350
+ const nodeEdges = edges.get(node);
1351
+ if (!nodeEdges)
1352
+ return 0;
1353
+ let total = 0;
1354
+ for (const [neighbor, weight] of nodeEdges) {
1355
+ if (communities.get(neighbor) === community) {
1356
+ total += weight;
1357
+ }
1358
+ }
1359
+ return total;
1360
+ }
1361
+ _getCommunityWeight(community, edges, communities) {
1362
+ let total = 0;
1363
+ for (const [node, nodeCommunity] of communities) {
1364
+ if (nodeCommunity === community) {
1365
+ total += this._getNodeDegree(node, edges);
1366
+ }
1367
+ }
1368
+ return total;
1369
+ }
1370
+ _calculateCommunityModularity(members, edges, totalWeight) {
1371
+ if (members.length < 2)
1372
+ return 0;
1373
+ let internalWeight = 0;
1374
+ let totalDegree = 0;
1375
+ for (const member of members) {
1376
+ const memberEdges = edges.get(member);
1377
+ if (memberEdges) {
1378
+ totalDegree += Array.from(memberEdges.values()).reduce((sum, w) => sum + w, 0);
1379
+ // Count internal edges
1380
+ for (const [neighbor, weight] of memberEdges) {
1381
+ if (members.includes(neighbor)) {
1382
+ internalWeight += weight;
1383
+ }
1384
+ }
1385
+ }
1386
+ }
1387
+ internalWeight /= 2; // Undirected graph
1388
+ const expectedInternal = (totalDegree * totalDegree) / (4 * totalWeight);
1389
+ return (internalWeight / totalWeight) - expectedInternal / totalWeight;
1390
+ }
1391
+ _calculateCommunityDensity(members, edges) {
1392
+ if (members.length < 2)
1393
+ return 0;
1394
+ let actualEdges = 0;
1395
+ const maxPossibleEdges = (members.length * (members.length - 1)) / 2;
1396
+ for (const member of members) {
1397
+ const memberEdges = edges.get(member);
1398
+ if (memberEdges) {
1399
+ for (const neighbor of memberEdges.keys()) {
1400
+ if (members.includes(neighbor) && member < neighbor) { // Avoid double counting
1401
+ actualEdges++;
1402
+ }
1403
+ }
1404
+ }
1405
+ }
1406
+ return actualEdges / maxPossibleEdges;
1407
+ }
1408
+ _findStrongestConnections(members, edges, limit) {
1409
+ const connections = [];
1410
+ for (const member of members) {
1411
+ const memberEdges = edges.get(member);
1412
+ if (memberEdges) {
1413
+ for (const [neighbor, weight] of memberEdges) {
1414
+ if (members.includes(neighbor) && member < neighbor) { // Avoid duplicates
1415
+ connections.push({ from: member, to: neighbor, weight });
1416
+ }
1417
+ }
1418
+ }
1419
+ }
1420
+ return connections
1421
+ .sort((a, b) => b.weight - a.weight)
1422
+ .slice(0, limit);
1423
+ }
1424
+ // ===== K-MEANS UTILITIES =====
1425
+ /**
1426
+ * Get items with their vector representations
1427
+ */
1428
+ async _getItemsWithVectors(itemIds) {
1429
+ const items = await Promise.all(itemIds.map(async (id) => {
1430
+ const noun = await this.brain.getNoun(id);
1431
+ return {
1432
+ id,
1433
+ vector: noun?.vector || []
1434
+ };
1435
+ }));
1436
+ return items.filter(item => item.vector.length > 0);
1437
+ }
1438
+ /**
1439
+ * Calculate centroid from items using existing distance functions
1440
+ */
1441
+ async _calculateCentroidFromItems(items) {
1442
+ if (items.length === 0)
1443
+ return [];
1444
+ if (items.length === 1)
1445
+ return [...items[0].vector];
1446
+ const dimensions = items[0].vector.length;
1447
+ const centroid = new Array(dimensions).fill(0);
1448
+ for (const item of items) {
1449
+ for (let i = 0; i < dimensions; i++) {
1450
+ centroid[i] += item.vector[i];
1451
+ }
1452
+ }
1453
+ for (let i = 0; i < dimensions; i++) {
1454
+ centroid[i] /= items.length;
1455
+ }
1456
+ return centroid;
1457
+ }
1458
+ /**
1459
+ * Initialize centroids using k-means++ algorithm for better convergence
1460
+ */
1461
+ async _initializeCentroidsKMeansPlusPlus(items, k) {
1462
+ const centroids = [];
1463
+ // Choose first centroid randomly
1464
+ const firstIdx = Math.floor(Math.random() * items.length);
1465
+ centroids.push([...items[firstIdx].vector]);
1466
+ // Choose remaining centroids using k-means++ probability
1467
+ for (let i = 1; i < k; i++) {
1468
+ const distances = items.map(item => {
1469
+ // Find distance to closest existing centroid
1470
+ let minDist = Infinity;
1471
+ for (const centroid of centroids) {
1472
+ const dist = this._calculateSquaredDistance(item.vector, centroid);
1473
+ minDist = Math.min(minDist, dist);
1474
+ }
1475
+ return minDist;
1476
+ });
1477
+ // Choose next centroid with probability proportional to squared distance
1478
+ const totalDistance = distances.reduce((sum, d) => sum + d, 0);
1479
+ const target = Math.random() * totalDistance;
1480
+ let cumulative = 0;
1481
+ for (let j = 0; j < distances.length; j++) {
1482
+ cumulative += distances[j];
1483
+ if (cumulative >= target) {
1484
+ centroids.push([...items[j].vector]);
1485
+ break;
1486
+ }
1487
+ }
1488
+ }
1489
+ return centroids;
1490
+ }
1491
+ /**
1492
+ * Assign points to nearest centroids using existing distance functions
1493
+ */
1494
+ async _assignPointsToCentroids(items, centroids) {
1495
+ const assignments = [];
1496
+ for (const item of items) {
1497
+ let bestCentroid = 0;
1498
+ let minDistance = Infinity;
1499
+ for (let i = 0; i < centroids.length; i++) {
1500
+ const distance = this._calculateSquaredDistance(item.vector, centroids[i]);
1501
+ if (distance < minDistance) {
1502
+ minDistance = distance;
1503
+ bestCentroid = i;
1504
+ }
1505
+ }
1506
+ assignments.push(bestCentroid);
1507
+ }
1508
+ return assignments;
1509
+ }
1510
+ /**
1511
+ * Update centroids based on current assignments
1512
+ */
1513
+ async _updateCentroids(items, assignments, k) {
1514
+ const newCentroids = [];
1515
+ for (let i = 0; i < k; i++) {
1516
+ const clusterItems = items.filter((_, idx) => assignments[idx] === i);
1517
+ if (clusterItems.length > 0) {
1518
+ newCentroids.push(await this._calculateCentroidFromItems(clusterItems));
1519
+ }
1520
+ else {
1521
+ // Keep old centroid if no items assigned
1522
+ newCentroids.push(new Array(items[0].vector.length).fill(0));
1523
+ }
1524
+ }
1525
+ return newCentroids;
1526
+ }
1527
+ /**
1528
+ * Calculate how much assignments have changed between iterations
1529
+ */
1530
+ _calculateAssignmentChangeRate(oldAssignments, newAssignments) {
1531
+ if (oldAssignments.length !== newAssignments.length)
1532
+ return 1.0;
1533
+ let changes = 0;
1534
+ for (let i = 0; i < oldAssignments.length; i++) {
1535
+ if (oldAssignments[i] !== newAssignments[i]) {
1536
+ changes++;
1537
+ }
1538
+ }
1539
+ return changes / oldAssignments.length;
1540
+ }
1541
+ /**
1542
+ * Calculate cluster confidence for k-means clusters
1543
+ */
1544
+ async _calculateKMeansClusterConfidence(clusterItems, centroid) {
1545
+ if (clusterItems.length <= 1)
1546
+ return 1.0;
1547
+ // Calculate average distance to centroid
1548
+ const distances = clusterItems.map(item => this._calculateSquaredDistance(item.vector, centroid));
1549
+ const avgDistance = distances.reduce((sum, d) => sum + d, 0) / distances.length;
1550
+ // Calculate standard deviation
1551
+ const variance = distances.reduce((sum, d) => sum + Math.pow(d - avgDistance, 2), 0) / distances.length;
1552
+ const stdDev = Math.sqrt(variance);
1553
+ // Higher confidence for tighter clusters
1554
+ const tightness = avgDistance > 0 ? Math.max(0, 1 - (stdDev / avgDistance)) : 1.0;
1555
+ return Math.min(1.0, tightness);
1556
+ }
1557
+ // ===== DBSCAN UTILITIES =====
1558
+ /**
1559
+ * Estimate optimal eps parameter using k-nearest neighbor distances
1560
+ */
1561
+ async _estimateOptimalEps(items, minPts) {
1562
+ if (items.length < minPts)
1563
+ return 0.5;
1564
+ // Calculate k-nearest neighbor distances for each point
1565
+ const kDistances = [];
1566
+ for (const item of items) {
1567
+ const distances = [];
1568
+ for (const otherItem of items) {
1569
+ if (item.id !== otherItem.id) {
1570
+ const distance = Math.sqrt(this._calculateSquaredDistance(item.vector, otherItem.vector));
1571
+ distances.push(distance);
1572
+ }
1573
+ }
1574
+ distances.sort((a, b) => a - b);
1575
+ // Get k-th nearest neighbor distance (minPts-1 because we exclude self)
1576
+ const kthDistance = distances[Math.min(minPts - 1, distances.length - 1)];
1577
+ kDistances.push(kthDistance);
1578
+ }
1579
+ kDistances.sort((a, b) => a - b);
1580
+ // Use knee point detection - find point with maximum curvature
1581
+ // Simplified approach: use 90th percentile of k-distances
1582
+ const percentileIndex = Math.floor(kDistances.length * 0.9);
1583
+ return kDistances[percentileIndex] || 0.5;
1584
+ }
1585
+ /**
1586
+ * Find neighbors within epsilon distance using efficient vector operations
1587
+ */
1588
+ async _findNeighborsWithinEps(item, allItems, eps) {
1589
+ const neighbors = [];
1590
+ const epsSquared = eps * eps;
1591
+ for (const otherItem of allItems) {
1592
+ if (item.id !== otherItem.id) {
1593
+ const distanceSquared = this._calculateSquaredDistance(item.vector, otherItem.vector);
1594
+ if (distanceSquared <= epsSquared) {
1595
+ neighbors.push(otherItem);
1596
+ }
1597
+ }
1598
+ }
1599
+ return neighbors;
1600
+ }
1601
+ /**
1602
+ * Expand DBSCAN cluster by adding density-reachable points
1603
+ */
1604
+ async _expandCluster(seedPoint, neighbors, clusterId, eps, minPts, allItems, visited, clusterAssignments) {
1605
+ clusterAssignments.set(seedPoint.id, clusterId);
1606
+ let i = 0;
1607
+ while (i < neighbors.length) {
1608
+ const neighbor = neighbors[i];
1609
+ if (!visited.get(neighbor.id)) {
1610
+ visited.set(neighbor.id, true);
1611
+ const neighborNeighbors = await this._findNeighborsWithinEps(neighbor, allItems, eps);
1612
+ if (neighborNeighbors.length >= minPts) {
1613
+ // Add new neighbors to the list (union operation)
1614
+ for (const newNeighbor of neighborNeighbors) {
1615
+ if (!neighbors.some(n => n.id === newNeighbor.id)) {
1616
+ neighbors.push(newNeighbor);
1617
+ }
1618
+ }
1619
+ }
1620
+ }
1621
+ // If neighbor is not assigned to any cluster, assign to current cluster
1622
+ if (!clusterAssignments.has(neighbor.id)) {
1623
+ clusterAssignments.set(neighbor.id, clusterId);
1624
+ }
1625
+ i++;
1626
+ }
1627
+ }
1628
+ /**
1629
+ * Calculate DBSCAN cluster confidence based on density
1630
+ */
1631
+ async _calculateDBSCANClusterConfidence(clusterItems, eps) {
1632
+ if (clusterItems.length <= 1)
1633
+ return 1.0;
1634
+ // Calculate average density within the cluster
1635
+ let totalNeighborCount = 0;
1636
+ const epsSquared = eps * eps;
1637
+ for (const item of clusterItems) {
1638
+ let neighborCount = 0;
1639
+ for (const otherItem of clusterItems) {
1640
+ if (item !== otherItem) {
1641
+ const distanceSquared = this._calculateSquaredDistance(item.vector, otherItem.vector);
1642
+ if (distanceSquared <= epsSquared) {
1643
+ neighborCount++;
1644
+ }
1645
+ }
1646
+ }
1647
+ totalNeighborCount += neighborCount;
1648
+ }
1649
+ const avgDensity = totalNeighborCount / clusterItems.length;
1650
+ const maxPossibleDensity = clusterItems.length - 1;
1651
+ return maxPossibleDensity > 0 ? avgDensity / maxPossibleDensity : 1.0;
1652
+ }
1653
+ // ===== VECTOR UTILITIES =====
1654
+ /**
1655
+ * Calculate squared Euclidean distance (more efficient than sqrt)
1656
+ */
1657
+ _calculateSquaredDistance(vec1, vec2) {
1658
+ if (vec1.length !== vec2.length)
1659
+ return Infinity;
1660
+ let sum = 0;
1661
+ for (let i = 0; i < vec1.length; i++) {
1662
+ const diff = vec1[i] - vec2[i];
1663
+ sum += diff * diff;
1664
+ }
1665
+ return sum;
1666
+ }
1667
/**
 * Cohesion score in [0, 1] for a set of vectorized items: 1.0 when all
 * vectors coincide with their centroid, falling toward 0 as spread grows.
 * Singleton or empty inputs score a full 1.0.
 *
 * @param items Items with `vector` fields
 * @returns Normalized coherence score (clamped at 0)
 */
async _calculateVectorCoherence(items) {
    if (items.length <= 1)
        return 1.0;
    const centroid = await this._calculateCentroidFromItems(items);
    // Calculate average distance to centroid
    const distances = items.map(item => Math.sqrt(this._calculateSquaredDistance(item.vector, centroid)));
    const avgDistance = distances.reduce((sum, d) => sum + d, 0) / distances.length;
    // Calculate cohesion as inverse of average distance (normalized)
    // NOTE(review): sqrt(dim) assumes per-dimension values roughly in [-1, 1]
    // (e.g. normalized embeddings) — confirm against the embedding pipeline.
    const maxDistance = Math.sqrt(centroid.length); // Rough normalization
    return Math.max(0, 1 - (avgDistance / maxDistance));
}
/**
 * Look up items carrying a given metadata field.
 * NOTE(review): placeholder — always returns an empty array; the metadata
 * query has not been implemented yet (TODO).
 */
async _getItemsByField(field) {
    // Implementation would query items by metadata field
    return [];
}
// ===== TRIPLE INTELLIGENCE INTEGRATION =====
/**
 * Generate a descriptive cluster label via the Triple Intelligence engine,
 * falling back to simple type-based labeling (`_generateClusterLabel`) when
 * the engine module is unavailable or anything throws.
 *
 * @param members   Cluster members (uses id, nounType, label, data)
 * @param algorithm Name of the clustering algorithm (used in prompt/fallback)
 * @returns Short label string, capped at 50 characters
 */
async _generateIntelligentClusterLabel(members, algorithm) {
    if (members.length === 0)
        return `${algorithm}-cluster`;
    try {
        // Lazy load Triple Intelligence if available; .catch(() => null)
        // turns a missing optional module into the fallback path below.
        const TripleIntelligenceEngine = await import('../triple/TripleIntelligence.js')
            .then(m => m.TripleIntelligenceEngine)
            .catch(() => null);
        if (!TripleIntelligenceEngine) {
            return this._generateClusterLabel(members, algorithm);
        }
        const intelligence = new TripleIntelligenceEngine(this.brain);
        // Extract key features from cluster members
        const memberData = members.map(m => ({
            id: m.id,
            type: m.nounType,
            label: m.label,
            data: m.data
        }));
        // Use Triple Intelligence to analyze the cluster and generate label
        const prompt = `Analyze this cluster of ${memberData.length} related items and provide a concise, descriptive label (2-4 words):

Items:
${memberData.map(item => `- ${item.label || item.id} (${item.type})`).join('\n')}

The items were grouped using ${algorithm} clustering. What is the most appropriate label that captures their common theme or relationship?`;
        const response = await intelligence.find({
            like: prompt,
            limit: 1
        });
        // Extract clean label from response: strip common prefixes and quotes,
        // then truncate. Falls back to the generic name if everything strips away.
        const firstResult = response[0];
        const label = (firstResult?.metadata?.content || firstResult?.id || `${algorithm}-cluster`)
            .toString()
            .replace(/^(Label:|Cluster:|Theme:)/i, '')
            .trim()
            .replace(/['"]/g, '')
            .slice(0, 50);
        return label || `${algorithm}-cluster`;
    }
    catch (error) {
        // Fallback to simple labeling
        return this._generateClusterLabel(members, algorithm);
    }
}
+ /**
1735
+ * Generate simple cluster labels based on semantic analysis
1736
+ */
1737
+ async _generateClusterLabel(members, algorithm) {
1738
+ if (members.length === 0)
1739
+ return `${algorithm}-cluster`;
1740
+ // Analyze member types and create descriptive label
1741
+ const typeCount = new Map();
1742
+ for (const member of members) {
1743
+ const type = member.nounType || 'unknown';
1744
+ typeCount.set(type, (typeCount.get(type) || 0) + 1);
1745
+ }
1746
+ // Find most common type
1747
+ let dominantType = 'mixed';
1748
+ let maxCount = 0;
1749
+ for (const [type, count] of typeCount) {
1750
+ if (count > maxCount) {
1751
+ maxCount = count;
1752
+ dominantType = type;
1753
+ }
1754
+ }
1755
+ // Generate label based on dominant type and size
1756
+ const size = members.length;
1757
+ const typePercent = Math.round((maxCount / size) * 100);
1758
+ if (typePercent >= 80) {
1759
+ return `${dominantType} group (${size})`;
1760
+ }
1761
+ else if (typePercent >= 60) {
1762
+ return `mostly ${dominantType} (${size})`;
1763
+ }
1764
+ else {
1765
+ const topTypes = Array.from(typeCount.entries())
1766
+ .sort((a, b) => b[1] - a[1])
1767
+ .slice(0, 2)
1768
+ .map(([type]) => type)
1769
+ .join(' & ');
1770
+ return `${topTypes} cluster (${size})`;
1771
+ }
1772
+ }
1773
/**
 * Fuse vector/graph/semantic clustering results into consensus clusters.
 *
 * Two items land in the same fused cluster when they co-occur in the same
 * source cluster under at least 2 of the 3 algorithms. Groups smaller than
 * `options.minClusterSize` (default 2) are dropped. Despite the name, this
 * path does not call Triple Intelligence for the fusion itself — only for
 * labeling the fused clusters.
 *
 * @param clusterSets Up to three cluster arrays in [vector, graph, semantic] order
 * @param options     Uses `minClusterSize`
 * @returns Array of fused SemanticCluster-shaped objects
 */
async _fuseClusteringResultsWithTripleIntelligence(clusterSets, options) {
    if (clusterSets.length === 0)
        return [];
    if (clusterSets.length === 1)
        return clusterSets[0];
    // Simple weighted fusion if Triple Intelligence is not available
    const [vectorClusters, graphClusters, semanticClusters] = clusterSets;
    // Create consensus mapping of items to clusters
    const itemClusterMapping = new Map();
    // Collect all cluster assignments: itemId -> [{algorithm, clusterId, confidence}]
    const allAlgorithms = ['vector', 'graph', 'semantic'];
    const algorithmClusters = [vectorClusters, graphClusters, semanticClusters];
    for (let i = 0; i < algorithmClusters.length; i++) {
        const algorithm = allAlgorithms[i];
        const clusters = algorithmClusters[i] || [];
        for (const cluster of clusters) {
            for (const memberId of cluster.members) {
                if (!itemClusterMapping.has(memberId)) {
                    itemClusterMapping.set(memberId, []);
                }
                itemClusterMapping.get(memberId).push({
                    algorithm,
                    clusterId: cluster.id,
                    confidence: cluster.confidence
                });
            }
        }
    }
    // Find consensus clusters - items that appear together in multiple algorithms.
    // NOTE(review): greedy, order-dependent — an item claimed by an earlier
    // consensus group is skipped for later ones (processedItems guard).
    const consensusClusters = new Map();
    const processedItems = new Set();
    for (const [itemId, assignments] of itemClusterMapping) {
        if (processedItems.has(itemId))
            continue;
        // Find all items that consistently cluster with this item
        const consensusGroup = new Set([itemId]);
        // Look for items that share clusters with this item across algorithms
        for (const assignment of assignments) {
            const sameClusterItems = this._getItemsInCluster(assignment.clusterId, clusterSets);
            for (const otherItem of sameClusterItems) {
                if (!processedItems.has(otherItem) && otherItem !== itemId) {
                    const otherAssignments = itemClusterMapping.get(otherItem) || [];
                    // Check if items co-occur in multiple algorithms
                    const coOccurrences = this._countCoOccurrences(assignments, otherAssignments);
                    if (coOccurrences >= 2) { // Must appear together in at least 2 algorithms
                        consensusGroup.add(otherItem);
                    }
                }
            }
        }
        // Mark all items in this consensus group as processed
        for (const groupItem of consensusGroup) {
            processedItems.add(groupItem);
        }
        if (consensusGroup.size >= (options.minClusterSize || 2)) {
            const consensusId = `fusion-${consensusClusters.size}`;
            consensusClusters.set(consensusId, consensusGroup);
        }
    }
    // Convert consensus groups to SemanticCluster objects
    const fusedClusters = [];
    for (const [clusterId, memberSet] of consensusClusters) {
        const members = Array.from(memberSet);
        const membersWithMetadata = await this._getItemsWithMetadata(members);
        if (membersWithMetadata.length > 0) {
            const centroid = await this._calculateCentroidFromItems(membersWithMetadata);
            const label = await this._generateIntelligentClusterLabel(membersWithMetadata, 'multimodal');
            // Calculate fusion confidence based on algorithm agreement
            const avgConfidence = this._calculateFusionConfidence(members, itemClusterMapping);
            fusedClusters.push({
                id: clusterId,
                centroid,
                members,
                size: members.length,
                confidence: avgConfidence,
                label,
                metadata: {
                    clustering: 'multimodal_fusion',
                    algorithms: allAlgorithms,
                    fusionMethod: 'consensus',
                    agreementLevel: avgConfidence
                }
            });
        }
    }
    return fusedClusters;
}
+ /**
1864
+ * Get items in a specific cluster from cluster sets
1865
+ */
1866
+ _getItemsInCluster(clusterId, clusterSets) {
1867
+ for (const clusterSet of clusterSets) {
1868
+ for (const cluster of clusterSet) {
1869
+ if (cluster.id === clusterId) {
1870
+ return cluster.members;
1871
+ }
1872
+ }
1873
+ }
1874
+ return [];
1875
+ }
1876
+ /**
1877
+ * Count co-occurrences between two sets of assignments
1878
+ */
1879
+ _countCoOccurrences(assignments1, assignments2) {
1880
+ let count = 0;
1881
+ for (const assignment1 of assignments1) {
1882
+ for (const assignment2 of assignments2) {
1883
+ if (assignment1.algorithm === assignment2.algorithm &&
1884
+ assignment1.clusterId === assignment2.clusterId) {
1885
+ count++;
1886
+ }
1887
+ }
1888
+ }
1889
+ return count;
1890
+ }
1891
+ /**
1892
+ * Calculate fusion confidence based on algorithm agreement
1893
+ */
1894
+ _calculateFusionConfidence(members, itemClusterMapping) {
1895
+ let totalConfidence = 0;
1896
+ let totalAssignments = 0;
1897
+ for (const member of members) {
1898
+ const assignments = itemClusterMapping.get(member) || [];
1899
+ for (const assignment of assignments) {
1900
+ totalConfidence += assignment.confidence;
1901
+ totalAssignments++;
1902
+ }
1903
+ }
1904
+ return totalAssignments > 0 ? totalConfidence / totalAssignments : 0.5;
1905
+ }
1906
// ===== ADDITIONAL UTILITIES =====
/**
 * Build the canonical empty clustering result for edge cases (no items,
 * no clusters found), with zeroed metadata and a metrics record for the
 * elapsed time since `startTime`.
 *
 * @param startTime Operation start timestamp (passed through to metrics)
 * @param algorithm Algorithm name recorded in the metrics
 */
_createEmptyResult(startTime, algorithm) {
    return {
        clusters: [],
        metrics: this._createPerformanceMetrics(startTime, 0, algorithm),
        metadata: {
            totalItems: 0,
            clustersFound: 0,
            averageClusterSize: 0,
            timestamp: new Date()
        }
    };
}
+ // ===== SAMPLING AND PROJECTION UTILITIES =====
1923
+ /**
1924
+ * Get sample using specified strategy for large dataset clustering
1925
+ */
1926
+ async _getSampleUsingStrategy(itemIds, sampleSize, strategy) {
1927
+ if (itemIds.length <= sampleSize)
1928
+ return itemIds;
1929
+ switch (strategy) {
1930
+ case 'random':
1931
+ return this._getRandomSample(itemIds, sampleSize);
1932
+ case 'diverse':
1933
+ return await this._getDiverseSample(itemIds, sampleSize);
1934
+ case 'recent':
1935
+ return await this._getRecentSample(itemIds, sampleSize);
1936
+ case 'important':
1937
+ return await this._getImportantSample(itemIds, sampleSize);
1938
+ default:
1939
+ return this._getRandomSample(itemIds, sampleSize);
1940
+ }
1941
+ }
1942
+ /**
1943
+ * Random sampling
1944
+ */
1945
+ _getRandomSample(itemIds, sampleSize) {
1946
+ const shuffled = [...itemIds].sort(() => Math.random() - 0.5);
1947
+ return shuffled.slice(0, sampleSize);
1948
+ }
1949
+ /**
1950
+ * Diverse sampling using vector space distribution
1951
+ */
1952
+ async _getDiverseSample(itemIds, sampleSize) {
1953
+ // Get vectors for all items
1954
+ const itemsWithVectors = await this._getItemsWithVectors(itemIds);
1955
+ if (itemsWithVectors.length <= sampleSize) {
1956
+ return itemIds;
1957
+ }
1958
+ // Use k-means++ style selection for diversity
1959
+ const sample = [];
1960
+ // Select first item randomly
1961
+ let remainingItems = [...itemsWithVectors];
1962
+ const firstIdx = Math.floor(Math.random() * remainingItems.length);
1963
+ sample.push(remainingItems[firstIdx].id);
1964
+ remainingItems.splice(firstIdx, 1);
1965
+ // Select remaining items based on maximum distance to already selected items
1966
+ while (sample.length < sampleSize && remainingItems.length > 0) {
1967
+ let maxDistance = -1;
1968
+ let bestIdx = 0;
1969
+ for (let i = 0; i < remainingItems.length; i++) {
1970
+ const item = remainingItems[i];
1971
+ // Find minimum distance to any selected item
1972
+ let minDistanceToSelected = Infinity;
1973
+ for (const selectedId of sample) {
1974
+ const selectedItem = itemsWithVectors.find(it => it.id === selectedId);
1975
+ if (selectedItem) {
1976
+ const distance = Math.sqrt(this._calculateSquaredDistance(item.vector, selectedItem.vector));
1977
+ minDistanceToSelected = Math.min(minDistanceToSelected, distance);
1978
+ }
1979
+ }
1980
+ // Select item with maximum minimum distance (most diverse)
1981
+ if (minDistanceToSelected > maxDistance) {
1982
+ maxDistance = minDistanceToSelected;
1983
+ bestIdx = i;
1984
+ }
1985
+ }
1986
+ sample.push(remainingItems[bestIdx].id);
1987
+ remainingItems.splice(bestIdx, 1);
1988
+ }
1989
+ return sample;
1990
+ }
1991
+ /**
1992
+ * Recent sampling based on creation time
1993
+ */
1994
+ async _getRecentSample(itemIds, sampleSize) {
1995
+ const items = await Promise.all(itemIds.map(async (id) => {
1996
+ const noun = await this.brain.getNoun(id);
1997
+ return {
1998
+ id,
1999
+ createdAt: noun?.createdAt || new Date(0)
2000
+ };
2001
+ }));
2002
+ // Sort by creation time (most recent first)
2003
+ items.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime());
2004
+ return items.slice(0, sampleSize).map(item => item.id);
2005
+ }
2006
+ /**
2007
+ * Important sampling based on connection count and metadata
2008
+ */
2009
+ async _getImportantSample(itemIds, sampleSize) {
2010
+ const items = await Promise.all(itemIds.map(async (id) => {
2011
+ const verbs = await this.brain.getVerbsForNoun(id);
2012
+ const noun = await this.brain.getNoun(id);
2013
+ // Calculate importance score
2014
+ const connectionScore = verbs.length;
2015
+ const dataScore = noun?.data ? Object.keys(noun.data).length : 0;
2016
+ const importanceScore = connectionScore * 2 + dataScore;
2017
+ return {
2018
+ id,
2019
+ importance: importanceScore
2020
+ };
2021
+ }));
2022
+ // Sort by importance (highest first)
2023
+ items.sort((a, b) => b.importance - a.importance);
2024
+ return items.slice(0, sampleSize).map(item => item.id);
2025
+ }
2026
+ /**
2027
+ * Project clusters back to full dataset using HNSW neighbors
2028
+ */
2029
+ async _projectClustersToFullDataset(sampleClusters, fullItemIds, sampleIds) {
2030
+ const projectedClusters = [];
2031
+ // Create mapping of items not in sample
2032
+ const remainingItems = fullItemIds.filter(id => !sampleIds.includes(id));
2033
+ // For each sample cluster, find which remaining items should belong to it
2034
+ for (const sampleCluster of sampleClusters) {
2035
+ const projectedMembers = [...sampleCluster.members];
2036
+ // For each remaining item, find its nearest neighbors in the sample
2037
+ for (const itemId of remainingItems) {
2038
+ try {
2039
+ const neighbors = await this.brain.neural.neighbors(itemId, {
2040
+ limit: 3,
2041
+ includeMetadata: false
2042
+ });
2043
+ // Check if any of the nearest neighbors belong to this cluster
2044
+ let belongsToCluster = false;
2045
+ for (const neighbor of neighbors.neighbors) {
2046
+ if (sampleCluster.members.includes(neighbor.id) && neighbor.similarity > 0.7) {
2047
+ belongsToCluster = true;
2048
+ break;
2049
+ }
2050
+ }
2051
+ if (belongsToCluster) {
2052
+ projectedMembers.push(itemId);
2053
+ }
2054
+ }
2055
+ catch (error) {
2056
+ // Skip items that can't be processed
2057
+ continue;
2058
+ }
2059
+ }
2060
+ // Create projected cluster
2061
+ if (projectedMembers.length > 0) {
2062
+ const membersWithVectors = await this._getItemsWithVectors(projectedMembers);
2063
+ projectedClusters.push({
2064
+ ...sampleCluster,
2065
+ id: `projected-${sampleCluster.id}`,
2066
+ members: projectedMembers,
2067
+ size: projectedMembers.length,
2068
+ centroid: await this._calculateCentroidFromItems(membersWithVectors),
2069
+ confidence: sampleCluster.confidence * 0.9, // Slightly lower confidence for projection
2070
+ metadata: {
2071
+ ...sampleCluster.metadata,
2072
+ isProjected: true,
2073
+ originalSampleSize: sampleCluster.size,
2074
+ projectedSize: projectedMembers.length
2075
+ }
2076
+ });
2077
+ }
2078
+ }
2079
+ return projectedClusters;
2080
+ }
2081
+ _groupByDomain(items, field) {
2082
+ const groups = new Map();
2083
+ for (const item of items) {
2084
+ const domain = item.metadata?.[field] || 'unknown';
2085
+ if (!groups.has(domain)) {
2086
+ groups.set(domain, []);
2087
+ }
2088
+ groups.get(domain).push(item);
2089
+ }
2090
+ return groups;
2091
+ }
2092
/**
 * How well a cluster represents its domain.
 * NOTE(review): placeholder — always returns a fixed 0.8; real scoring
 * is not implemented yet (TODO).
 */
_calculateDomainConfidence(cluster, domainItems) {
    // Calculate how well this cluster represents the domain
    return 0.8; // Placeholder
}
/**
 * Members that may belong to multiple domains.
 * TODO: not implemented — currently always returns an empty array.
 */
async _findCrossDomainMembers(cluster, threshold) {
    // Find members that might belong to multiple domains
    return [];
}
/**
 * Clusters spanning multiple domains.
 * TODO: not implemented — currently always returns an empty array.
 */
async _findCrossDomainClusters(clusters, threshold) {
    // Find clusters that span multiple domains
    return [];
}
/**
 * Items whose `timeField` falls inside a time window.
 * TODO: not implemented — currently always returns an empty array.
 */
async _getItemsByTimeWindow(timeField, window) {
    // Implementation would query items within time window
    return [];
}
/**
 * Temporal characteristics of a cluster.
 * NOTE(review): placeholder — returns a static 'stable' trend with
 * current-time bounds; real analysis of `timeField` is TODO.
 */
async _calculateTemporalMetrics(cluster, items, timeField) {
    // Calculate temporal characteristics of the cluster
    return {
        trend: 'stable',
        metrics: {
            startTime: new Date(),
            endTime: new Date(),
            peakTime: new Date(),
            frequency: 1
        }
    };
}
/**
 * Merge clusters from overlapping time windows.
 * TODO: not implemented — currently passes clusters through unchanged.
 */
_mergeOverlappingTemporalClusters(clusters) {
    // Merge clusters from overlapping time windows
    return clusters;
}
/**
 * Adapt the clustering threshold to observed results.
 * NOTE(review): placeholder — echoes the current threshold (default 0.6)
 * without inspecting `clusters`.
 */
_adjustThresholdAdaptively(clusters, currentThreshold) {
    // Adjust clustering threshold based on results
    return currentThreshold || 0.6;
}
/**
 * Similarity between an item and a cluster centroid.
 * NOTE(review): placeholder — always returns 0.5.
 */
async _calculateItemToClusterSimilarity(itemId, cluster) {
    // Calculate similarity between an item and a cluster centroid
    return 0.5; // Placeholder
}
/**
 * Recompute a centroid after membership changes.
 * NOTE(review): placeholder — returns the existing centroid unchanged.
 */
async _recalculateClusterCentroid(cluster) {
    // Recalculate centroid after adding new members
    return cluster.centroid;
}
/**
 * Thin internal wrapper: delegates pairwise similarity to the public
 * similar() API so internal callers share its caching and configured metric.
 */
async _calculateSimilarity(id1, id2) {
    return await this.similar(id1, id2);
}
+ _sortNeighbors(neighbors, sortBy) {
2140
+ switch (sortBy) {
2141
+ case 'similarity':
2142
+ neighbors.sort((a, b) => b.similarity - a.similarity);
2143
+ break;
2144
+ case 'importance':
2145
+ neighbors.sort((a, b) => (b.metadata?.importance || 0) - (a.metadata?.importance || 0));
2146
+ break;
2147
+ case 'recency':
2148
+ neighbors.sort((a, b) => {
2149
+ const aTime = new Date(a.metadata?.createdAt || 0).getTime();
2150
+ const bTime = new Date(b.metadata?.createdAt || 0).getTime();
2151
+ return bTime - aTime;
2152
+ });
2153
+ break;
2154
+ }
2155
+ }
2156
/**
 * Build a semantic hierarchy around an item.
 * NOTE(review): partial implementation — only the `self` level is
 * populated; parent/child/sibling levels are TODO and `options` is unused.
 */
async _buildSemanticHierarchy(item, options) {
    // Build semantic hierarchy around an item
    return {
        self: { id: item.id, vector: item.vector, metadata: item.metadata }
    };
}
/**
 * Outlier detection via distance to cluster centroids.
 * TODO: not implemented — currently always returns an empty array.
 */
async _detectOutliersClusterBased(threshold, options) {
    // Detect outliers using cluster-based method
    return [];
}
/**
 * Outlier detection via isolation-forest scoring.
 * TODO: not implemented — currently always returns an empty array.
 */
async _detectOutliersIsolation(threshold, options) {
    // Detect outliers using isolation forest method
    return [];
}
/**
 * Outlier detection via statistical measures (e.g. z-scores).
 * TODO: not implemented — currently always returns an empty array.
 */
async _detectOutliersStatistical(threshold, options) {
    // Detect outliers using statistical methods
    return [];
}
/**
 * Nodes for visualization output.
 * TODO: not implemented — currently always returns an empty array.
 */
async _generateVisualizationNodes(maxNodes, options) {
    // Generate nodes for visualization
    return [];
}
/**
 * Edges for visualization output.
 * TODO: not implemented — currently always returns an empty array.
 */
async _generateVisualizationEdges(nodes, options) {
    // Generate edges for visualization
    return [];
}
/**
 * Cluster overlays for visualization output.
 * TODO: not implemented — currently always returns an empty array.
 */
async _generateVisualizationClusters(nodes) {
    // Generate cluster information for visualization
    return [];
}
/**
 * Assign 2D/3D positions to visualization nodes.
 * NOTE(review): placeholder layout — `algorithm` and `edges` are ignored;
 * nodes are scattered uniformly at random in a 100-unit box, with `z`
 * populated only when dimensions === 3.
 */
async _applyLayoutAlgorithm(nodes, edges, algorithm, dimensions) {
    // Apply layout algorithm to position nodes
    return nodes.map((node, i) => ({
        ...node,
        x: Math.random() * 100,
        y: Math.random() * 100,
        z: dimensions === 3 ? Math.random() * 100 : undefined
    }));
}
+ _manhattanDistance(v1, v2) {
2196
+ let sum = 0;
2197
+ for (let i = 0; i < v1.length; i++) {
2198
+ sum += Math.abs(v1[i] - v2[i]);
2199
+ }
2200
+ return sum;
2201
+ }
2202
+ _calculateConfidence(score, v1, v2) {
2203
+ // Calculate confidence based on vector magnitudes and score
2204
+ return Math.min(1, score + 0.1);
2205
+ }
2206
+ _generateSimilarityExplanation(score, metric) {
2207
+ if (score > 0.9)
2208
+ return `Very high similarity using ${metric} distance`;
2209
+ if (score > 0.7)
2210
+ return `High similarity using ${metric} distance`;
2211
+ if (score > 0.5)
2212
+ return `Moderate similarity using ${metric} distance`;
2213
+ if (score > 0.3)
2214
+ return `Low similarity using ${metric} distance`;
2215
+ return `Very low similarity using ${metric} distance`;
2216
+ }
2217
+ // ===== PUBLIC API: UTILITY & STATUS =====
2218
+ /**
2219
+ * Get performance metrics for monitoring
2220
+ */
2221
+ getPerformanceMetrics(operation) {
2222
+ if (operation) {
2223
+ return this.performanceMetrics.get(operation) || [];
2224
+ }
2225
+ return this.performanceMetrics;
2226
+ }
2227
+ /**
2228
+ * Clear all caches
2229
+ */
2230
+ clearCaches() {
2231
+ this.similarityCache.clear();
2232
+ this.clusterCache.clear();
2233
+ this.hierarchyCache.clear();
2234
+ this.neighborsCache.clear();
2235
+ }
2236
+ /**
2237
+ * Get cache statistics
2238
+ */
2239
+ getCacheStats() {
2240
+ const maxSize = this.config.cacheSize || 1000;
2241
+ return {
2242
+ similarity: { size: this.similarityCache.size, maxSize },
2243
+ clustering: { size: this.clusterCache.size, maxSize },
2244
+ hierarchy: { size: this.hierarchyCache.size, maxSize },
2245
+ neighbors: { size: this.neighborsCache.size, maxSize }
2246
+ };
2247
+ }
2248
// ===== MISSING HELPER METHODS =====
/**
 * Profile the dataset to drive automatic algorithm selection.
 *
 * Samples up to 100 items for dimensionality/type stats and up to 50 for
 * graph density (verb counts).
 *
 * @param itemIds All candidate item ids
 * @returns { size, dimensionality, graphDensity, typeDistribution }
 */
async _analyzeDataCharacteristics(itemIds) {
    const size = itemIds.length;
    const items = await this._getItemsWithMetadata(itemIds.slice(0, Math.min(100, size)));
    const dimensionality = items.length > 0 ? items[0].vector.length : 0;
    // Calculate graph density by sampling verb relationships
    let connectionCount = 0;
    const sampleSize = Math.min(50, itemIds.length);
    for (let i = 0; i < sampleSize; i++) {
        try {
            const verbs = await this.brain.getVerbsForNoun(itemIds[i]);
            connectionCount += verbs.length;
        }
        catch (error) {
            // Skip items that can't be processed
            continue;
        }
    }
    // NOTE(review): normalizes by sampleSize^2 (edges over possible ordered
    // pairs within the sample), although the counted verbs may point outside
    // the sample — treat as a rough relative signal, not a true density.
    const graphDensity = sampleSize > 0 ? connectionCount / (sampleSize * sampleSize) : 0;
    // Calculate type distribution
    const typeDistribution = {};
    for (const item of items) {
        const type = item.nounType;
        typeDistribution[type] = (typeDistribution[type] || 0) + 1;
    }
    return { size, dimensionality, graphDensity, typeDistribution };
}
/**
 * Centroid for a group of items.
 * Alias for _calculateCentroidFromItems, kept for naming symmetry with
 * the other group-level helpers.
 */
async _calculateGroupCentroid(items) {
    return this._calculateCentroidFromItems(items);
}
/**
 * Cluster items that share a noun type using vector similarity.
 *
 * Groups of <= 2 items are returned as a single trivial cluster; larger
 * groups go through hierarchical clustering capped at ceil(n/3) clusters
 * (max 10).
 * NOTE(review): an empty `items` array still produces one cluster with no
 * members and a centroid computed from nothing — confirm callers filter
 * empty groups first.
 */
async _clusterWithinSemanticType(items, options) {
    if (items.length <= 2) {
        return [{
                id: `semantic-single-${items[0]?.nounType || 'unknown'}`,
                centroid: await this._calculateCentroidFromItems(items),
                members: items.map(item => item.id),
                size: items.length,
                confidence: 1.0,
                label: `${items[0]?.nounType || 'unknown'} group`,
                metadata: { clustering: 'semantic', nounType: items[0]?.nounType }
            }];
    }
    // Use hierarchical clustering for within-type clustering
    const result = await this._performHierarchicalClustering(items.map(item => item.id), { ...options, maxClusters: Math.min(Math.ceil(items.length / 3), 10) });
    return result.clusters;
}
+ /**
2304
+ * Find cross-type connections via verbs
2305
+ */
2306
+ async _findCrossTypeConnections(typeGroups, _options) {
2307
+ const connections = [];
2308
+ // Convert Map to array for compatibility
2309
+ const typeGroupsArray = Array.from(typeGroups.entries());
2310
+ for (const [fromType, fromItems] of typeGroupsArray) {
2311
+ for (const [toType, toItems] of typeGroupsArray) {
2312
+ if (fromType !== toType) {
2313
+ for (const fromItem of fromItems.slice(0, 10)) { // Sample to avoid N^2
2314
+ try {
2315
+ const verbs = await this.brain.getVerbsForNoun(fromItem.id);
2316
+ for (const verb of verbs) {
2317
+ const toItem = toItems.find(item => item.id === verb.target);
2318
+ if (toItem) {
2319
+ connections.push({
2320
+ from: fromItem.id,
2321
+ to: toItem.id,
2322
+ strength: verb.confidence || 0.7
2323
+ });
2324
+ }
2325
+ }
2326
+ }
2327
+ catch (error) {
2328
+ // Skip items that can't be processed
2329
+ continue;
2330
+ }
2331
+ }
2332
+ }
2333
+ }
2334
+ }
2335
+ return connections.filter(conn => conn.strength > 0.5);
2336
+ }
2337
+ /**
2338
+ * Merge semantic clusters based on connections
2339
+ */
2340
+ async _mergeSemanticClusters(clusters, connections) {
2341
+ // Simple merging based on strong connections
2342
+ const merged = [...clusters];
2343
+ for (const connection of connections) {
2344
+ if (connection.strength > 0.8) {
2345
+ const fromCluster = merged.find(c => c.members.includes(connection.from));
2346
+ const toCluster = merged.find(c => c.members.includes(connection.to));
2347
+ if (fromCluster && toCluster && fromCluster !== toCluster) {
2348
+ // Merge clusters
2349
+ fromCluster.members = [...fromCluster.members, ...toCluster.members];
2350
+ fromCluster.size = fromCluster.members.length;
2351
+ fromCluster.label = `merged ${fromCluster.label}`;
2352
+ // Remove merged cluster
2353
+ const index = merged.indexOf(toCluster);
2354
+ if (index > -1)
2355
+ merged.splice(index, 1);
2356
+ }
2357
+ }
2358
+ }
2359
+ return merged;
2360
+ }
2361
+ /**
2362
+ * Get optimal clustering level for HNSW
2363
+ */
2364
+ _getOptimalClusteringLevel(totalItems) {
2365
+ if (totalItems < 100)
2366
+ return 0;
2367
+ if (totalItems < 1000)
2368
+ return 1;
2369
+ if (totalItems < 10000)
2370
+ return 2;
2371
+ return 3;
2372
+ }
2373
/**
 * Representative nodes for an HNSW hierarchy level.
 * NOTE(review): does not consult the actual HNSW index yet — it random-samples
 * the full id set, halving the sample per level (2^(level+1) divisor,
 * minimum 10 nodes). Replace once level access is exposed by the index.
 */
async _getHNSWLevelNodes(level) {
    // This would use the HNSW index to get nodes at specified level
    // For now, return a sample of all items
    const allItems = await this._getAllItemIds();
    const sampleSize = Math.max(10, Math.floor(allItems.length / Math.pow(2, level + 1)));
    return this._getRandomSample(allItems, sampleSize);
}
+ /**
2384
+ * Find cluster members using HNSW neighbors
2385
+ */
2386
+ async _findClusterMembers(levelNode, _allItems, threshold) {
2387
+ try {
2388
+ const neighbors = await this.brain.neural.neighbors(levelNode, {
2389
+ limit: Math.min(50, Math.floor(_allItems.length / 10)),
2390
+ minSimilarity: threshold
2391
+ });
2392
+ return [levelNode, ...neighbors.neighbors.map((n) => n.id)];
2393
+ }
2394
+ catch (error) {
2395
+ return [levelNode];
2396
+ }
2397
+ }
2398
/**
 * Confidence for a hierarchical cluster: the vector coherence of its
 * members (see _calculateVectorCoherence). Empty or singleton member
 * lists score a full 1.0.
 *
 * @param members Member item ids
 * @returns Coherence-based confidence in [0, 1]
 */
async _calculateHierarchicalConfidence(members) {
    if (members.length <= 1)
        return 1.0;
    const items = await this._getItemsWithVectors(members);
    const coherence = await this._calculateVectorCoherence(items);
    return coherence;
}
/**
 * Attach leftover items to their nearest cluster by Euclidean distance to
 * the cluster centroid. Mutates the cluster objects in place (members and
 * size); centroids are NOT recomputed here. Items without vectors, or that
 * fail to load, are silently skipped.
 *
 * @param unassigned Item ids not yet in any cluster
 * @param clusters   Clusters to assign into (mutated)
 */
async _assignUnassignedItems(unassigned, clusters) {
    for (const itemId of unassigned) {
        if (clusters.length === 0)
            break;
        try {
            const noun = await this.brain.getNoun(itemId);
            const itemVector = noun?.vector || [];
            if (itemVector.length === 0)
                continue;
            // Linear scan for the nearest centroid.
            // NOTE(review): a dimension mismatch yields Infinity for every
            // cluster, so such items default into clusters[0] — confirm
            // centroids always share the item vector dimensionality.
            let bestCluster = clusters[0];
            let minDistance = Infinity;
            for (const cluster of clusters) {
                const distance = Math.sqrt(this._calculateSquaredDistance(itemVector, cluster.centroid));
                if (distance < minDistance) {
                    minDistance = distance;
                    bestCluster = cluster;
                }
            }
            bestCluster.members.push(itemId);
            bestCluster.size++;
        }
        catch (error) {
            // Skip items that can't be processed
            continue;
        }
    }
}
+ }
2439
+ //# sourceMappingURL=improvedNeuralAPI.js.map