@soulcraft/brainy 2.14.3 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1002,6 +1002,12 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
1002
1002
  * @returns Array of verbs of the specified type
1003
1003
  */
1004
1004
  getVerbsByType(type: string): Promise<GraphVerb[]>;
1005
/**
 * Get all verbs associated with a specific noun (both as source and target).
 * The implementation merges the by-source and by-target lookups and keys the
 * result by verb id, so a verb returned by both lookups is listed once.
 * @param nounId The ID of the noun
 * @returns Array of verbs where the noun is either source or target
 */
getVerbsForNoun(nounId: string): Promise<GraphVerb[]>;
1005
1011
  /**
1006
1012
  * Delete a verb
1007
1013
  * @param id The ID of the verb to delete
@@ -2985,6 +2985,30 @@ export class BrainyData {
2985
2985
  throw new Error(`Failed to get verbs by type ${type}: ${error}`);
2986
2986
  }
2987
2987
  }
2988
+ /**
2989
+ * Get all verbs associated with a specific noun (both as source and target)
2990
+ * @param nounId The ID of the noun
2991
+ * @returns Array of verbs where the noun is either source or target
2992
+ */
2993
+ async getVerbsForNoun(nounId) {
2994
+ await this.ensureInitialized();
2995
+ try {
2996
+ // Get verbs where this noun is the source
2997
+ const sourceVerbs = await this.getVerbsBySource(nounId);
2998
+ // Get verbs where this noun is the target
2999
+ const targetVerbs = await this.getVerbsByTarget(nounId);
3000
+ // Combine and deduplicate (in case a verb somehow appears in both - shouldn't happen but safety first)
3001
+ const verbMap = new Map();
3002
+ for (const verb of [...sourceVerbs, ...targetVerbs]) {
3003
+ verbMap.set(verb.id, verb);
3004
+ }
3005
+ return Array.from(verbMap.values());
3006
+ }
3007
+ catch (error) {
3008
+ console.error(`Failed to get verbs for noun ${nounId}:`, error);
3009
+ throw error;
3010
+ }
3011
+ }
2988
3012
  /**
2989
3013
  * Delete a verb
2990
3014
  * @param id The ID of the verb to delete
@@ -18,7 +18,7 @@
18
18
  * Private methods are prefixed with _ and not exposed in public API
19
19
  */
20
20
  import { Vector } from '../coreTypes.js';
21
- import { SemanticCluster, DomainCluster, TemporalCluster, SimilarityOptions, SimilarityResult, NeighborOptions, NeighborsResult, SemanticHierarchy, HierarchyOptions, ClusteringOptions, DomainClusteringOptions, TemporalClusteringOptions, StreamClusteringOptions, VisualizationOptions, VisualizationResult, OutlierOptions, Outlier, StreamingBatch, TimeWindow, PerformanceMetrics, NeuralAPIConfig } from './types.js';
21
+ import { SemanticCluster, DomainCluster, TemporalCluster, EnhancedSemanticCluster, SimilarityOptions, SimilarityResult, NeighborOptions, NeighborsResult, SemanticHierarchy, HierarchyOptions, ClusteringOptions, DomainClusteringOptions, TemporalClusteringOptions, StreamClusteringOptions, VisualizationOptions, VisualizationResult, OutlierOptions, Outlier, StreamingBatch, TimeWindow, PerformanceMetrics, NeuralAPIConfig } from './types.js';
22
22
  export declare class ImprovedNeuralAPI {
23
23
  private brain;
24
24
  private config;
@@ -71,6 +71,16 @@ export declare class ImprovedNeuralAPI {
71
71
  * Incremental clustering - add new items to existing clusters
72
72
  */
73
73
  updateClusters(newItems: string[], options?: ClusteringOptions): Promise<SemanticCluster[]>;
74
/**
 * Enhanced clustering with relationship analysis using verbs
 * Returns clusters with intra-cluster and inter-cluster relationship information
 *
 * Scalable for millions of nodes - uses efficient batching and filtering
 *
 * @param input Forwarded unchanged to `clusters()` to produce the base clusters
 * @param options.batchSize Number of verbs requested per page (implementation default: 1000)
 * @param options.maxRelationships Upper bound on the number of edges collected (implementation default: 10000)
 * @returns The base clusters, each extended with its edges and a relationship summary
 */
clustersWithRelationships(input?: string | string[] | ClusteringOptions, options?: {
    batchSize?: number;
    maxRelationships?: number;
}): Promise<EnhancedSemanticCluster[]>;
74
84
  /**
75
85
  * Find K-nearest semantic neighbors
76
86
  */
@@ -276,6 +286,7 @@ export declare class ImprovedNeuralAPI {
276
286
  private _calculateItemToClusterSimilarity;
277
287
  private _recalculateClusterCentroid;
278
288
  private _calculateSimilarity;
289
+ private _calculateEdgeWeight;
279
290
  private _sortNeighbors;
280
291
  private _buildSemanticHierarchy;
281
292
  private _detectOutliersClusterBased;
@@ -354,6 +354,161 @@ export class ImprovedNeuralAPI {
354
354
  throw new ClusteringError(`Failed to update clusters: ${errorMessage}`, { newItems, options });
355
355
  }
356
356
  }
357
+ /**
358
+ * Enhanced clustering with relationship analysis using verbs
359
+ * Returns clusters with intra-cluster and inter-cluster relationship information
360
+ *
361
+ * Scalable for millions of nodes - uses efficient batching and filtering
362
+ */
363
+ async clustersWithRelationships(input, options) {
364
+ const startTime = performance.now();
365
+ const batchSize = options?.batchSize || 1000;
366
+ const maxRelationships = options?.maxRelationships || 10000;
367
+ let processedCount = 0;
368
+ try {
369
+ // Get basic clusters first
370
+ const basicClusters = await this.clusters(input);
371
+ if (basicClusters.length === 0) {
372
+ return [];
373
+ }
374
+ // Build member lookup for O(1) cluster membership checking
375
+ const memberToClusterMap = new Map();
376
+ const clusterMap = new Map();
377
+ for (const cluster of basicClusters) {
378
+ clusterMap.set(cluster.id, cluster);
379
+ for (const memberId of cluster.members) {
380
+ memberToClusterMap.set(memberId, cluster.id);
381
+ }
382
+ }
383
+ // Initialize cluster edge collections
384
+ const clusterEdges = new Map();
385
+ for (const cluster of basicClusters) {
386
+ clusterEdges.set(cluster.id, {
387
+ intra: [],
388
+ inter: [],
389
+ edgeTypes: {}
390
+ });
391
+ }
392
+ // Process verbs in batches to handle millions of relationships efficiently
393
+ let hasMoreVerbs = true;
394
+ let offset = 0;
395
+ while (hasMoreVerbs && processedCount < maxRelationships) {
396
+ // Get batch of verbs using proper pagination API
397
+ const verbResult = await this.brain.getVerbs({
398
+ pagination: {
399
+ offset: offset,
400
+ limit: batchSize
401
+ }
402
+ });
403
+ const verbBatch = verbResult.data;
404
+ if (verbBatch.length === 0) {
405
+ hasMoreVerbs = false;
406
+ break;
407
+ }
408
+ // Process this batch
409
+ for (const verb of verbBatch) {
410
+ if (processedCount >= maxRelationships)
411
+ break;
412
+ const sourceClusterId = memberToClusterMap.get(verb.sourceId);
413
+ const targetClusterId = memberToClusterMap.get(verb.targetId);
414
+ // Skip verbs that don't involve any clustered nodes
415
+ if (!sourceClusterId && !targetClusterId)
416
+ continue;
417
+ const edgeWeight = this._calculateEdgeWeight(verb);
418
+ const edgeType = verb.verb || verb.type || 'relationship';
419
+ if (sourceClusterId && targetClusterId) {
420
+ if (sourceClusterId === targetClusterId) {
421
+ // Intra-cluster relationship
422
+ const edges = clusterEdges.get(sourceClusterId);
423
+ edges.intra.push({
424
+ id: verb.id,
425
+ source: verb.sourceId,
426
+ target: verb.targetId,
427
+ type: edgeType,
428
+ weight: edgeWeight,
429
+ isInterCluster: false,
430
+ sourceCluster: sourceClusterId,
431
+ targetCluster: sourceClusterId
432
+ });
433
+ edges.edgeTypes[edgeType] = (edges.edgeTypes[edgeType] || 0) + 1;
434
+ }
435
+ else {
436
+ // Inter-cluster relationship
437
+ const sourceEdges = clusterEdges.get(sourceClusterId);
438
+ const targetEdges = clusterEdges.get(targetClusterId);
439
+ const edge = {
440
+ id: verb.id,
441
+ source: verb.sourceId,
442
+ target: verb.targetId,
443
+ type: edgeType,
444
+ weight: edgeWeight,
445
+ isInterCluster: true,
446
+ sourceCluster: sourceClusterId,
447
+ targetCluster: targetClusterId
448
+ };
449
+ sourceEdges.inter.push(edge);
450
+ // Don't duplicate - target cluster will see this as incoming
451
+ sourceEdges.edgeTypes[edgeType] = (sourceEdges.edgeTypes[edgeType] || 0) + 1;
452
+ }
453
+ }
454
+ else {
455
+ // One-way relationship to/from cluster
456
+ const clusterId = sourceClusterId || targetClusterId;
457
+ const edges = clusterEdges.get(clusterId);
458
+ edges.inter.push({
459
+ id: verb.id,
460
+ source: verb.sourceId,
461
+ target: verb.targetId,
462
+ type: edgeType,
463
+ weight: edgeWeight,
464
+ isInterCluster: true,
465
+ sourceCluster: sourceClusterId || 'external',
466
+ targetCluster: targetClusterId || 'external'
467
+ });
468
+ edges.edgeTypes[edgeType] = (edges.edgeTypes[edgeType] || 0) + 1;
469
+ }
470
+ processedCount++;
471
+ }
472
+ offset += batchSize;
473
+ // Memory management: if we have too many edges, break early
474
+ const totalEdges = Array.from(clusterEdges.values())
475
+ .reduce((sum, edges) => sum + edges.intra.length + edges.inter.length, 0);
476
+ if (totalEdges >= maxRelationships) {
477
+ console.warn(`Relationship analysis stopped at ${totalEdges} edges to maintain performance`);
478
+ break;
479
+ }
480
+ // Check if we got fewer verbs than batch size (end of data)
481
+ if (verbBatch.length < batchSize) {
482
+ hasMoreVerbs = false;
483
+ }
484
+ }
485
+ // Build enhanced clusters
486
+ const enhancedClusters = [];
487
+ for (const cluster of basicClusters) {
488
+ const edges = clusterEdges.get(cluster.id);
489
+ enhancedClusters.push({
490
+ ...cluster,
491
+ intraClusterEdges: edges.intra,
492
+ interClusterEdges: edges.inter,
493
+ relationshipSummary: {
494
+ totalEdges: edges.intra.length + edges.inter.length,
495
+ intraClusterEdges: edges.intra.length,
496
+ interClusterEdges: edges.inter.length,
497
+ edgeTypes: edges.edgeTypes
498
+ }
499
+ });
500
+ }
501
+ this._trackPerformance('clustersWithRelationships', startTime, processedCount, 'enhanced-scalable');
502
+ return enhancedClusters;
503
+ }
504
+ catch (error) {
505
+ const errorMessage = error instanceof Error ? error.message : String(error);
506
+ throw new ClusteringError(`Failed to perform relationship-aware clustering: ${errorMessage}`, {
507
+ input: typeof input === 'object' ? JSON.stringify(input) : input,
508
+ processedCount: processedCount || 0
509
+ });
510
+ }
511
+ }
357
512
  // ===== PUBLIC API: NEIGHBORS & HIERARCHY =====
358
513
  /**
359
514
  * Find K-nearest semantic neighbors
@@ -2136,6 +2291,29 @@ The items were grouped using ${algorithm} clustering. What is the most appropria
2136
2291
  async _calculateSimilarity(id1, id2) {
2137
2292
  return await this.similar(id1, id2);
2138
2293
  }
2294
+ _calculateEdgeWeight(verb) {
2295
+ // Calculate edge weight based on verb properties
2296
+ let weight = 1.0;
2297
+ // Factor in connection strength if available
2298
+ if (verb.connections && verb.connections instanceof Map) {
2299
+ const connectionCount = verb.connections.size;
2300
+ weight += Math.log(connectionCount + 1) * 0.1;
2301
+ }
2302
+ // Factor in verb type significance
2303
+ const significantVerbs = ['caused', 'created', 'contains', 'implements', 'extends'];
2304
+ if (verb.verb && significantVerbs.includes(verb.verb.toLowerCase())) {
2305
+ weight += 0.3;
2306
+ }
2307
+ // Factor in recency if available
2308
+ if (verb.metadata?.createdAt) {
2309
+ const now = Date.now();
2310
+ const created = new Date(verb.metadata.createdAt).getTime();
2311
+ const daysSinceCreated = (now - created) / (1000 * 60 * 60 * 24);
2312
+ // Newer relationships get slight boost
2313
+ weight += Math.max(0, (30 - daysSinceCreated) / 100);
2314
+ }
2315
+ return Math.min(weight, 3.0); // Cap at 3.0
2316
+ }
2139
2317
  _sortNeighbors(neighbors, sortBy) {
2140
2318
  switch (sortBy) {
2141
2319
  case 'similarity':
@@ -17,6 +17,26 @@ export interface SemanticCluster {
17
17
  cohesion?: number;
18
18
  level?: number;
19
19
  }
20
/**
 * A verb projected as an edge in a relationship-aware clustering result.
 * Field notes describe how `clustersWithRelationships` populates the edge.
 */
export interface ClusterEdge {
    /** ID of the verb the edge was built from */
    id: string;
    /** The verb's sourceId */
    source: string;
    /** The verb's targetId */
    target: string;
    /** The verb's `verb` value, else its `type`, else 'relationship' */
    type: string;
    /** Heuristic weight from `_calculateEdgeWeight`, capped at 3.0 */
    weight?: number;
    /** false only when both endpoints are members of the same cluster */
    isInterCluster: boolean;
    /** Cluster ID of the source node, or 'external' when the source is not in any cluster */
    sourceCluster?: string;
    /** Cluster ID of the target node, or 'external' when the target is not in any cluster */
    targetCluster?: string;
}
30
/**
 * A semantic cluster extended with the relationship edges collected for it
 * by `clustersWithRelationships`.
 */
export interface EnhancedSemanticCluster extends SemanticCluster {
    /** Edges whose source and target are both members of this cluster */
    intraClusterEdges: ClusterEdge[];
    /**
     * Edges recorded for this cluster that have an endpoint outside it. An edge
     * between two clusters is recorded on the source cluster only.
     */
    interClusterEdges: ClusterEdge[];
    /** Counts derived from the two edge arrays above */
    relationshipSummary: {
        /** intraClusterEdges + interClusterEdges */
        totalEdges: number;
        /** Length of the intraClusterEdges array */
        intraClusterEdges: number;
        /** Length of the interClusterEdges array */
        interClusterEdges: number;
        /** Number of recorded edges per edge type */
        edgeTypes: Record<string, number>;
    };
}
20
40
  export interface DomainCluster extends SemanticCluster {
21
41
  domain: string;
22
42
  domainConfidence: number;
@@ -770,28 +770,40 @@ export class FileSystemStorage extends BaseStorage {
770
770
  * Get verbs by source
771
771
  */
772
772
  async getVerbsBySource_internal(sourceId) {
773
- // This method is deprecated and would require loading metadata for each edge
774
- // For now, return empty array since this is not efficiently implementable with new storage pattern
775
- console.warn('getVerbsBySource_internal is deprecated and not efficiently supported in new storage pattern');
776
- return [];
773
+ console.log(`[DEBUG] getVerbsBySource_internal called for sourceId: ${sourceId}`);
774
+ // Use the working pagination method with source filter
775
+ const result = await this.getVerbsWithPagination({
776
+ limit: 10000,
777
+ filter: { sourceId: [sourceId] }
778
+ });
779
+ console.log(`[DEBUG] Found ${result.items.length} verbs for source ${sourceId}`);
780
+ return result.items;
777
781
  }
778
782
  /**
779
783
  * Get verbs by target
780
784
  */
781
785
  async getVerbsByTarget_internal(targetId) {
782
- // This method is deprecated and would require loading metadata for each edge
783
- // For now, return empty array since this is not efficiently implementable with new storage pattern
784
- console.warn('getVerbsByTarget_internal is deprecated and not efficiently supported in new storage pattern');
785
- return [];
786
+ console.log(`[DEBUG] getVerbsByTarget_internal called for targetId: ${targetId}`);
787
+ // Use the working pagination method with target filter
788
+ const result = await this.getVerbsWithPagination({
789
+ limit: 10000,
790
+ filter: { targetId: [targetId] }
791
+ });
792
+ console.log(`[DEBUG] Found ${result.items.length} verbs for target ${targetId}`);
793
+ return result.items;
786
794
  }
787
795
  /**
788
796
  * Get verbs by type
789
797
  */
790
798
  async getVerbsByType_internal(type) {
791
- // This method is deprecated and would require loading metadata for each edge
792
- // For now, return empty array since this is not efficiently implementable with new storage pattern
793
- console.warn('getVerbsByType_internal is deprecated and not efficiently supported in new storage pattern');
794
- return [];
799
+ console.log(`[DEBUG] getVerbsByType_internal called for type: ${type}`);
800
+ // Use the working pagination method with type filter
801
+ const result = await this.getVerbsWithPagination({
802
+ limit: 10000,
803
+ filter: { verbType: [type] }
804
+ });
805
+ console.log(`[DEBUG] Found ${result.items.length} verbs for type ${type}`);
806
+ return result.items;
795
807
  }
796
808
  /**
797
809
  * Get verbs with pagination
@@ -829,9 +841,22 @@ export class FileSystemStorage extends BaseStorage {
829
841
  const edge = JSON.parse(data);
830
842
  // Get metadata which contains the actual verb information
831
843
  const metadata = await this.getVerbMetadata(id);
832
- // If no metadata exists, skip this verb (it's incomplete)
844
+ // If no metadata exists, try to reconstruct basic metadata from filename
833
845
  if (!metadata) {
834
- console.warn(`Verb ${id} has no metadata, skipping`);
846
+ console.warn(`Verb ${id} has no metadata, trying to create minimal verb`);
847
+ // Create minimal GraphVerb without full metadata
848
+ const minimalVerb = {
849
+ id: edge.id,
850
+ vector: edge.vector,
851
+ connections: edge.connections || new Map(),
852
+ sourceId: 'unknown',
853
+ targetId: 'unknown',
854
+ source: 'unknown',
855
+ target: 'unknown',
856
+ type: 'relationship',
857
+ verb: 'relatedTo'
858
+ };
859
+ verbs.push(minimalVerb);
835
860
  continue;
836
861
  }
837
862
  // Convert connections Map to proper format if needed
@@ -133,9 +133,31 @@ export class BaseStorage extends BaseStorageAdapter {
133
133
  createdBy: verb.createdBy,
134
134
  embedding: verb.embedding
135
135
  };
136
- // Save both the HNSWVerb and metadata
137
- await this.saveVerb_internal(hnswVerb);
138
- await this.saveVerbMetadata(verb.id, metadata);
136
+ // Save both the HNSWVerb and metadata atomically
137
+ try {
138
+ console.log(`[DEBUG] Saving verb ${verb.id}: sourceId=${verb.sourceId}, targetId=${verb.targetId}`);
139
+ // Save the HNSWVerb first
140
+ await this.saveVerb_internal(hnswVerb);
141
+ console.log(`[DEBUG] Successfully saved HNSWVerb file for ${verb.id}`);
142
+ // Then save the metadata
143
+ await this.saveVerbMetadata(verb.id, metadata);
144
+ console.log(`[DEBUG] Successfully saved metadata file for ${verb.id}`);
145
+ }
146
+ catch (error) {
147
+ console.error(`[ERROR] Failed to save verb ${verb.id}:`, error);
148
+ // Attempt cleanup - remove verb file if metadata failed
149
+ try {
150
+ const verbExists = await this.getVerb_internal(verb.id);
151
+ if (verbExists) {
152
+ console.log(`[CLEANUP] Attempting to remove orphaned verb file ${verb.id}`);
153
+ await this.deleteVerb_internal(verb.id);
154
+ }
155
+ }
156
+ catch (cleanupError) {
157
+ console.error(`[ERROR] Failed to cleanup orphaned verb ${verb.id}:`, cleanupError);
158
+ }
159
+ throw new Error(`Failed to save verb ${verb.id}: ${error instanceof Error ? error.message : String(error)}`);
160
+ }
139
161
  }
140
162
  /**
141
163
  * Get a verb from storage
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "2.14.3",
3
+ "version": "2.15.0",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",