@soulcraft/brainy 3.37.8 → 3.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,6 +62,10 @@ export interface ImportProgress {
62
62
  total?: number;
63
63
  entities?: number;
64
64
  relationships?: number;
65
+ /** Rows per second (v3.38.0) */
66
+ throughput?: number;
67
+ /** Estimated time remaining in ms (v3.38.0) */
68
+ eta?: number;
65
69
  }
66
70
  export interface ImportResult {
67
71
  /** Import ID for history tracking */
@@ -237,13 +237,20 @@ export class ImportCoordinator {
237
237
  enableConceptExtraction: options.enableConceptExtraction !== false,
238
238
  confidenceThreshold: options.confidenceThreshold || 0.6,
239
239
  onProgress: (stats) => {
240
+ // Enhanced progress reporting (v3.38.0) with throughput and ETA
241
+ const message = stats.throughput
242
+ ? `Extracting entities from ${format} (${stats.throughput} rows/sec, ETA: ${Math.round(stats.eta / 1000)}s)...`
243
+ : `Extracting entities from ${format}...`;
240
244
  options.onProgress?.({
241
245
  stage: 'extracting',
242
- message: `Extracting entities from ${format}...`,
246
+ message,
243
247
  processed: stats.processed,
244
248
  total: stats.total,
245
249
  entities: stats.entities,
246
- relationships: stats.relationships
250
+ relationships: stats.relationships,
251
+ // Pass through enhanced metrics if available
252
+ throughput: stats.throughput,
253
+ eta: stats.eta
247
254
  });
248
255
  }
249
256
  };
@@ -25,12 +25,18 @@ export interface SmartExcelOptions extends FormatHandlerOptions {
25
25
  definitionColumn?: string;
26
26
  typeColumn?: string;
27
27
  relatedColumn?: string;
28
- /** Progress callback */
28
+ /** Progress callback (v3.38.0: Enhanced with performance metrics) */
29
29
  onProgress?: (stats: {
30
30
  processed: number;
31
31
  total: number;
32
32
  entities: number;
33
33
  relationships: number;
34
+ /** Rows per second (v3.38.0) */
35
+ throughput?: number;
36
+ /** Estimated time remaining in ms (v3.38.0) */
37
+ eta?: number;
38
+ /** Current phase (v3.38.0) */
39
+ phase?: string;
34
40
  }) => void;
35
41
  }
36
42
  export interface ExtractedRow {
@@ -66,114 +66,141 @@ export class SmartExcelImporter {
66
66
  }
67
67
  // Detect column names
68
68
  const columns = this.detectColumns(rows[0], opts);
69
- // Process each row
69
+ // Process each row with BATCHED PARALLEL PROCESSING (v3.38.0)
70
70
  const extractedRows = [];
71
71
  const entityMap = new Map();
72
72
  const stats = {
73
73
  byType: {},
74
74
  byConfidence: { high: 0, medium: 0, low: 0 }
75
75
  };
76
- for (let i = 0; i < rows.length; i++) {
77
- const row = rows[i];
78
- // Extract data from row
79
- const term = this.getColumnValue(row, columns.term) || `Entity_${i}`;
80
- const definition = this.getColumnValue(row, columns.definition) || '';
81
- const type = this.getColumnValue(row, columns.type);
82
- const relatedTerms = this.getColumnValue(row, columns.related);
83
- // Extract entities from definition
84
- let relatedEntities = [];
85
- if (opts.enableNeuralExtraction && definition) {
86
- relatedEntities = await this.extractor.extract(definition, {
87
- confidence: opts.confidenceThreshold * 0.8, // Lower threshold for related entities
88
- neuralMatching: true,
89
- cache: { enabled: true }
90
- });
91
- // Filter out the main term from related entities
92
- relatedEntities = relatedEntities.filter(e => e.text.toLowerCase() !== term.toLowerCase());
93
- }
94
- // Determine main entity type
95
- const mainEntityType = type ?
96
- this.mapTypeString(type) :
97
- (relatedEntities.length > 0 ? relatedEntities[0].type : NounType.Thing);
98
- // Generate entity ID
99
- const entityId = this.generateEntityId(term);
100
- entityMap.set(term.toLowerCase(), entityId);
101
- // Extract concepts
102
- let concepts = [];
103
- if (opts.enableConceptExtraction && definition) {
104
- try {
105
- concepts = await this.brain.extractConcepts(definition, { limit: 10 });
106
- }
107
- catch (error) {
108
- // Concept extraction is optional
109
- concepts = [];
110
- }
111
- }
112
- // Create main entity
113
- const mainEntity = {
114
- id: entityId,
115
- name: term,
116
- type: mainEntityType,
117
- description: definition,
118
- confidence: 0.95, // Main entity from row has high confidence
119
- metadata: {
120
- source: 'excel',
121
- row: i + 1,
122
- originalData: row,
123
- concepts,
124
- extractedAt: Date.now()
125
- }
126
- };
127
- // Track statistics
128
- this.updateStats(stats, mainEntityType, mainEntity.confidence);
129
- // Infer relationships
130
- const relationships = [];
131
- if (opts.enableRelationshipInference) {
132
- // Extract relationships from definition text
133
- for (const relEntity of relatedEntities) {
134
- const verbType = await this.inferRelationship(term, relEntity.text, definition);
135
- relationships.push({
136
- from: entityId,
137
- to: relEntity.text, // Use entity name directly, will be resolved later
138
- type: verbType,
139
- confidence: relEntity.confidence,
140
- evidence: `Extracted from: "${definition.substring(0, 100)}..."`
141
- });
142
- }
143
- // Parse explicit "Related Terms" column
144
- if (relatedTerms) {
145
- const terms = relatedTerms.split(/[,;]/).map(t => t.trim()).filter(Boolean);
146
- for (const relTerm of terms) {
147
- // Ensure we don't create self-relationships
148
- if (relTerm.toLowerCase() !== term.toLowerCase()) {
149
- relationships.push({
150
- from: entityId,
151
- to: relTerm, // Use term name directly
152
- type: VerbType.RelatedTo,
153
- confidence: 0.9, // Explicit relationships have high confidence
154
- evidence: `Explicitly listed in "Related" column`
155
- });
76
+ // Batch processing configuration
77
+ const CHUNK_SIZE = 10; // Process 10 rows at a time for optimal performance
78
+ let totalProcessed = 0;
79
+ const performanceStartTime = Date.now();
80
+ // Process rows in chunks
81
+ for (let chunkStart = 0; chunkStart < rows.length; chunkStart += CHUNK_SIZE) {
82
+ const chunk = rows.slice(chunkStart, Math.min(chunkStart + CHUNK_SIZE, rows.length));
83
+ // Process chunk in parallel for massive speedup
84
+ const chunkResults = await Promise.all(chunk.map(async (row, chunkIndex) => {
85
+ const i = chunkStart + chunkIndex;
86
+ // Extract data from row
87
+ const term = this.getColumnValue(row, columns.term) || `Entity_${i}`;
88
+ const definition = this.getColumnValue(row, columns.definition) || '';
89
+ const type = this.getColumnValue(row, columns.type);
90
+ const relatedTerms = this.getColumnValue(row, columns.related);
91
+ // Parallel extraction: entities AND concepts at the same time
92
+ const [relatedEntities, concepts] = await Promise.all([
93
+ // Extract entities from definition
94
+ opts.enableNeuralExtraction && definition
95
+ ? this.extractor.extract(definition, {
96
+ confidence: opts.confidenceThreshold * 0.8,
97
+ neuralMatching: true,
98
+ cache: { enabled: true }
99
+ }).then(entities =>
100
+ // Filter out the main term from related entities
101
+ entities.filter(e => e.text.toLowerCase() !== term.toLowerCase()))
102
+ : Promise.resolve([]),
103
+ // Extract concepts (in parallel with entity extraction)
104
+ opts.enableConceptExtraction && definition
105
+ ? this.brain.extractConcepts(definition, { limit: 10 }).catch(() => [])
106
+ : Promise.resolve([])
107
+ ]);
108
+ // Determine main entity type
109
+ const mainEntityType = type ?
110
+ this.mapTypeString(type) :
111
+ (relatedEntities.length > 0 ? relatedEntities[0].type : NounType.Thing);
112
+ // Generate entity ID
113
+ const entityId = this.generateEntityId(term);
114
+ // Create main entity
115
+ const mainEntity = {
116
+ id: entityId,
117
+ name: term,
118
+ type: mainEntityType,
119
+ description: definition,
120
+ confidence: 0.95,
121
+ metadata: {
122
+ source: 'excel',
123
+ row: i + 1,
124
+ originalData: row,
125
+ concepts,
126
+ extractedAt: Date.now()
127
+ }
128
+ };
129
+ // Infer relationships
130
+ const relationships = [];
131
+ if (opts.enableRelationshipInference) {
132
+ // Extract relationships from definition text
133
+ for (const relEntity of relatedEntities) {
134
+ const verbType = await this.inferRelationship(term, relEntity.text, definition);
135
+ relationships.push({
136
+ from: entityId,
137
+ to: relEntity.text,
138
+ type: verbType,
139
+ confidence: relEntity.confidence,
140
+ evidence: `Extracted from: "${definition.substring(0, 100)}..."`
141
+ });
142
+ }
143
+ // Parse explicit "Related Terms" column
144
+ if (relatedTerms) {
145
+ const terms = relatedTerms.split(/[,;]/).map(t => t.trim()).filter(Boolean);
146
+ for (const relTerm of terms) {
147
+ if (relTerm.toLowerCase() !== term.toLowerCase()) {
148
+ relationships.push({
149
+ from: entityId,
150
+ to: relTerm,
151
+ type: VerbType.RelatedTo,
152
+ confidence: 0.9,
153
+ evidence: `Explicitly listed in "Related" column`
154
+ });
155
+ }
156
156
  }
157
157
  }
158
158
  }
159
+ return {
160
+ term,
161
+ entityId,
162
+ mainEntity,
163
+ mainEntityType,
164
+ relatedEntities,
165
+ relationships,
166
+ concepts
167
+ };
168
+ }));
169
+ // Process chunk results sequentially to maintain order
170
+ for (const result of chunkResults) {
171
+ // Store entity ID mapping
172
+ entityMap.set(result.term.toLowerCase(), result.entityId);
173
+ // Track statistics
174
+ this.updateStats(stats, result.mainEntityType, result.mainEntity.confidence);
175
+ // Add extracted row
176
+ extractedRows.push({
177
+ entity: result.mainEntity,
178
+ relatedEntities: result.relatedEntities.map(e => ({
179
+ name: e.text,
180
+ type: e.type,
181
+ confidence: e.confidence
182
+ })),
183
+ relationships: result.relationships,
184
+ concepts: result.concepts
185
+ });
159
186
  }
160
- // Add extracted row
161
- extractedRows.push({
162
- entity: mainEntity,
163
- relatedEntities: relatedEntities.map(e => ({
164
- name: e.text,
165
- type: e.type,
166
- confidence: e.confidence
167
- })),
168
- relationships,
169
- concepts
170
- });
171
- // Report progress
187
+ // Update progress tracking
188
+ totalProcessed += chunk.length;
189
+ // Calculate performance metrics
190
+ const elapsed = Date.now() - performanceStartTime;
191
+ const rowsPerSecond = totalProcessed / (elapsed / 1000);
192
+ const remainingRows = rows.length - totalProcessed;
193
+ const estimatedTimeRemaining = remainingRows / rowsPerSecond;
194
+ // Report progress with enhanced metrics
172
195
  opts.onProgress({
173
- processed: i + 1,
196
+ processed: totalProcessed,
174
197
  total: rows.length,
175
- entities: extractedRows.length + relatedEntities.length,
176
- relationships: relationships.length
198
+ entities: extractedRows.reduce((sum, row) => sum + 1 + row.relatedEntities.length, 0),
199
+ relationships: extractedRows.reduce((sum, row) => sum + row.relationships.length, 0),
200
+ // Additional performance metrics (v3.38.0)
201
+ throughput: Math.round(rowsPerSecond * 10) / 10,
202
+ eta: Math.round(estimatedTimeRemaining),
203
+ phase: 'extracting'
177
204
  });
178
205
  }
179
206
  return {
@@ -24,6 +24,8 @@ export declare class NeuralEntityExtractor {
24
24
  private typeEmbeddings;
25
25
  private initialized;
26
26
  private cache;
27
+ private embeddingCache;
28
+ private embeddingCacheStats;
27
29
  constructor(brain: Brainy | Brainy<any>, cacheOptions?: EntityCacheOptions);
28
30
  /**
29
31
  * Initialize type embeddings for neural matching
@@ -61,7 +63,10 @@ export declare class NeuralEntityExtractor {
61
63
  */
62
64
  private classifyByRules;
63
65
  /**
64
- * Get embedding for text
66
+ * Get embedding for text with caching (v3.38.0)
67
+ *
68
+ * PERFORMANCE OPTIMIZATION: Caches embeddings during extraction session
69
+ * to avoid redundant model calls for repeated text (common in large imports)
65
70
  */
66
71
  private getEmbedding;
67
72
  /**
@@ -96,4 +101,27 @@ export declare class NeuralEntityExtractor {
96
101
  * Cleanup expired cache entries
97
102
  */
98
103
  cleanupCache(): number;
104
+ /**
105
+ * Clear embedding cache (v3.38.0)
106
+ *
107
+ * Clears the runtime embedding cache. Useful for:
108
+ * - Freeing memory after large imports
109
+ * - Testing with fresh cache state
110
+ */
111
+ clearEmbeddingCache(): void;
112
+ /**
113
+ * Get embedding cache statistics (v3.38.0)
114
+ *
115
+ * Returns performance metrics for the embedding cache:
116
+ * - hits: Number of cache hits (avoided model calls)
117
+ * - misses: Number of cache misses (required model calls)
118
+ * - size: Current cache size
119
+ * - hitRate: Percentage of requests served from cache
120
+ */
121
+ getEmbeddingCacheStats(): {
122
+ hitRate: number;
123
+ hits: number;
124
+ misses: number;
125
+ size: number;
126
+ };
99
127
  }
@@ -12,6 +12,14 @@ export class NeuralEntityExtractor {
12
12
  // Type embeddings for similarity matching
13
13
  this.typeEmbeddings = new Map();
14
14
  this.initialized = false;
15
+ // Runtime embedding cache for performance (v3.38.0)
16
+ // Caches candidate embeddings during an extraction session to avoid redundant model calls
17
+ this.embeddingCache = new Map();
18
+ this.embeddingCacheStats = {
19
+ hits: 0,
20
+ misses: 0,
21
+ size: 0
22
+ };
15
23
  this.brain = brain;
16
24
  this.cache = new EntityExtractionCache(cacheOptions);
17
25
  }
@@ -253,20 +261,46 @@ export class NeuralEntityExtractor {
253
261
  return { type: NounType.Thing, confidence: 0.3 };
254
262
  }
255
263
  /**
256
- * Get embedding for text
264
+ * Get embedding for text with caching (v3.38.0)
265
+ *
266
+ * PERFORMANCE OPTIMIZATION: Caches embeddings during extraction session
267
+ * to avoid redundant model calls for repeated text (common in large imports)
257
268
  */
258
269
  async getEmbedding(text) {
270
+ // Normalize text for cache key
271
+ const normalizedText = text.trim().toLowerCase();
272
+ // Check cache first
273
+ const cached = this.embeddingCache.get(normalizedText);
274
+ if (cached) {
275
+ this.embeddingCacheStats.hits++;
276
+ return cached;
277
+ }
278
+ // Cache miss - generate embedding
279
+ this.embeddingCacheStats.misses++;
280
+ let vector;
259
281
  if ('embed' in this.brain && typeof this.brain.embed === 'function') {
260
- return await this.brain.embed(text);
282
+ vector = await this.brain.embed(text);
261
283
  }
262
284
  else {
263
285
  // Fallback - create simple hash-based vector
264
- const vector = new Array(384).fill(0);
286
+ vector = new Array(384).fill(0);
265
287
  for (let i = 0; i < text.length; i++) {
266
288
  vector[i % 384] += text.charCodeAt(i) / 255;
267
289
  }
268
- return vector.map(v => v / text.length);
290
+ vector = vector.map(v => v / text.length);
291
+ }
292
+ // Store in cache
293
+ this.embeddingCache.set(normalizedText, vector);
294
+ this.embeddingCacheStats.size = this.embeddingCache.size;
295
+ // Memory management: Clear cache if it grows too large (>10000 entries)
296
+ if (this.embeddingCache.size > 10000) {
297
+ // Keep most recent 5000 entries (simple LRU approximation)
298
+ const entries = Array.from(this.embeddingCache.entries());
299
+ this.embeddingCache.clear();
300
+ entries.slice(-5000).forEach(([k, v]) => this.embeddingCache.set(k, v));
301
+ this.embeddingCacheStats.size = this.embeddingCache.size;
269
302
  }
303
+ return vector;
270
304
  }
271
305
  /**
272
306
  * Calculate cosine similarity between vectors
@@ -355,5 +389,36 @@ export class NeuralEntityExtractor {
355
389
  cleanupCache() {
356
390
  return this.cache.cleanup();
357
391
  }
392
+ /**
393
+ * Clear embedding cache (v3.38.0)
394
+ *
395
+ * Clears the runtime embedding cache. Useful for:
396
+ * - Freeing memory after large imports
397
+ * - Testing with fresh cache state
398
+ */
399
+ clearEmbeddingCache() {
400
+ this.embeddingCache.clear();
401
+ this.embeddingCacheStats = {
402
+ hits: 0,
403
+ misses: 0,
404
+ size: 0
405
+ };
406
+ }
407
+ /**
408
+ * Get embedding cache statistics (v3.38.0)
409
+ *
410
+ * Returns performance metrics for the embedding cache:
411
+ * - hits: Number of cache hits (avoided model calls)
412
+ * - misses: Number of cache misses (required model calls)
413
+ * - size: Current cache size
414
+ * - hitRate: Percentage of requests served from cache
415
+ */
416
+ getEmbeddingCacheStats() {
417
+ const total = this.embeddingCacheStats.hits + this.embeddingCacheStats.misses;
418
+ return {
419
+ ...this.embeddingCacheStats,
420
+ hitRate: total > 0 ? this.embeddingCacheStats.hits / total : 0
421
+ };
422
+ }
358
423
  }
359
424
  //# sourceMappingURL=entityExtractor.js.map
@@ -347,9 +347,7 @@ export class GcsStorage extends BaseStorage {
347
347
  if (node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
348
348
  this.nounCacheManager.set(node.id, node);
349
349
  }
350
- else {
351
- prodLog.warn(`[saveNode] Not caching node ${node.id.substring(0, 8)}... with empty vector (HNSW lazy mode)`);
352
- }
350
+ // Note: Empty vectors are intentional during HNSW lazy mode - not logged
353
351
  // Increment noun count
354
352
  const metadata = await this.getNounMetadata(node.id);
355
353
  if (metadata && metadata.type) {
@@ -392,53 +390,28 @@ export class GcsStorage extends BaseStorage {
392
390
  */
393
391
  async getNode(id) {
394
392
  await this.ensureInitialized();
395
- // Check cache first WITH LOGGING
393
+ // Check cache first
396
394
  const cached = await this.nounCacheManager.get(id);
397
- // DIAGNOSTIC LOGGING: Reveal cache poisoning
398
- prodLog.info(`[getNode] 🔍 Cache check for ${id.substring(0, 8)}...:`, {
399
- hasCached: cached !== undefined,
400
- isNull: cached === null,
401
- isObject: cached !== null && typeof cached === 'object',
402
- type: typeof cached
403
- });
404
- // CRITICAL FIX (v3.37.8): Validate cached object before returning
395
+ // Validate cached object before returning (v3.37.8+)
405
396
  if (cached !== undefined && cached !== null) {
406
- // Log cached object structure to diagnose incomplete objects
407
- prodLog.info(`[getNode] Cached object structure:`, {
408
- hasId: !!cached.id,
409
- idMatches: cached.id === id,
410
- hasVector: !!cached.vector,
411
- vectorLength: cached.vector?.length,
412
- hasConnections: !!cached.connections,
413
- connectionsType: typeof cached.connections,
414
- hasLevel: cached.level !== undefined,
415
- level: cached.level,
416
- objectKeys: Object.keys(cached || {})
417
- });
418
397
  // Validate cached object has required fields (including non-empty vector!)
419
398
  if (!cached.id || !cached.vector || !Array.isArray(cached.vector) || cached.vector.length === 0) {
420
- prodLog.error(`[getNode] ❌ INVALID cached object for ${id.substring(0, 8)}...:`, {
421
- reason: !cached.id ? 'missing id' :
422
- !cached.vector ? 'missing vector' :
423
- !Array.isArray(cached.vector) ? 'vector not array' :
424
- cached.vector.length === 0 ? 'vector is empty array' :
425
- 'unknown'
426
- });
427
- prodLog.error(`[getNode] Removing invalid object from cache and loading from GCS`);
399
+ // Invalid cache detected - log and auto-recover
400
+ prodLog.warn(`[GCS] Invalid cached object for ${id.substring(0, 8)} (${!cached.id ? 'missing id' :
401
+ !cached.vector ? 'missing vector' :
402
+ !Array.isArray(cached.vector) ? 'vector not array' :
403
+ 'empty vector'}) - removing from cache and reloading`);
428
404
  this.nounCacheManager.delete(id);
429
405
  // Fall through to load from GCS
430
406
  }
431
407
  else {
432
- prodLog.info(`[getNode] ✅ Valid cached object - returning`);
408
+ // Valid cache hit
433
409
  this.logger.trace(`Cache hit for noun ${id}`);
434
410
  return cached;
435
411
  }
436
412
  }
437
413
  else if (cached === null) {
438
- prodLog.warn(`[getNode] ⚠️ Cache contains NULL for ${id.substring(0, 8)}... - ignoring and loading from GCS`);
439
- }
440
- else {
441
- prodLog.info(`[getNode] ❌ Cache MISS - loading from GCS for ${id.substring(0, 8)}...`);
414
+ prodLog.warn(`[GCS] Cache contains null for ${id.substring(0, 8)} - reloading from storage`);
442
415
  }
443
416
  // Apply backpressure
444
417
  const requestId = await this.applyBackpressure();
@@ -446,20 +419,11 @@ export class GcsStorage extends BaseStorage {
446
419
  this.logger.trace(`Getting node ${id}`);
447
420
  // Get the GCS key with UUID-based sharding
448
421
  const key = this.getNounKey(id);
449
- // DIAGNOSTIC LOGGING: Show exact path being accessed
450
- prodLog.info(`[getNode] 🔍 Attempting to load:`);
451
- prodLog.info(`[getNode] UUID: ${id}`);
452
- prodLog.info(`[getNode] Path: ${key}`);
453
- prodLog.info(`[getNode] Bucket: ${this.bucketName}`);
454
422
  // Download from GCS
455
423
  const file = this.bucket.file(key);
456
- prodLog.info(`[getNode] 📥 Downloading file...`);
457
424
  const [contents] = await file.download();
458
- prodLog.info(`[getNode] ✅ Download successful: ${contents.length} bytes`);
459
425
  // Parse JSON
460
- prodLog.info(`[getNode] 🔧 Parsing JSON...`);
461
426
  const data = JSON.parse(contents.toString());
462
- prodLog.info(`[getNode] ✅ JSON parsed successfully, id: ${data.id}`);
463
427
  // Convert serialized connections back to Map<number, Set<string>>
464
428
  const connections = new Map();
465
429
  for (const [level, nounIds] of Object.entries(data.connections || {})) {
@@ -477,10 +441,9 @@ export class GcsStorage extends BaseStorage {
477
441
  // CRITICAL FIX: Only cache valid nodes with non-empty vectors (never cache null or empty)
478
442
  if (node && node.id && node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
479
443
  this.nounCacheManager.set(id, node);
480
- prodLog.info(`[getNode] 💾 Cached node ${id.substring(0, 8)}... successfully`);
481
444
  }
482
445
  else {
483
- prodLog.warn(`[getNode] ⚠️ NOT caching invalid node for ${id.substring(0, 8)}... (missing id/vector or empty vector)`);
446
+ prodLog.warn(`[GCS] Not caching invalid node ${id.substring(0, 8)} (missing id/vector or empty vector)`);
484
447
  }
485
448
  this.logger.trace(`Successfully retrieved node ${id}`);
486
449
  this.releaseBackpressure(true, requestId);
@@ -868,13 +831,6 @@ export class GcsStorage extends BaseStorage {
868
831
  await this.ensureInitialized(); // CRITICAL: Must initialize before using this.bucket
869
832
  const limit = options.limit || 100;
870
833
  const useCache = options.useCache !== false;
871
- // DIAGNOSTIC LOGGING: Track pagination performance
872
- prodLog.info(`[getNodesWithPagination] Starting pagination: limit=${limit}, cursor=${options.cursor || 'none'}`);
873
- const startTime = Date.now();
874
- let shardsChecked = 0;
875
- let filesFound = 0;
876
- let nodesLoaded = 0;
877
- let nodesFailed = 0;
878
834
  try {
879
835
  const nodes = [];
880
836
  // Parse cursor (format: "shardIndex:gcsPageToken")
@@ -889,7 +845,6 @@ export class GcsStorage extends BaseStorage {
889
845
  for (let shardIndex = startShardIndex; shardIndex < TOTAL_SHARDS; shardIndex++) {
890
846
  const shardId = getShardIdByIndex(shardIndex);
891
847
  const shardPrefix = `${this.nounPrefix}${shardId}/`;
892
- shardsChecked++;
893
848
  // List objects in this shard
894
849
  // Cap maxResults to GCS API limit to prevent "Invalid unsigned integer" errors
895
850
  const requestedPageSize = limit - nodes.length;
@@ -899,12 +854,6 @@ export class GcsStorage extends BaseStorage {
899
854
  maxResults: cappedPageSize,
900
855
  pageToken: shardIndex === startShardIndex ? gcsPageToken : undefined
901
856
  });
902
- // DIAGNOSTIC LOGGING: Show files found per shard (only log non-empty shards)
903
- if (files && files.length > 0) {
904
- filesFound += files.length;
905
- prodLog.info(`[Shard ${shardId}] Found ${files.length} files in "${shardPrefix}"`);
906
- prodLog.info(`[Shard ${shardId}] Sample file names: ${files.slice(0, 3).map((f) => f.name).join(', ')}`);
907
- }
908
857
  // Extract node IDs from file names
909
858
  if (files && files.length > 0) {
910
859
  const nodeIds = files
@@ -921,21 +870,11 @@ export class GcsStorage extends BaseStorage {
921
870
  return name;
922
871
  })
923
872
  .filter((id) => id && id.length > 0);
924
- // DIAGNOSTIC LOGGING: Show extracted UUIDs
925
- prodLog.info(`[Shard ${shardId}] Extracted ${nodeIds.length} UUIDs: ${nodeIds.slice(0, 3).join(', ')}...`);
926
873
  // Load nodes
927
874
  for (const id of nodeIds) {
928
- // DIAGNOSTIC LOGGING: Show each getNode() attempt
929
- prodLog.info(`[Shard ${shardId}] Calling getNode("${id}")...`);
930
875
  const node = await this.getNode(id);
931
876
  if (node) {
932
877
  nodes.push(node);
933
- nodesLoaded++;
934
- prodLog.info(`[Shard ${shardId}] ✅ Successfully loaded node ${id}`);
935
- }
936
- else {
937
- nodesFailed++;
938
- prodLog.warn(`[Shard ${shardId}] ❌ getNode("${id}") returned null!`);
939
878
  }
940
879
  if (nodes.length >= limit) {
941
880
  break;
@@ -968,14 +907,6 @@ export class GcsStorage extends BaseStorage {
968
907
  // Continue to next shard
969
908
  }
970
909
  // No more shards or nodes
971
- // DIAGNOSTIC LOGGING: Final summary
972
- const elapsedTime = Date.now() - startTime;
973
- prodLog.info(`[getNodesWithPagination] COMPLETED in ${elapsedTime}ms:`);
974
- prodLog.info(` - Shards checked: ${shardsChecked}/${TOTAL_SHARDS}`);
975
- prodLog.info(` - Files found: ${filesFound}`);
976
- prodLog.info(` - Nodes loaded: ${nodesLoaded}`);
977
- prodLog.info(` - Nodes failed: ${nodesFailed}`);
978
- prodLog.info(` - Success rate: ${filesFound > 0 ? ((nodesLoaded / filesFound) * 100).toFixed(1) : 'N/A'}%`);
979
910
  return {
980
911
  nodes,
981
912
  totalCount: this.totalNounCount,
@@ -818,80 +818,47 @@ export class S3CompatibleStorage extends BaseStorage {
818
818
  */
819
819
  async getNode(id) {
820
820
  await this.ensureInitialized();
821
- // Check cache first WITH LOGGING
821
+ // Check cache first
822
822
  const cached = this.nodeCache.get(id);
823
- // DIAGNOSTIC LOGGING: Reveal cache poisoning
824
- prodLog.info(`[getNode] 🔍 Cache check for ${id.substring(0, 8)}...:`, {
825
- hasCached: cached !== undefined,
826
- isNull: cached === null,
827
- isObject: cached !== null && typeof cached === 'object',
828
- type: typeof cached
829
- });
830
- // CRITICAL FIX (v3.37.8): Validate cached object before returning
823
+ // Validate cached object before returning (v3.37.8+)
831
824
  if (cached !== undefined && cached !== null) {
832
- // Log cached object structure to diagnose incomplete objects
833
- prodLog.info(`[getNode] Cached object structure:`, {
834
- hasId: !!cached.id,
835
- idMatches: cached.id === id,
836
- hasVector: !!cached.vector,
837
- vectorLength: cached.vector?.length,
838
- hasConnections: !!cached.connections,
839
- connectionsType: typeof cached.connections,
840
- objectKeys: Object.keys(cached || {})
841
- });
842
825
  // Validate cached object has required fields (including non-empty vector!)
843
826
  if (!cached.id || !cached.vector || !Array.isArray(cached.vector) || cached.vector.length === 0) {
844
- prodLog.error(`[getNode] ❌ INVALID cached object for ${id.substring(0, 8)}...:`, {
845
- reason: !cached.id ? 'missing id' :
846
- !cached.vector ? 'missing vector' :
847
- !Array.isArray(cached.vector) ? 'vector not array' :
848
- cached.vector.length === 0 ? 'vector is empty array' :
849
- 'unknown'
850
- });
851
- prodLog.error(`[getNode] Removing invalid object from cache and loading from S3`);
827
+ // Invalid cache detected - log and auto-recover
828
+ prodLog.warn(`[S3] Invalid cached object for ${id.substring(0, 8)} (${!cached.id ? 'missing id' :
829
+ !cached.vector ? 'missing vector' :
830
+ !Array.isArray(cached.vector) ? 'vector not array' :
831
+ 'empty vector'}) - removing from cache and reloading`);
852
832
  this.nodeCache.delete(id);
853
833
  // Fall through to load from S3
854
834
  }
855
835
  else {
856
- prodLog.info(`[getNode] ✅ Valid cached object - returning`);
836
+ // Valid cache hit
857
837
  this.logger.trace(`Cache hit for node ${id}`);
858
838
  return cached;
859
839
  }
860
840
  }
861
841
  else if (cached === null) {
862
- prodLog.warn(`[getNode] ⚠️ Cache contains NULL for ${id.substring(0, 8)}... - ignoring and loading from S3`);
863
- }
864
- else {
865
- prodLog.info(`[getNode] ❌ Cache MISS - loading from S3 for ${id.substring(0, 8)}...`);
842
+ prodLog.warn(`[S3] Cache contains null for ${id.substring(0, 8)} - reloading from storage`);
866
843
  }
867
844
  try {
868
845
  // Import the GetObjectCommand only when needed
869
846
  const { GetObjectCommand } = await import('@aws-sdk/client-s3');
870
847
  // Use getNounKey() to properly handle sharding
871
848
  const key = this.getNounKey(id);
872
- // DIAGNOSTIC LOGGING: Show exact path being accessed
873
- prodLog.info(`[getNode] 🔍 Attempting to load:`);
874
- prodLog.info(`[getNode] UUID: ${id}`);
875
- prodLog.info(`[getNode] Path: ${key}`);
876
- prodLog.info(`[getNode] Bucket: ${this.bucketName}`);
877
849
  // Try to get the node from the nouns directory
878
- prodLog.info(`[getNode] 📥 Downloading file...`);
879
850
  const response = await this.s3Client.send(new GetObjectCommand({
880
851
  Bucket: this.bucketName,
881
852
  Key: key
882
853
  }));
883
854
  // Check if response is null or undefined
884
855
  if (!response || !response.Body) {
885
- prodLog.warn(`[getNode] ❌ Response or Body is null/undefined`);
856
+ prodLog.warn(`[S3] Response or Body is null/undefined for ${id.substring(0, 8)}`);
886
857
  return null;
887
858
  }
888
- // Convert the response body to a string
859
+ // Convert the response body to a string and parse JSON
889
860
  const bodyContents = await response.Body.transformToString();
890
- prodLog.info(`[getNode] ✅ Download successful: ${bodyContents.length} bytes`);
891
- // Parse the JSON string
892
- prodLog.info(`[getNode] 🔧 Parsing JSON...`);
893
861
  const parsedNode = JSON.parse(bodyContents);
894
- prodLog.info(`[getNode] ✅ JSON parsed successfully, id: ${parsedNode.id}`);
895
862
  // Ensure the parsed node has the expected properties
896
863
  if (!parsedNode ||
897
864
  !parsedNode.id ||
@@ -917,41 +884,25 @@ export class S3CompatibleStorage extends BaseStorage {
917
884
  // CRITICAL FIX: Only cache valid nodes with non-empty vectors (never cache null or empty)
918
885
  if (node && node.id && node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
919
886
  this.nodeCache.set(id, node);
920
- prodLog.info(`[getNode] 💾 Cached node ${id.substring(0, 8)}... successfully`);
921
887
  }
922
888
  else {
923
- prodLog.warn(`[getNode] ⚠️ NOT caching invalid node for ${id.substring(0, 8)}... (missing id/vector or empty vector)`);
889
+ prodLog.warn(`[S3] Not caching invalid node ${id.substring(0, 8)} (missing id/vector or empty vector)`);
924
890
  }
925
891
  this.logger.trace(`Successfully retrieved node ${id}`);
926
892
  return node;
927
893
  }
928
894
  catch (error) {
929
- // DIAGNOSTIC LOGGING: Log EVERY error before any conditional checks
930
- const key = this.getNounKey(id);
931
- prodLog.error(`[getNode] ❌ EXCEPTION CAUGHT:`);
932
- prodLog.error(`[getNode] UUID: ${id}`);
933
- prodLog.error(`[getNode] Path: ${key}`);
934
- prodLog.error(`[getNode] Bucket: ${this.bucketName}`);
935
- prodLog.error(`[getNode] Error type: ${error?.constructor?.name || typeof error}`);
936
- prodLog.error(`[getNode] Error name: ${error?.name}`);
937
- prodLog.error(`[getNode] Error code: ${JSON.stringify(error?.Code || error?.code)}`);
938
- prodLog.error(`[getNode] Error message: ${error?.message || String(error)}`);
939
- prodLog.error(`[getNode] HTTP status: ${error?.$metadata?.httpStatusCode}`);
940
- prodLog.error(`[getNode] Error object:`, JSON.stringify(error, null, 2));
941
895
  // Check if this is a "not found" error (S3 uses "NoSuchKey")
942
896
  if (error?.name === 'NoSuchKey' || error?.Code === 'NoSuchKey' || error?.$metadata?.httpStatusCode === 404) {
943
- prodLog.warn(`[getNode] Identified as 404/NoSuchKey error - returning null WITHOUT caching`);
944
- // CRITICAL FIX: Do NOT cache null values
897
+ // File not found - not cached, just return null
945
898
  return null;
946
899
  }
947
900
  // Handle throttling
948
901
  if (this.isThrottlingError(error)) {
949
- prodLog.warn(`[getNode] Identified as throttling error - rethrowing`);
950
902
  await this.handleThrottling(error);
951
903
  throw error;
952
904
  }
953
905
  // All other errors should throw, not return null
954
- prodLog.error(`[getNode] Unhandled error - rethrowing`);
955
906
  this.logger.error(`Failed to get node ${id}:`, error);
956
907
  throw BrainyError.fromError(error, `getNoun(${id})`);
957
908
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "3.37.8",
3
+ "version": "3.39.0",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",