@soulcraft/brainy 3.32.2 → 3.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/CHANGELOG.md +175 -0
  2. package/dist/augmentations/typeMatching/brainyTypes.d.ts +5 -1
  3. package/dist/augmentations/typeMatching/brainyTypes.js +14 -7
  4. package/dist/brainy.d.ts +31 -0
  5. package/dist/brainy.js +119 -34
  6. package/dist/hnsw/hnswIndex.d.ts +24 -0
  7. package/dist/hnsw/hnswIndex.js +137 -0
  8. package/dist/hnsw/hnswIndexOptimized.d.ts +2 -13
  9. package/dist/hnsw/hnswIndexOptimized.js +8 -37
  10. package/dist/importers/SmartExcelImporter.js +12 -0
  11. package/dist/interfaces/IIndex.d.ts +186 -0
  12. package/dist/interfaces/IIndex.js +15 -0
  13. package/dist/neural/embeddedTypeEmbeddings.d.ts +34 -0
  14. package/dist/neural/embeddedTypeEmbeddings.js +96 -0
  15. package/dist/neural/entityExtractor.d.ts +2 -0
  16. package/dist/neural/entityExtractor.js +21 -42
  17. package/dist/neural/naturalLanguageProcessor.d.ts +2 -1
  18. package/dist/neural/naturalLanguageProcessor.js +17 -31
  19. package/dist/storage/adapters/baseStorageAdapter.d.ts +54 -0
  20. package/dist/storage/adapters/baseStorageAdapter.js +105 -10
  21. package/dist/storage/adapters/fileSystemStorage.d.ts +32 -0
  22. package/dist/storage/adapters/fileSystemStorage.js +66 -0
  23. package/dist/storage/adapters/gcsStorage.d.ts +45 -0
  24. package/dist/storage/adapters/gcsStorage.js +122 -4
  25. package/dist/storage/adapters/memoryStorage.d.ts +32 -0
  26. package/dist/storage/adapters/memoryStorage.js +43 -0
  27. package/dist/storage/adapters/opfsStorage.d.ts +36 -0
  28. package/dist/storage/adapters/opfsStorage.js +101 -0
  29. package/dist/storage/adapters/s3CompatibleStorage.d.ts +45 -0
  30. package/dist/storage/adapters/s3CompatibleStorage.js +123 -0
  31. package/package.json +5 -2
@@ -10,6 +10,7 @@
10
10
  */
11
11
  import { PatternLibrary } from './patternLibrary.js';
12
12
  import { NounType, VerbType } from '../types/graphTypes.js';
13
+ import { getNounTypeEmbeddings, getVerbTypeEmbeddings } from './embeddedTypeEmbeddings.js';
13
14
  export class NaturalLanguageProcessor {
14
15
  constructor(brain) {
15
16
  this.initialized = false;
@@ -54,41 +55,26 @@ export class NaturalLanguageProcessor {
54
55
  }
55
56
  /**
56
57
  * Initialize embeddings for all NounTypes and VerbTypes
57
- * These are fixed types that never change - perfect for caching
58
+ * PRODUCTION OPTIMIZATION (v3.33.0): Uses pre-computed type embeddings
59
+ * Zero runtime cost - embeddings are loaded instantly from embedded data
58
60
  */
59
61
  async initializeTypeEmbeddings() {
60
62
  if (this.typeEmbeddingsInitialized)
61
63
  return;
62
- // Embed all NounTypes (30+ types)
63
- for (const [key, value] of Object.entries(NounType)) {
64
- if (typeof value === 'string') {
65
- // Embed both the key (Person) and value (person)
66
- const keyEmbedding = await this.getEmbedding(key);
67
- const valueEmbedding = await this.getEmbedding(value);
68
- this.nounTypeEmbeddings.set(key, keyEmbedding);
69
- this.nounTypeEmbeddings.set(value, valueEmbedding);
70
- // Also embed common variations
71
- const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
72
- if (spaceSeparated !== value) {
73
- const variantEmbedding = await this.getEmbedding(spaceSeparated);
74
- this.nounTypeEmbeddings.set(spaceSeparated, variantEmbedding);
75
- }
76
- }
77
- }
78
- // Embed all VerbTypes (40+ types)
79
- for (const [key, value] of Object.entries(VerbType)) {
80
- if (typeof value === 'string') {
81
- const keyEmbedding = await this.getEmbedding(key);
82
- const valueEmbedding = await this.getEmbedding(value);
83
- this.verbTypeEmbeddings.set(key, keyEmbedding);
84
- this.verbTypeEmbeddings.set(value, valueEmbedding);
85
- // Common variations for verbs
86
- const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
87
- if (spaceSeparated !== value) {
88
- const variantEmbedding = await this.getEmbedding(spaceSeparated);
89
- this.verbTypeEmbeddings.set(spaceSeparated, variantEmbedding);
90
- }
91
- }
64
+ // Load pre-computed embeddings (instant, no computation)
65
+ const nounEmbeddings = getNounTypeEmbeddings();
66
+ const verbEmbeddings = getVerbTypeEmbeddings();
67
+ // Store each noun type embedding under its original key and its lowercase form
68
+ for (const [type, embedding] of nounEmbeddings.entries()) {
69
+ this.nounTypeEmbeddings.set(type, embedding);
70
+ // Also store lowercase version for case-insensitive matching
71
+ this.nounTypeEmbeddings.set(type.toLowerCase(), embedding);
72
+ }
73
+ // Store each verb type embedding under its original key and its lowercase form
74
+ for (const [type, embedding] of verbEmbeddings.entries()) {
75
+ this.verbTypeEmbeddings.set(type, embedding);
76
+ // Also store lowercase version for case-insensitive matching
77
+ this.verbTypeEmbeddings.set(type.toLowerCase(), embedding);
92
78
  }
93
79
  this.typeEmbeddingsInitialized = true;
94
80
  }
@@ -23,6 +23,23 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
23
23
  abstract getNounMetadata(id: string): Promise<any | null>;
24
24
  abstract saveVerbMetadata(id: string, metadata: any): Promise<void>;
25
25
  abstract getVerbMetadata(id: string): Promise<any | null>;
26
+ abstract getNounVector(id: string): Promise<number[] | null>;
27
+ abstract saveHNSWData(nounId: string, hnswData: {
28
+ level: number;
29
+ connections: Record<string, string[]>;
30
+ }): Promise<void>;
31
+ abstract getHNSWData(nounId: string): Promise<{
32
+ level: number;
33
+ connections: Record<string, string[]>;
34
+ } | null>;
35
+ abstract saveHNSWSystem(systemData: {
36
+ entryPointId: string | null;
37
+ maxLevel: number;
38
+ }): Promise<void>;
39
+ abstract getHNSWSystem(): Promise<{
40
+ entryPointId: string | null;
41
+ maxLevel: number;
42
+ } | null>;
26
43
  abstract clear(): Promise<void>;
27
44
  abstract getStorageStatus(): Promise<{
28
45
  type: string;
@@ -263,6 +280,12 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
263
280
  timestamp: number;
264
281
  }>;
265
282
  protected readonly COUNT_CACHE_TTL = 60000;
283
+ protected pendingCountPersist: boolean;
284
+ protected lastCountPersistTime: number;
285
+ protected scheduledCountPersistTimeout: NodeJS.Timeout | null;
286
+ protected pendingCountOperations: number;
287
+ protected countPersistBatchSize: number;
288
+ protected countPersistInterval: number;
266
289
  /**
267
290
  * Get total noun count - O(1) operation
268
291
  * @returns Promise that resolves to the total number of nouns
@@ -303,6 +326,37 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
303
326
  * @param type The verb type
304
327
  */
305
328
  protected decrementVerbCount(type: string): Promise<void>;
329
+ /**
330
+ * Detect if this storage adapter uses cloud storage (network I/O)
331
+ * Cloud storage benefits from batching; local storage does not.
332
+ *
333
+ * Override this method in subclasses for accurate detection.
334
+ * Default implementation always returns false (assumes local storage, so counts persist immediately).
335
+ *
336
+ * @returns true if cloud storage (GCS, S3, R2), false if local (File, Memory)
337
+ */
338
+ protected isCloudStorage(): boolean;
339
+ /**
340
+ * Schedule a smart batched persist operation.
341
+ *
342
+ * Strategy:
343
+ * - Local Storage: Persist immediately (fast, no network latency)
344
+ * - Cloud Storage: Batch persist (10 ops OR 5 seconds, whichever first)
345
+ *
346
+ * This mirrors the statistics batching pattern for consistency.
347
+ */
348
+ protected scheduleCountPersist(): Promise<void>;
349
+ /**
350
+ * Flush counts immediately to storage.
351
+ *
352
+ * Used for:
353
+ * - Graceful shutdown (SIGTERM handler)
354
+ * - Forced persist (batch threshold reached)
355
+ * - Local storage immediate persist
356
+ *
357
+ * This is the public API that shutdown hooks can call.
358
+ */
359
+ flushCounts(): Promise<void>;
306
360
  /**
307
361
  * Initialize counts from storage - must be implemented by each adapter
308
362
  * @protected
@@ -48,6 +48,17 @@ export class BaseStorageAdapter {
48
48
  this.verbCounts = new Map(); // verb type -> count
49
49
  this.countCache = new Map();
50
50
  this.COUNT_CACHE_TTL = 60000; // 1 minute cache TTL
51
+ // =============================================
52
+ // Smart Count Batching (v3.32.3+)
53
+ // =============================================
54
+ // Count batching state - mirrors statistics batching pattern
55
+ this.pendingCountPersist = false; // Counts changed since last persist?
56
+ this.lastCountPersistTime = 0; // Timestamp of last persist
57
+ this.scheduledCountPersistTimeout = null; // Scheduled persist timer
58
+ this.pendingCountOperations = 0; // Operations since last persist
59
+ // Batching configuration (overridable by subclasses for custom strategies)
60
+ this.countPersistBatchSize = 10; // Operations before forcing persist (cloud storage)
61
+ this.countPersistInterval = 5000; // Milliseconds before forcing persist (cloud storage)
51
62
  }
52
63
  /**
53
64
  * Save statistics data
@@ -659,10 +670,10 @@ export class BaseStorageAdapter {
659
670
  const mutex = getGlobalMutex();
660
671
  await mutex.runExclusive(`count-entity-${type}`, async () => {
661
672
  this.incrementEntityCount(type);
662
- // CRITICAL FIX: Persist counts on EVERY change for cloud storage adapters
663
- // This ensures counts survive container restarts (GCS, S3, etc.)
664
- // For memory/file storage, this is fast; for cloud storage, it's essential
665
- await this.persistCounts();
673
+ // Smart batching (v3.32.3+): Adapts to storage type
674
+ // - Cloud storage (GCS, S3): Batches 10 ops OR 5 seconds
675
+ // - Local storage (File, Memory): Persists immediately
676
+ await this.scheduleCountPersist();
666
677
  });
667
678
  }
668
679
  /**
@@ -693,8 +704,8 @@ export class BaseStorageAdapter {
693
704
  const mutex = getGlobalMutex();
694
705
  await mutex.runExclusive(`count-entity-${type}`, async () => {
695
706
  this.decrementEntityCount(type);
696
- // CRITICAL FIX: Persist counts on EVERY change for cloud storage adapters
697
- await this.persistCounts();
707
+ // Smart batching (v3.32.3+): Adapts to storage type
708
+ await this.scheduleCountPersist();
698
709
  });
699
710
  }
700
711
  /**
@@ -711,8 +722,8 @@ export class BaseStorageAdapter {
711
722
  count: this.totalVerbCount,
712
723
  timestamp: Date.now()
713
724
  });
714
- // CRITICAL FIX: Persist counts on EVERY change for cloud storage adapters
715
- await this.persistCounts();
725
+ // Smart batching (v3.32.3+): Adapts to storage type
726
+ await this.scheduleCountPersist();
716
727
  });
717
728
  }
718
729
  /**
@@ -737,9 +748,93 @@ export class BaseStorageAdapter {
737
748
  count: this.totalVerbCount,
738
749
  timestamp: Date.now()
739
750
  });
740
- // CRITICAL FIX: Persist counts on EVERY change for cloud storage adapters
741
- await this.persistCounts();
751
+ // Smart batching (v3.32.3+): Adapts to storage type
752
+ await this.scheduleCountPersist();
742
753
  });
743
754
  }
755
+ // =============================================
756
+ // Smart Batching Methods (v3.32.3+)
757
+ // =============================================
758
+ /**
759
+ * Detect if this storage adapter uses cloud storage (network I/O)
760
+ * Cloud storage benefits from batching; local storage does not.
761
+ *
762
+ * Override this method in subclasses for accurate detection.
763
+ * Default implementation always returns false (assumes local storage, so counts persist immediately).
764
+ *
765
+ * @returns true if cloud storage (GCS, S3, R2), false if local (File, Memory)
766
+ */
767
+ isCloudStorage() {
768
+ // Default: assume local storage (conservative, prefers reliability over performance)
769
+ // Subclasses should override this for accurate detection
770
+ return false;
771
+ }
772
+ /**
773
+ * Schedule a smart batched persist operation.
774
+ *
775
+ * Strategy:
776
+ * - Local Storage: Persist immediately (fast, no network latency)
777
+ * - Cloud Storage: Batch persist (10 ops OR 5 seconds, whichever first)
778
+ *
779
+ * This mirrors the statistics batching pattern for consistency.
780
+ */
781
+ async scheduleCountPersist() {
782
+ // Mark counts as pending persist
783
+ this.pendingCountPersist = true;
784
+ this.pendingCountOperations++;
785
+ // Local storage: persist immediately (fast enough, no benefit from batching)
786
+ if (!this.isCloudStorage()) {
787
+ await this.flushCounts();
788
+ return;
789
+ }
790
+ // Cloud storage: use smart batching
791
+ // Persist if we've hit the batch size threshold
792
+ if (this.pendingCountOperations >= this.countPersistBatchSize) {
793
+ await this.flushCounts();
794
+ return;
795
+ }
796
+ // Otherwise, schedule a time-based persist if not already scheduled
797
+ if (!this.scheduledCountPersistTimeout) {
798
+ this.scheduledCountPersistTimeout = setTimeout(() => {
799
+ this.flushCounts().catch(error => {
800
+ console.error('Failed to flush counts on timer:', error);
801
+ });
802
+ }, this.countPersistInterval);
803
+ }
804
+ }
805
+ /**
806
+ * Flush counts immediately to storage.
807
+ *
808
+ * Used for:
809
+ * - Graceful shutdown (SIGTERM handler)
810
+ * - Forced persist (batch threshold reached)
811
+ * - Local storage immediate persist
812
+ *
813
+ * This is the public API that shutdown hooks can call.
814
+ */
815
+ async flushCounts() {
816
+ // Clear any scheduled persist
817
+ if (this.scheduledCountPersistTimeout) {
818
+ clearTimeout(this.scheduledCountPersistTimeout);
819
+ this.scheduledCountPersistTimeout = null;
820
+ }
821
+ // Nothing to flush?
822
+ if (!this.pendingCountPersist) {
823
+ return;
824
+ }
825
+ try {
826
+ // Persist to storage (implemented by subclass)
827
+ await this.persistCounts();
828
+ // Update state
829
+ this.lastCountPersistTime = Date.now();
830
+ this.pendingCountPersist = false;
831
+ this.pendingCountOperations = 0;
832
+ }
833
+ catch (error) {
834
+ console.error('❌ CRITICAL: Failed to flush counts to storage:', error);
835
+ // Keep pending flag set so we retry on next operation
836
+ throw error;
837
+ }
838
+ }
744
839
  }
745
840
  //# sourceMappingURL=baseStorageAdapter.js.map
@@ -361,5 +361,37 @@ export declare class FileSystemStorage extends BaseStorage {
361
361
  * Check if a file exists (handles both sharded and non-sharded)
362
362
  */
363
363
  private fileExists;
364
+ /**
365
+ * Get vector for a noun
366
+ */
367
+ getNounVector(id: string): Promise<number[] | null>;
368
+ /**
369
+ * Save HNSW graph data for a noun
370
+ */
371
+ saveHNSWData(nounId: string, hnswData: {
372
+ level: number;
373
+ connections: Record<string, string[]>;
374
+ }): Promise<void>;
375
+ /**
376
+ * Get HNSW graph data for a noun
377
+ */
378
+ getHNSWData(nounId: string): Promise<{
379
+ level: number;
380
+ connections: Record<string, string[]>;
381
+ } | null>;
382
+ /**
383
+ * Save HNSW system data (entry point, max level)
384
+ */
385
+ saveHNSWSystem(systemData: {
386
+ entryPointId: string | null;
387
+ maxLevel: number;
388
+ }): Promise<void>;
389
+ /**
390
+ * Get HNSW system data
391
+ */
392
+ getHNSWSystem(): Promise<{
393
+ entryPointId: string | null;
394
+ maxLevel: number;
395
+ } | null>;
364
396
  }
365
397
  export {};
@@ -2108,5 +2108,71 @@ export class FileSystemStorage extends BaseStorage {
2108
2108
  return false;
2109
2109
  }
2110
2110
  }
2111
+ // =============================================
2112
+ // HNSW Index Persistence (v3.35.0+)
2113
+ // =============================================
2114
+ /**
2115
+ * Get vector for a noun
2116
+ */
2117
+ async getNounVector(id) {
2118
+ await this.ensureInitialized();
2119
+ const noun = await this.getNode(id);
2120
+ return noun ? noun.vector : null;
2121
+ }
2122
+ /**
2123
+ * Save HNSW graph data for a noun
2124
+ */
2125
+ async saveHNSWData(nounId, hnswData) {
2126
+ await this.ensureInitialized();
2127
+ // Use sharded path for HNSW data
2128
+ const shard = nounId.substring(0, 2).toLowerCase();
2129
+ const hnswDir = path.join(this.rootDir, 'entities', 'nouns', 'hnsw', shard);
2130
+ await this.ensureDirectoryExists(hnswDir);
2131
+ const filePath = path.join(hnswDir, `${nounId}.json`);
2132
+ await fs.promises.writeFile(filePath, JSON.stringify(hnswData, null, 2));
2133
+ }
2134
+ /**
2135
+ * Get HNSW graph data for a noun
2136
+ */
2137
+ async getHNSWData(nounId) {
2138
+ await this.ensureInitialized();
2139
+ const shard = nounId.substring(0, 2).toLowerCase();
2140
+ const filePath = path.join(this.rootDir, 'entities', 'nouns', 'hnsw', shard, `${nounId}.json`);
2141
+ try {
2142
+ const data = await fs.promises.readFile(filePath, 'utf-8');
2143
+ return JSON.parse(data);
2144
+ }
2145
+ catch (error) {
2146
+ if (error.code !== 'ENOENT') {
2147
+ console.error(`Error reading HNSW data for ${nounId}:`, error);
2148
+ }
2149
+ return null;
2150
+ }
2151
+ }
2152
+ /**
2153
+ * Save HNSW system data (entry point, max level)
2154
+ */
2155
+ async saveHNSWSystem(systemData) {
2156
+ await this.ensureInitialized();
2157
+ const filePath = path.join(this.systemDir, 'hnsw-system.json');
2158
+ await fs.promises.writeFile(filePath, JSON.stringify(systemData, null, 2));
2159
+ }
2160
+ /**
2161
+ * Get HNSW system data
2162
+ */
2163
+ async getHNSWSystem() {
2164
+ await this.ensureInitialized();
2165
+ const filePath = path.join(this.systemDir, 'hnsw-system.json');
2166
+ try {
2167
+ const data = await fs.promises.readFile(filePath, 'utf-8');
2168
+ return JSON.parse(data);
2169
+ }
2170
+ catch (error) {
2171
+ if (error.code !== 'ENOENT') {
2172
+ console.error('Error reading HNSW system data:', error);
2173
+ }
2174
+ return null;
2175
+ }
2176
+ }
2111
2177
  }
2112
2178
  //# sourceMappingURL=fileSystemStorage.js.map
@@ -102,6 +102,15 @@ export declare class GcsStorage extends BaseStorage {
102
102
  * Override base class method to detect GCS-specific throttling errors
103
103
  */
104
104
  protected isThrottlingError(error: any): boolean;
105
+ /**
106
+ * Override base class to enable smart batching for cloud storage (v3.32.3+)
107
+ *
108
+ * GCS is cloud storage with network latency (~50ms per write).
109
+ * Smart batching reduces writes from 1000 ops → 100 batches.
110
+ *
111
+ * @returns true (GCS is cloud storage)
112
+ */
113
+ protected isCloudStorage(): boolean;
105
114
  /**
106
115
  * Apply backpressure before starting an operation
107
116
  * @returns Request ID for tracking
@@ -330,5 +339,41 @@ export declare class GcsStorage extends BaseStorage {
330
339
  * Persist counts to storage
331
340
  */
332
341
  protected persistCounts(): Promise<void>;
342
+ /**
343
+ * Get a noun's vector for HNSW rebuild
344
+ */
345
+ getNounVector(id: string): Promise<number[] | null>;
346
+ /**
347
+ * Save HNSW graph data for a noun
348
+ * Storage path: entities/nouns/hnsw/{shard}/{id}.json
349
+ */
350
+ saveHNSWData(nounId: string, hnswData: {
351
+ level: number;
352
+ connections: Record<string, string[]>;
353
+ }): Promise<void>;
354
+ /**
355
+ * Get HNSW graph data for a noun
356
+ * Storage path: entities/nouns/hnsw/{shard}/{id}.json
357
+ */
358
+ getHNSWData(nounId: string): Promise<{
359
+ level: number;
360
+ connections: Record<string, string[]>;
361
+ } | null>;
362
+ /**
363
+ * Save HNSW system data (entry point, max level)
364
+ * Storage path: system/hnsw-system.json
365
+ */
366
+ saveHNSWSystem(systemData: {
367
+ entryPointId: string | null;
368
+ maxLevel: number;
369
+ }): Promise<void>;
370
+ /**
371
+ * Get HNSW system data (entry point, max level)
372
+ * Storage path: system/hnsw-system.json
373
+ */
374
+ getHNSWSystem(): Promise<{
375
+ entryPointId: string | null;
376
+ maxLevel: number;
377
+ } | null>;
333
378
  }
334
379
  export {};
@@ -195,6 +195,17 @@ export class GcsStorage extends BaseStorage {
195
195
  message.includes('rate limit') ||
196
196
  message.includes('too many requests'));
197
197
  }
198
+ /**
199
+ * Override base class to enable smart batching for cloud storage (v3.32.3+)
200
+ *
201
+ * GCS is cloud storage with network latency (~50ms per write).
202
+ * Smart batching reduces writes from 1000 ops → 100 batches.
203
+ *
204
+ * @returns true (GCS is cloud storage)
205
+ */
206
+ isCloudStorage() {
207
+ return true; // GCS benefits from batching
208
+ }
198
209
  /**
199
210
  * Apply backpressure before starting an operation
200
211
  * @returns Request ID for tracking
@@ -1128,15 +1139,32 @@ export class GcsStorage extends BaseStorage {
1128
1139
  async initializeCountsFromScan() {
1129
1140
  try {
1130
1141
  prodLog.info('📊 Scanning GCS bucket to initialize counts...');
1142
+ prodLog.info(`🔍 Noun prefix: ${this.nounPrefix}`);
1143
+ prodLog.info(`🔍 Verb prefix: ${this.verbPrefix}`);
1131
1144
  // Count nouns
1132
1145
  const [nounFiles] = await this.bucket.getFiles({ prefix: this.nounPrefix });
1133
- this.totalNounCount = nounFiles?.filter((f) => f.name?.endsWith('.json')).length || 0;
1146
+ prodLog.info(`🔍 Found ${nounFiles?.length || 0} total files under noun prefix`);
1147
+ const jsonNounFiles = nounFiles?.filter((f) => f.name?.endsWith('.json')) || [];
1148
+ this.totalNounCount = jsonNounFiles.length;
1149
+ if (jsonNounFiles.length > 0 && jsonNounFiles.length <= 5) {
1150
+ prodLog.info(`📄 Sample noun files: ${jsonNounFiles.slice(0, 5).map((f) => f.name).join(', ')}`);
1151
+ }
1134
1152
  // Count verbs
1135
1153
  const [verbFiles] = await this.bucket.getFiles({ prefix: this.verbPrefix });
1136
- this.totalVerbCount = verbFiles?.filter((f) => f.name?.endsWith('.json')).length || 0;
1154
+ prodLog.info(`🔍 Found ${verbFiles?.length || 0} total files under verb prefix`);
1155
+ const jsonVerbFiles = verbFiles?.filter((f) => f.name?.endsWith('.json')) || [];
1156
+ this.totalVerbCount = jsonVerbFiles.length;
1157
+ if (jsonVerbFiles.length > 0 && jsonVerbFiles.length <= 5) {
1158
+ prodLog.info(`📄 Sample verb files: ${jsonVerbFiles.slice(0, 5).map((f) => f.name).join(', ')}`);
1159
+ }
1137
1160
  // Save initial counts
1138
- await this.persistCounts();
1139
- prodLog.info(`✅ Initialized counts from scan: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
1161
+ if (this.totalNounCount > 0 || this.totalVerbCount > 0) {
1162
+ await this.persistCounts();
1163
+ prodLog.info(`✅ Initialized counts from scan: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
1164
+ }
1165
+ else {
1166
+ prodLog.warn(`⚠️ No entities found during bucket scan. Check that entities exist and prefixes are correct.`);
1167
+ }
1140
1168
  }
1141
1169
  catch (error) {
1142
1170
  // CRITICAL FIX: Don't silently fail - this prevents data loss scenarios
@@ -1167,5 +1195,95 @@ export class GcsStorage extends BaseStorage {
1167
1195
  this.logger.error('Error persisting counts:', error);
1168
1196
  }
1169
1197
  }
1198
+ // HNSW Index Persistence (v3.35.0+)
1199
+ /**
1200
+ * Get a noun's vector for HNSW rebuild
1201
+ */
1202
+ async getNounVector(id) {
1203
+ await this.ensureInitialized();
1204
+ const noun = await this.getNode(id);
1205
+ return noun ? noun.vector : null;
1206
+ }
1207
+ /**
1208
+ * Save HNSW graph data for a noun
1209
+ * Storage path: entities/nouns/hnsw/{shard}/{id}.json
1210
+ */
1211
+ async saveHNSWData(nounId, hnswData) {
1212
+ await this.ensureInitialized();
1213
+ try {
1214
+ // Use sharded path for HNSW data
1215
+ const shard = getShardIdFromUuid(nounId);
1216
+ const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
1217
+ const file = this.bucket.file(key);
1218
+ await file.save(JSON.stringify(hnswData, null, 2), {
1219
+ contentType: 'application/json',
1220
+ resumable: false
1221
+ });
1222
+ }
1223
+ catch (error) {
1224
+ this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
1225
+ throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
1226
+ }
1227
+ }
1228
+ /**
1229
+ * Get HNSW graph data for a noun
1230
+ * Storage path: entities/nouns/hnsw/{shard}/{id}.json
1231
+ */
1232
+ async getHNSWData(nounId) {
1233
+ await this.ensureInitialized();
1234
+ try {
1235
+ const shard = getShardIdFromUuid(nounId);
1236
+ const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
1237
+ const file = this.bucket.file(key);
1238
+ const [contents] = await file.download();
1239
+ return JSON.parse(contents.toString());
1240
+ }
1241
+ catch (error) {
1242
+ if (error.code === 404) {
1243
+ return null;
1244
+ }
1245
+ this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
1246
+ throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
1247
+ }
1248
+ }
1249
+ /**
1250
+ * Save HNSW system data (entry point, max level)
1251
+ * Storage path: system/hnsw-system.json
1252
+ */
1253
+ async saveHNSWSystem(systemData) {
1254
+ await this.ensureInitialized();
1255
+ try {
1256
+ const key = `${this.systemPrefix}hnsw-system.json`;
1257
+ const file = this.bucket.file(key);
1258
+ await file.save(JSON.stringify(systemData, null, 2), {
1259
+ contentType: 'application/json',
1260
+ resumable: false
1261
+ });
1262
+ }
1263
+ catch (error) {
1264
+ this.logger.error('Failed to save HNSW system data:', error);
1265
+ throw new Error(`Failed to save HNSW system data: ${error}`);
1266
+ }
1267
+ }
1268
+ /**
1269
+ * Get HNSW system data (entry point, max level)
1270
+ * Storage path: system/hnsw-system.json
1271
+ */
1272
+ async getHNSWSystem() {
1273
+ await this.ensureInitialized();
1274
+ try {
1275
+ const key = `${this.systemPrefix}hnsw-system.json`;
1276
+ const file = this.bucket.file(key);
1277
+ const [contents] = await file.download();
1278
+ return JSON.parse(contents.toString());
1279
+ }
1280
+ catch (error) {
1281
+ if (error.code === 404) {
1282
+ return null;
1283
+ }
1284
+ this.logger.error('Failed to get HNSW system data:', error);
1285
+ throw new Error(`Failed to get HNSW system data: ${error}`);
1286
+ }
1287
+ }
1170
1288
  }
1171
1289
  //# sourceMappingURL=gcsStorage.js.map
@@ -174,4 +174,36 @@ export declare class MemoryStorage extends BaseStorage {
174
174
  * Persist counts to storage - no-op for memory storage
175
175
  */
176
176
  protected persistCounts(): Promise<void>;
177
+ /**
178
+ * Get vector for a noun
179
+ */
180
+ getNounVector(id: string): Promise<number[] | null>;
181
+ /**
182
+ * Save HNSW graph data for a noun
183
+ */
184
+ saveHNSWData(nounId: string, hnswData: {
185
+ level: number;
186
+ connections: Record<string, string[]>;
187
+ }): Promise<void>;
188
+ /**
189
+ * Get HNSW graph data for a noun
190
+ */
191
+ getHNSWData(nounId: string): Promise<{
192
+ level: number;
193
+ connections: Record<string, string[]>;
194
+ } | null>;
195
+ /**
196
+ * Save HNSW system data (entry point, max level)
197
+ */
198
+ saveHNSWSystem(systemData: {
199
+ entryPointId: string | null;
200
+ maxLevel: number;
201
+ }): Promise<void>;
202
+ /**
203
+ * Get HNSW system data
204
+ */
205
+ getHNSWSystem(): Promise<{
206
+ entryPointId: string | null;
207
+ maxLevel: number;
208
+ } | null>;
177
209
  }