@soulcraft/brainy 3.32.2 → 3.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +175 -0
- package/dist/augmentations/typeMatching/brainyTypes.d.ts +5 -1
- package/dist/augmentations/typeMatching/brainyTypes.js +14 -7
- package/dist/brainy.d.ts +31 -0
- package/dist/brainy.js +119 -34
- package/dist/hnsw/hnswIndex.d.ts +24 -0
- package/dist/hnsw/hnswIndex.js +137 -0
- package/dist/hnsw/hnswIndexOptimized.d.ts +2 -13
- package/dist/hnsw/hnswIndexOptimized.js +8 -37
- package/dist/importers/SmartExcelImporter.js +12 -0
- package/dist/interfaces/IIndex.d.ts +186 -0
- package/dist/interfaces/IIndex.js +15 -0
- package/dist/neural/embeddedTypeEmbeddings.d.ts +34 -0
- package/dist/neural/embeddedTypeEmbeddings.js +96 -0
- package/dist/neural/entityExtractor.d.ts +2 -0
- package/dist/neural/entityExtractor.js +21 -42
- package/dist/neural/naturalLanguageProcessor.d.ts +2 -1
- package/dist/neural/naturalLanguageProcessor.js +17 -31
- package/dist/storage/adapters/baseStorageAdapter.d.ts +54 -0
- package/dist/storage/adapters/baseStorageAdapter.js +105 -10
- package/dist/storage/adapters/fileSystemStorage.d.ts +32 -0
- package/dist/storage/adapters/fileSystemStorage.js +66 -0
- package/dist/storage/adapters/gcsStorage.d.ts +45 -0
- package/dist/storage/adapters/gcsStorage.js +122 -4
- package/dist/storage/adapters/memoryStorage.d.ts +32 -0
- package/dist/storage/adapters/memoryStorage.js +43 -0
- package/dist/storage/adapters/opfsStorage.d.ts +36 -0
- package/dist/storage/adapters/opfsStorage.js +101 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +45 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +123 -0
- package/package.json +5 -2
package/dist/neural/naturalLanguageProcessor.js

@@ -10,6 +10,7 @@
  */
 import { PatternLibrary } from './patternLibrary.js';
 import { NounType, VerbType } from '../types/graphTypes.js';
+import { getNounTypeEmbeddings, getVerbTypeEmbeddings } from './embeddedTypeEmbeddings.js';
 export class NaturalLanguageProcessor {
     constructor(brain) {
         this.initialized = false;

@@ -54,41 +55,26 @@ export class NaturalLanguageProcessor {
     }
     /**
      * Initialize embeddings for all NounTypes and VerbTypes
-     *
+     * PRODUCTION OPTIMIZATION (v3.33.0): Uses pre-computed type embeddings
+     * Zero runtime cost - embeddings are loaded instantly from embedded data
      */
     async initializeTypeEmbeddings() {
         if (this.typeEmbeddingsInitialized)
             return;
-        //
-
-
-
-
-
-
-
-
-
-
-
-
-
-            }
-        }
-        // Embed all VerbTypes (40+ types)
-        for (const [key, value] of Object.entries(VerbType)) {
-            if (typeof value === 'string') {
-                const keyEmbedding = await this.getEmbedding(key);
-                const valueEmbedding = await this.getEmbedding(value);
-                this.verbTypeEmbeddings.set(key, keyEmbedding);
-                this.verbTypeEmbeddings.set(value, valueEmbedding);
-                // Common variations for verbs
-                const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
-                if (spaceSeparated !== value) {
-                    const variantEmbedding = await this.getEmbedding(spaceSeparated);
-                    this.verbTypeEmbeddings.set(spaceSeparated, variantEmbedding);
-                }
-            }
+        // Load pre-computed embeddings (instant, no computation)
+        const nounEmbeddings = getNounTypeEmbeddings();
+        const verbEmbeddings = getVerbTypeEmbeddings();
+        // Store noun type embeddings with all variations for lookup
+        for (const [type, embedding] of nounEmbeddings.entries()) {
+            this.nounTypeEmbeddings.set(type, embedding);
+            // Also store lowercase version for case-insensitive matching
+            this.nounTypeEmbeddings.set(type.toLowerCase(), embedding);
+        }
+        // Store verb type embeddings with all variations for lookup
+        for (const [type, embedding] of verbEmbeddings.entries()) {
+            this.verbTypeEmbeddings.set(type, embedding);
+            // Also store lowercase version for case-insensitive matching
+            this.verbTypeEmbeddings.set(type.toLowerCase(), embedding);
         }
         this.typeEmbeddingsInitialized = true;
     }
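Where v3.32.2 embedded every NounType and VerbType name at startup, the new code fills the type-embedding maps from pre-computed data and stores each type under both its exact and lowercased key. A minimal sketch of the lookup pattern this enables; the names `findClosestType`, `cosineSimilarity`, and `embed` are illustrative assumptions, not Brainy APIs:

```ts
// Sketch only: exact -> lowercase -> nearest-neighbour lookup against a
// pre-computed type embedding map (shape as populated in the diff above).
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, na = 0, nb = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb) || 1);
}

async function findClosestType(
  candidate: string,
  typeEmbeddings: Map<string, number[]>,
  embed: (text: string) => Promise<number[]>
): Promise<string | null> {
  // Fast path: the map holds both exact and lowercased keys, so most
  // lookups resolve without computing any embedding at runtime.
  if (typeEmbeddings.has(candidate)) return candidate;
  const lower = candidate.toLowerCase();
  if (typeEmbeddings.has(lower)) return lower;
  // Slow path: embed the free-text candidate and pick the most similar type.
  const candidateVector = await embed(candidate);
  let best: string | null = null;
  let bestScore = -Infinity;
  for (const [type, vector] of typeEmbeddings.entries()) {
    const score = cosineSimilarity(candidateVector, vector);
    if (score > bestScore) {
      bestScore = score;
      best = type;
    }
  }
  return best;
}
```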
package/dist/storage/adapters/baseStorageAdapter.d.ts

@@ -23,6 +23,23 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
     abstract getNounMetadata(id: string): Promise<any | null>;
     abstract saveVerbMetadata(id: string, metadata: any): Promise<void>;
     abstract getVerbMetadata(id: string): Promise<any | null>;
+    abstract getNounVector(id: string): Promise<number[] | null>;
+    abstract saveHNSWData(nounId: string, hnswData: {
+        level: number;
+        connections: Record<string, string[]>;
+    }): Promise<void>;
+    abstract getHNSWData(nounId: string): Promise<{
+        level: number;
+        connections: Record<string, string[]>;
+    } | null>;
+    abstract saveHNSWSystem(systemData: {
+        entryPointId: string | null;
+        maxLevel: number;
+    }): Promise<void>;
+    abstract getHNSWSystem(): Promise<{
+        entryPointId: string | null;
+        maxLevel: number;
+    } | null>;
     abstract clear(): Promise<void>;
     abstract getStorageStatus(): Promise<{
         type: string;

@@ -263,6 +280,12 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
         timestamp: number;
     }>;
     protected readonly COUNT_CACHE_TTL = 60000;
+    protected pendingCountPersist: boolean;
+    protected lastCountPersistTime: number;
+    protected scheduledCountPersistTimeout: NodeJS.Timeout | null;
+    protected pendingCountOperations: number;
+    protected countPersistBatchSize: number;
+    protected countPersistInterval: number;
     /**
      * Get total noun count - O(1) operation
      * @returns Promise that resolves to the total number of nouns

@@ -303,6 +326,37 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
      * @param type The verb type
      */
     protected decrementVerbCount(type: string): Promise<void>;
+    /**
+     * Detect if this storage adapter uses cloud storage (network I/O)
+     * Cloud storage benefits from batching; local storage does not.
+     *
+     * Override this method in subclasses for accurate detection.
+     * Default implementation checks storage type from getStorageStatus().
+     *
+     * @returns true if cloud storage (GCS, S3, R2), false if local (File, Memory)
+     */
+    protected isCloudStorage(): boolean;
+    /**
+     * Schedule a smart batched persist operation.
+     *
+     * Strategy:
+     * - Local Storage: Persist immediately (fast, no network latency)
+     * - Cloud Storage: Batch persist (10 ops OR 5 seconds, whichever first)
+     *
+     * This mirrors the statistics batching pattern for consistency.
+     */
+    protected scheduleCountPersist(): Promise<void>;
+    /**
+     * Flush counts immediately to storage.
+     *
+     * Used for:
+     * - Graceful shutdown (SIGTERM handler)
+     * - Forced persist (batch threshold reached)
+     * - Local storage immediate persist
+     *
+     * This is the public API that shutdown hooks can call.
+     */
+    flushCounts(): Promise<void>;
     /**
      * Initialize counts from storage - must be implemented by each adapter
      * @protected

package/dist/storage/adapters/baseStorageAdapter.js

@@ -48,6 +48,17 @@ export class BaseStorageAdapter {
         this.verbCounts = new Map(); // verb type -> count
         this.countCache = new Map();
         this.COUNT_CACHE_TTL = 60000; // 1 minute cache TTL
+        // =============================================
+        // Smart Count Batching (v3.32.3+)
+        // =============================================
+        // Count batching state - mirrors statistics batching pattern
+        this.pendingCountPersist = false; // Counts changed since last persist?
+        this.lastCountPersistTime = 0; // Timestamp of last persist
+        this.scheduledCountPersistTimeout = null; // Scheduled persist timer
+        this.pendingCountOperations = 0; // Operations since last persist
+        // Batching configuration (overridable by subclasses for custom strategies)
+        this.countPersistBatchSize = 10; // Operations before forcing persist (cloud storage)
+        this.countPersistInterval = 5000; // Milliseconds before forcing persist (cloud storage)
     }
     /**
      * Save statistics data

@@ -659,10 +670,10 @@ export class BaseStorageAdapter {
         const mutex = getGlobalMutex();
         await mutex.runExclusive(`count-entity-${type}`, async () => {
             this.incrementEntityCount(type);
-            //
-            //
-            //
-            await this.
+            // Smart batching (v3.32.3+): Adapts to storage type
+            // - Cloud storage (GCS, S3): Batches 10 ops OR 5 seconds
+            // - Local storage (File, Memory): Persists immediately
+            await this.scheduleCountPersist();
         });
     }
     /**

@@ -693,8 +704,8 @@ export class BaseStorageAdapter {
         const mutex = getGlobalMutex();
         await mutex.runExclusive(`count-entity-${type}`, async () => {
             this.decrementEntityCount(type);
-            //
-            await this.
+            // Smart batching (v3.32.3+): Adapts to storage type
+            await this.scheduleCountPersist();
         });
     }
     /**

@@ -711,8 +722,8 @@ export class BaseStorageAdapter {
                 count: this.totalVerbCount,
                 timestamp: Date.now()
             });
-            //
-            await this.
+            // Smart batching (v3.32.3+): Adapts to storage type
+            await this.scheduleCountPersist();
         });
     }
     /**

@@ -737,9 +748,93 @@ export class BaseStorageAdapter {
                 count: this.totalVerbCount,
                 timestamp: Date.now()
             });
-            //
-            await this.
+            // Smart batching (v3.32.3+): Adapts to storage type
+            await this.scheduleCountPersist();
         });
     }
+    // =============================================
+    // Smart Batching Methods (v3.32.3+)
+    // =============================================
+    /**
+     * Detect if this storage adapter uses cloud storage (network I/O)
+     * Cloud storage benefits from batching; local storage does not.
+     *
+     * Override this method in subclasses for accurate detection.
+     * Default implementation checks storage type from getStorageStatus().
+     *
+     * @returns true if cloud storage (GCS, S3, R2), false if local (File, Memory)
+     */
+    isCloudStorage() {
+        // Default: assume local storage (conservative, prefers reliability over performance)
+        // Subclasses should override this for accurate detection
+        return false;
+    }
+    /**
+     * Schedule a smart batched persist operation.
+     *
+     * Strategy:
+     * - Local Storage: Persist immediately (fast, no network latency)
+     * - Cloud Storage: Batch persist (10 ops OR 5 seconds, whichever first)
+     *
+     * This mirrors the statistics batching pattern for consistency.
+     */
+    async scheduleCountPersist() {
+        // Mark counts as pending persist
+        this.pendingCountPersist = true;
+        this.pendingCountOperations++;
+        // Local storage: persist immediately (fast enough, no benefit from batching)
+        if (!this.isCloudStorage()) {
+            await this.flushCounts();
+            return;
+        }
+        // Cloud storage: use smart batching
+        // Persist if we've hit the batch size threshold
+        if (this.pendingCountOperations >= this.countPersistBatchSize) {
+            await this.flushCounts();
+            return;
+        }
+        // Otherwise, schedule a time-based persist if not already scheduled
+        if (!this.scheduledCountPersistTimeout) {
+            this.scheduledCountPersistTimeout = setTimeout(() => {
+                this.flushCounts().catch(error => {
+                    console.error('Failed to flush counts on timer:', error);
+                });
+            }, this.countPersistInterval);
+        }
+    }
+    /**
+     * Flush counts immediately to storage.
+     *
+     * Used for:
+     * - Graceful shutdown (SIGTERM handler)
+     * - Forced persist (batch threshold reached)
+     * - Local storage immediate persist
+     *
+     * This is the public API that shutdown hooks can call.
+     */
+    async flushCounts() {
+        // Clear any scheduled persist
+        if (this.scheduledCountPersistTimeout) {
+            clearTimeout(this.scheduledCountPersistTimeout);
+            this.scheduledCountPersistTimeout = null;
+        }
+        // Nothing to flush?
+        if (!this.pendingCountPersist) {
+            return;
+        }
+        try {
+            // Persist to storage (implemented by subclass)
+            await this.persistCounts();
+            // Update state
+            this.lastCountPersistTime = Date.now();
+            this.pendingCountPersist = false;
+            this.pendingCountOperations = 0;
+        }
+        catch (error) {
+            console.error('❌ CRITICAL: Failed to flush counts to storage:', error);
+            // Keep pending flag set so we retry on next operation
+            throw error;
+        }
+    }
 }
 //# sourceMappingURL=baseStorageAdapter.js.map
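Per the doc comments above, isCloudStorage() is the switch that turns count batching on and flushCounts() is the public entry point shutdown hooks are expected to call. A minimal sketch of that wiring on the consumer side, assuming an already-constructed adapter instance; the helper and signal handling below are illustrative, not part of the package:

```ts
// Sketch: flush batched counts on shutdown. `storage` stands in for any
// adapter that inherits flushCounts() from BaseStorageAdapter; construction
// and import details are deliberately omitted.
type CountFlushable = { flushCounts(): Promise<void> };

function registerCountFlushOnShutdown(storage: CountFlushable): void {
  const flush = async (signal: NodeJS.Signals) => {
    try {
      // flushCounts() clears any pending persist timer and writes counts
      // immediately, so updates still inside the 10-op / 5-second batch
      // window are not lost when the process exits.
      await storage.flushCounts();
    } catch (err) {
      console.error(`Count flush on ${signal} failed:`, err);
    } finally {
      process.exit(0);
    }
  };
  process.on('SIGTERM', () => void flush('SIGTERM'));
  process.on('SIGINT', () => void flush('SIGINT'));
}
```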
package/dist/storage/adapters/fileSystemStorage.d.ts

@@ -361,5 +361,37 @@ export declare class FileSystemStorage extends BaseStorage {
      * Check if a file exists (handles both sharded and non-sharded)
      */
     private fileExists;
+    /**
+     * Get vector for a noun
+     */
+    getNounVector(id: string): Promise<number[] | null>;
+    /**
+     * Save HNSW graph data for a noun
+     */
+    saveHNSWData(nounId: string, hnswData: {
+        level: number;
+        connections: Record<string, string[]>;
+    }): Promise<void>;
+    /**
+     * Get HNSW graph data for a noun
+     */
+    getHNSWData(nounId: string): Promise<{
+        level: number;
+        connections: Record<string, string[]>;
+    } | null>;
+    /**
+     * Save HNSW system data (entry point, max level)
+     */
+    saveHNSWSystem(systemData: {
+        entryPointId: string | null;
+        maxLevel: number;
+    }): Promise<void>;
+    /**
+     * Get HNSW system data
+     */
+    getHNSWSystem(): Promise<{
+        entryPointId: string | null;
+        maxLevel: number;
+    } | null>;
 }
 export {};

package/dist/storage/adapters/fileSystemStorage.js

@@ -2108,5 +2108,71 @@ export class FileSystemStorage extends BaseStorage {
             return false;
         }
     }
+    // =============================================
+    // HNSW Index Persistence (v3.35.0+)
+    // =============================================
+    /**
+     * Get vector for a noun
+     */
+    async getNounVector(id) {
+        await this.ensureInitialized();
+        const noun = await this.getNode(id);
+        return noun ? noun.vector : null;
+    }
+    /**
+     * Save HNSW graph data for a noun
+     */
+    async saveHNSWData(nounId, hnswData) {
+        await this.ensureInitialized();
+        // Use sharded path for HNSW data
+        const shard = nounId.substring(0, 2).toLowerCase();
+        const hnswDir = path.join(this.rootDir, 'entities', 'nouns', 'hnsw', shard);
+        await this.ensureDirectoryExists(hnswDir);
+        const filePath = path.join(hnswDir, `${nounId}.json`);
+        await fs.promises.writeFile(filePath, JSON.stringify(hnswData, null, 2));
+    }
+    /**
+     * Get HNSW graph data for a noun
+     */
+    async getHNSWData(nounId) {
+        await this.ensureInitialized();
+        const shard = nounId.substring(0, 2).toLowerCase();
+        const filePath = path.join(this.rootDir, 'entities', 'nouns', 'hnsw', shard, `${nounId}.json`);
+        try {
+            const data = await fs.promises.readFile(filePath, 'utf-8');
+            return JSON.parse(data);
+        }
+        catch (error) {
+            if (error.code !== 'ENOENT') {
+                console.error(`Error reading HNSW data for ${nounId}:`, error);
+            }
+            return null;
+        }
+    }
+    /**
+     * Save HNSW system data (entry point, max level)
+     */
+    async saveHNSWSystem(systemData) {
+        await this.ensureInitialized();
+        const filePath = path.join(this.systemDir, 'hnsw-system.json');
+        await fs.promises.writeFile(filePath, JSON.stringify(systemData, null, 2));
+    }
+    /**
+     * Get HNSW system data
+     */
+    async getHNSWSystem() {
+        await this.ensureInitialized();
+        const filePath = path.join(this.systemDir, 'hnsw-system.json');
+        try {
+            const data = await fs.promises.readFile(filePath, 'utf-8');
+            return JSON.parse(data);
+        }
+        catch (error) {
+            if (error.code !== 'ENOENT') {
+                console.error('Error reading HNSW system data:', error);
+            }
+            return null;
+        }
+    }
 }
 //# sourceMappingURL=fileSystemStorage.js.map
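For the filesystem adapter, the new methods shard each noun's HNSW record by the first two characters of its id and keep a single system file for the entry point and max level. A short sketch that mirrors that path logic; the `rootDir`/`systemDir` values are illustrative, since the real directories are configured inside the adapter:

```ts
// Sketch: where FileSystemStorage places HNSW data, mirroring the sharding
// logic in the diff above. Directory values here are assumptions.
import path from 'node:path';

function hnswNodePath(rootDir: string, nounId: string): string {
  // Shard = first two characters of the id, lowercased (as in saveHNSWData)
  const shard = nounId.substring(0, 2).toLowerCase();
  return path.join(rootDir, 'entities', 'nouns', 'hnsw', shard, `${nounId}.json`);
}

function hnswSystemPath(systemDir: string): string {
  // Single file holding the graph entry point id and max level
  return path.join(systemDir, 'hnsw-system.json');
}

console.log(hnswNodePath('/var/data/brainy', '9f2c7a10-...'));
// -> /var/data/brainy/entities/nouns/hnsw/9f/9f2c7a10-....json
```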
package/dist/storage/adapters/gcsStorage.d.ts

@@ -102,6 +102,15 @@ export declare class GcsStorage extends BaseStorage {
      * Override base class method to detect GCS-specific throttling errors
      */
     protected isThrottlingError(error: any): boolean;
+    /**
+     * Override base class to enable smart batching for cloud storage (v3.32.3+)
+     *
+     * GCS is cloud storage with network latency (~50ms per write).
+     * Smart batching reduces writes from 1000 ops → 100 batches.
+     *
+     * @returns true (GCS is cloud storage)
+     */
+    protected isCloudStorage(): boolean;
     /**
      * Apply backpressure before starting an operation
      * @returns Request ID for tracking

@@ -330,5 +339,41 @@ export declare class GcsStorage extends BaseStorage {
      * Persist counts to storage
      */
     protected persistCounts(): Promise<void>;
+    /**
+     * Get a noun's vector for HNSW rebuild
+     */
+    getNounVector(id: string): Promise<number[] | null>;
+    /**
+     * Save HNSW graph data for a noun
+     * Storage path: entities/nouns/hnsw/{shard}/{id}.json
+     */
+    saveHNSWData(nounId: string, hnswData: {
+        level: number;
+        connections: Record<string, string[]>;
+    }): Promise<void>;
+    /**
+     * Get HNSW graph data for a noun
+     * Storage path: entities/nouns/hnsw/{shard}/{id}.json
+     */
+    getHNSWData(nounId: string): Promise<{
+        level: number;
+        connections: Record<string, string[]>;
+    } | null>;
+    /**
+     * Save HNSW system data (entry point, max level)
+     * Storage path: system/hnsw-system.json
+     */
+    saveHNSWSystem(systemData: {
+        entryPointId: string | null;
+        maxLevel: number;
+    }): Promise<void>;
+    /**
+     * Get HNSW system data (entry point, max level)
+     * Storage path: system/hnsw-system.json
+     */
+    getHNSWSystem(): Promise<{
+        entryPointId: string | null;
+        maxLevel: number;
+    } | null>;
 }
 export {};

package/dist/storage/adapters/gcsStorage.js

@@ -195,6 +195,17 @@ export class GcsStorage extends BaseStorage {
             message.includes('rate limit') ||
             message.includes('too many requests'));
     }
+    /**
+     * Override base class to enable smart batching for cloud storage (v3.32.3+)
+     *
+     * GCS is cloud storage with network latency (~50ms per write).
+     * Smart batching reduces writes from 1000 ops → 100 batches.
+     *
+     * @returns true (GCS is cloud storage)
+     */
+    isCloudStorage() {
+        return true; // GCS benefits from batching
+    }
     /**
      * Apply backpressure before starting an operation
      * @returns Request ID for tracking

@@ -1128,15 +1139,32 @@ export class GcsStorage extends BaseStorage {
     async initializeCountsFromScan() {
         try {
             prodLog.info('📊 Scanning GCS bucket to initialize counts...');
+            prodLog.info(`🔍 Noun prefix: ${this.nounPrefix}`);
+            prodLog.info(`🔍 Verb prefix: ${this.verbPrefix}`);
             // Count nouns
             const [nounFiles] = await this.bucket.getFiles({ prefix: this.nounPrefix });
-
+            prodLog.info(`🔍 Found ${nounFiles?.length || 0} total files under noun prefix`);
+            const jsonNounFiles = nounFiles?.filter((f) => f.name?.endsWith('.json')) || [];
+            this.totalNounCount = jsonNounFiles.length;
+            if (jsonNounFiles.length > 0 && jsonNounFiles.length <= 5) {
+                prodLog.info(`📄 Sample noun files: ${jsonNounFiles.slice(0, 5).map((f) => f.name).join(', ')}`);
+            }
             // Count verbs
             const [verbFiles] = await this.bucket.getFiles({ prefix: this.verbPrefix });
-
+            prodLog.info(`🔍 Found ${verbFiles?.length || 0} total files under verb prefix`);
+            const jsonVerbFiles = verbFiles?.filter((f) => f.name?.endsWith('.json')) || [];
+            this.totalVerbCount = jsonVerbFiles.length;
+            if (jsonVerbFiles.length > 0 && jsonVerbFiles.length <= 5) {
+                prodLog.info(`📄 Sample verb files: ${jsonVerbFiles.slice(0, 5).map((f) => f.name).join(', ')}`);
+            }
             // Save initial counts
-
-
+            if (this.totalNounCount > 0 || this.totalVerbCount > 0) {
+                await this.persistCounts();
+                prodLog.info(`✅ Initialized counts from scan: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
+            }
+            else {
+                prodLog.warn(`⚠️ No entities found during bucket scan. Check that entities exist and prefixes are correct.`);
+            }
         }
         catch (error) {
             // CRITICAL FIX: Don't silently fail - this prevents data loss scenarios

@@ -1167,5 +1195,95 @@ export class GcsStorage extends BaseStorage {
             this.logger.error('Error persisting counts:', error);
         }
     }
+    // HNSW Index Persistence (v3.35.0+)
+    /**
+     * Get a noun's vector for HNSW rebuild
+     */
+    async getNounVector(id) {
+        await this.ensureInitialized();
+        const noun = await this.getNode(id);
+        return noun ? noun.vector : null;
+    }
+    /**
+     * Save HNSW graph data for a noun
+     * Storage path: entities/nouns/hnsw/{shard}/{id}.json
+     */
+    async saveHNSWData(nounId, hnswData) {
+        await this.ensureInitialized();
+        try {
+            // Use sharded path for HNSW data
+            const shard = getShardIdFromUuid(nounId);
+            const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
+            const file = this.bucket.file(key);
+            await file.save(JSON.stringify(hnswData, null, 2), {
+                contentType: 'application/json',
+                resumable: false
+            });
+        }
+        catch (error) {
+            this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
+            throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
+        }
+    }
+    /**
+     * Get HNSW graph data for a noun
+     * Storage path: entities/nouns/hnsw/{shard}/{id}.json
+     */
+    async getHNSWData(nounId) {
+        await this.ensureInitialized();
+        try {
+            const shard = getShardIdFromUuid(nounId);
+            const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
+            const file = this.bucket.file(key);
+            const [contents] = await file.download();
+            return JSON.parse(contents.toString());
+        }
+        catch (error) {
+            if (error.code === 404) {
+                return null;
+            }
+            this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
+            throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
+        }
+    }
+    /**
+     * Save HNSW system data (entry point, max level)
+     * Storage path: system/hnsw-system.json
+     */
+    async saveHNSWSystem(systemData) {
+        await this.ensureInitialized();
+        try {
+            const key = `${this.systemPrefix}hnsw-system.json`;
+            const file = this.bucket.file(key);
+            await file.save(JSON.stringify(systemData, null, 2), {
+                contentType: 'application/json',
+                resumable: false
+            });
+        }
+        catch (error) {
+            this.logger.error('Failed to save HNSW system data:', error);
+            throw new Error(`Failed to save HNSW system data: ${error}`);
+        }
+    }
+    /**
+     * Get HNSW system data (entry point, max level)
+     * Storage path: system/hnsw-system.json
+     */
+    async getHNSWSystem() {
+        await this.ensureInitialized();
+        try {
+            const key = `${this.systemPrefix}hnsw-system.json`;
+            const file = this.bucket.file(key);
+            const [contents] = await file.download();
+            return JSON.parse(contents.toString());
+        }
+        catch (error) {
+            if (error.code === 404) {
+                return null;
+            }
+            this.logger.error('Failed to get HNSW system data:', error);
+            throw new Error(`Failed to get HNSW system data: ${error}`);
+        }
+    }
 }
 //# sourceMappingURL=gcsStorage.js.map

package/dist/storage/adapters/memoryStorage.d.ts

@@ -174,4 +174,36 @@ export declare class MemoryStorage extends BaseStorage {
      * Persist counts to storage - no-op for memory storage
      */
     protected persistCounts(): Promise<void>;
+    /**
+     * Get vector for a noun
+     */
+    getNounVector(id: string): Promise<number[] | null>;
+    /**
+     * Save HNSW graph data for a noun
+     */
+    saveHNSWData(nounId: string, hnswData: {
+        level: number;
+        connections: Record<string, string[]>;
+    }): Promise<void>;
+    /**
+     * Get HNSW graph data for a noun
+     */
+    getHNSWData(nounId: string): Promise<{
+        level: number;
+        connections: Record<string, string[]>;
+    } | null>;
+    /**
+     * Save HNSW system data (entry point, max level)
+     */
+    saveHNSWSystem(systemData: {
+        entryPointId: string | null;
+        maxLevel: number;
+    }): Promise<void>;
+    /**
+     * Get HNSW system data
+     */
+    getHNSWSystem(): Promise<{
+        entryPointId: string | null;
+        maxLevel: number;
+    } | null>;
 }