@soulcraft/brainy 3.25.2 → 3.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/storage/adapters/fileSystemStorage.js +7 -2
- package/dist/storage/adapters/gcsStorage.d.ts +334 -0
- package/dist/storage/adapters/gcsStorage.js +1181 -0
- package/dist/storage/adapters/opfsStorage.js +174 -85
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +43 -5
- package/dist/storage/adapters/s3CompatibleStorage.js +191 -86
- package/dist/storage/sharding.d.ts +103 -0
- package/dist/storage/sharding.js +137 -0
- package/dist/storage/storageFactory.d.ts +31 -4
- package/dist/storage/storageFactory.js +33 -4
- package/package.json +2 -1
package/dist/storage/adapters/gcsStorage.js (new file)
@@ -0,0 +1,1181 @@
/**
 * Google Cloud Storage Adapter (Native)
 * Uses the native @google-cloud/storage library for optimal performance and authentication
 *
 * Supports multiple authentication methods:
 * 1. Application Default Credentials (ADC) - Automatic in Cloud Run/GCE
 * 2. Service Account Key File
 * 3. Service Account Credentials Object
 * 4. HMAC Keys (fallback for backward compatibility)
 */
import { BaseStorage, SYSTEM_DIR, STATISTICS_KEY, getDirectoryPath } from '../baseStorage.js';
import { BrainyError } from '../../errors/brainyError.js';
import { CacheManager } from '../cacheManager.js';
import { createModuleLogger, prodLog } from '../../utils/logger.js';
import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
import { getWriteBuffer } from '../../utils/writeBuffer.js';
import { getCoalescer } from '../../utils/requestCoalescer.js';
import { getShardIdFromUuid, getShardIdByIndex, TOTAL_SHARDS } from '../sharding.js';
/**
 * Native Google Cloud Storage adapter for server environments
 * Uses the @google-cloud/storage library with Application Default Credentials
 *
 * Authentication priority:
 * 1. Application Default Credentials (if no credentials provided)
 * 2. Service Account Key File (if keyFilename provided)
 * 3. Service Account Credentials Object (if credentials provided)
 * 4. HMAC Keys (if accessKeyId/secretAccessKey provided)
 */
export class GcsStorage extends BaseStorage {
    /**
     * Initialize the storage adapter
     * @param options Configuration options for Google Cloud Storage
     */
    constructor(options) {
        super();
        this.storage = null;
        this.bucket = null;
        // Statistics caching for better performance
        this.statisticsCache = null;
        // Backpressure and performance management
        this.pendingOperations = 0;
        this.maxConcurrentOperations = 100;
        this.baseBatchSize = 10;
        this.currentBatchSize = 10;
        this.lastMemoryCheck = 0;
        this.memoryCheckInterval = 5000; // Check every 5 seconds
        this.consecutiveErrors = 0;
        this.lastErrorReset = Date.now();
        // Adaptive backpressure for automatic flow control
        this.backpressure = getGlobalBackpressure();
        // Write buffers for bulk operations
        this.nounWriteBuffer = null;
        this.verbWriteBuffer = null;
        // Request coalescer for deduplication
        this.requestCoalescer = null;
        // High-volume mode detection - MUCH more aggressive
        this.highVolumeMode = false;
        this.lastVolumeCheck = 0;
        this.volumeCheckInterval = 1000; // Check every second, not 5
        this.forceHighVolumeMode = false; // Environment variable override
        // Module logger
        this.logger = createModuleLogger('GcsStorage');
        this.bucketName = options.bucketName;
        this.keyFilename = options.keyFilename;
        this.credentials = options.credentials;
        this.accessKeyId = options.accessKeyId;
        this.secretAccessKey = options.secretAccessKey;
        this.readOnly = options.readOnly || false;
        // Set up prefixes for different types of data using entity-based structure
        this.nounPrefix = `${getDirectoryPath('noun', 'vector')}/`;
        this.verbPrefix = `${getDirectoryPath('verb', 'vector')}/`;
        this.metadataPrefix = `${getDirectoryPath('noun', 'metadata')}/`; // Noun metadata
        this.verbMetadataPrefix = `${getDirectoryPath('verb', 'metadata')}/`; // Verb metadata
        this.systemPrefix = `${SYSTEM_DIR}/`; // System data
        // Initialize cache managers
        this.nounCacheManager = new CacheManager(options.cacheConfig);
        this.verbCacheManager = new CacheManager(options.cacheConfig);
        // Check for high-volume mode override
        if (typeof process !== 'undefined' && process.env?.BRAINY_FORCE_HIGH_VOLUME === 'true') {
            this.forceHighVolumeMode = true;
            this.highVolumeMode = true;
            prodLog.info('🚀 High-volume mode FORCED via BRAINY_FORCE_HIGH_VOLUME environment variable');
        }
    }
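
The constructor above only records configuration; no credentials are touched until `init()` runs. A minimal sketch of constructing the adapter for each authentication path, assuming only the option names the constructor reads (`bucketName`, `keyFilename`, `credentials`, `accessKeyId`/`secretAccessKey`). Bucket names, paths, and key material are placeholders, and in normal use the class is obtained through the package's storage factory rather than constructed directly.

```js
// Placeholder values throughout; option names come from the constructor above.

// 1. Application Default Credentials (automatic on Cloud Run / GCE)
const adc = new GcsStorage({ bucketName: 'my-brainy-data' })

// 2. Service account key file on disk
const keyFile = new GcsStorage({
    bucketName: 'my-brainy-data',
    keyFilename: '/secrets/service-account.json'
})

// 3. Service account credentials object
const inline = new GcsStorage({
    bucketName: 'my-brainy-data',
    credentials: {
        client_email: 'svc@my-project.iam.gserviceaccount.com',
        private_key: '-----BEGIN PRIVATE KEY-----\n...'
    }
})

// 4. HMAC keys (backward-compatibility fallback; init() logs a migration warning)
const hmac = new GcsStorage({
    bucketName: 'my-brainy-data',
    accessKeyId: 'GOOG1E...',
    secretAccessKey: '...'
})

await adc.init() // verifies the bucket exists and wires up write buffers and the coalescer
```
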
    /**
     * Initialize the storage adapter
     */
    async init() {
        if (this.isInitialized) {
            return;
        }
        try {
            // Import Google Cloud Storage SDK only when needed
            const { Storage } = await import('@google-cloud/storage');
            // Configure the GCS client based on available credentials
            const clientConfig = {};
            // Priority 1: Service Account Key File
            if (this.keyFilename) {
                clientConfig.keyFilename = this.keyFilename;
                prodLog.info('🔐 GCS: Using Service Account Key File');
            }
            // Priority 2: Service Account Credentials Object
            else if (this.credentials) {
                clientConfig.credentials = this.credentials;
                prodLog.info('🔐 GCS: Using Service Account Credentials');
            }
            // Priority 3: HMAC Keys (S3 compatibility)
            else if (this.accessKeyId && this.secretAccessKey) {
                clientConfig.credentials = {
                    client_email: 'hmac-user@example.com',
                    private_key: this.secretAccessKey
                };
                prodLog.warn('⚠️ GCS: Using HMAC keys (consider migrating to ADC)');
            }
            // Priority 4: Application Default Credentials (default)
            else {
                // No credentials needed - ADC will be used automatically
                prodLog.info('🔐 GCS: Using Application Default Credentials (ADC)');
            }
            // Create the GCS client
            this.storage = new Storage(clientConfig);
            // Get reference to the bucket
            this.bucket = this.storage.bucket(this.bucketName);
            // Verify bucket exists and is accessible
            const [exists] = await this.bucket.exists();
            if (!exists) {
                throw new Error(`Bucket ${this.bucketName} does not exist or is not accessible`);
            }
            prodLog.info(`✅ Connected to GCS bucket: ${this.bucketName}`);
            // Initialize write buffers for high-volume mode
            const storageId = `gcs-${this.bucketName}`;
            this.nounWriteBuffer = getWriteBuffer(`${storageId}-nouns`, 'noun', async (items) => {
                await this.flushNounBuffer(items);
            });
            this.verbWriteBuffer = getWriteBuffer(`${storageId}-verbs`, 'verb', async (items) => {
                await this.flushVerbBuffer(items);
            });
            // Initialize request coalescer for deduplication
            this.requestCoalescer = getCoalescer(storageId, async (batch) => {
                // Process coalesced operations (placeholder for future optimization)
                this.logger.trace(`Processing coalesced batch: ${batch.length} items`);
            });
            // Initialize counts from storage
            await this.initializeCounts();
            this.isInitialized = true;
        }
        catch (error) {
            this.logger.error('Failed to initialize GCS storage:', error);
            throw new Error(`Failed to initialize GCS storage: ${error}`);
        }
    }
    /**
     * Get the GCS object key for a noun using UUID-based sharding
     *
     * Uses first 2 hex characters of UUID for consistent sharding.
     * Path format: entities/nouns/vectors/{shardId}/{uuid}.json
     *
     * @example
     * getNounKey('ab123456-1234-5678-9abc-def012345678')
     * // returns 'entities/nouns/vectors/ab/ab123456-1234-5678-9abc-def012345678.json'
     */
    getNounKey(id) {
        const shardId = getShardIdFromUuid(id);
        return `${this.nounPrefix}${shardId}/${id}.json`;
    }
    /**
     * Get the GCS object key for a verb using UUID-based sharding
     *
     * Uses first 2 hex characters of UUID for consistent sharding.
     * Path format: entities/verbs/vectors/{shardId}/{uuid}.json
     *
     * @example
     * getVerbKey('cd987654-4321-8765-cba9-fed543210987')
     * // returns 'entities/verbs/vectors/cd/cd987654-4321-8765-cba9-fed543210987.json'
     */
    getVerbKey(id) {
        const shardId = getShardIdFromUuid(id);
        return `${this.verbPrefix}${shardId}/${id}.json`;
    }
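
Both key helpers delegate to `getShardIdFromUuid` from `../sharding.js` (added in this release but not shown in this hunk). Going only by the doc comments above ("first 2 hex characters of UUID", shards 00-ff), an illustrative sketch of the mapping; the real implementation may differ in detail.

```js
// Illustrative only - the actual function lives in dist/storage/sharding.js.
function exampleShardIdFromUuid(uuid) {
    // First two hex characters pick one of 256 shard directories ('00'..'ff').
    return uuid.toLowerCase().slice(0, 2)
}

exampleShardIdFromUuid('ab123456-1234-5678-9abc-def012345678') // -> 'ab'
// Which getNounKey turns into:
// entities/nouns/vectors/ab/ab123456-1234-5678-9abc-def012345678.json
```
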
    /**
     * Override base class method to detect GCS-specific throttling errors
     */
    isThrottlingError(error) {
        // First check base class detection
        if (super.isThrottlingError(error)) {
            return true;
        }
        // GCS-specific throttling detection
        const statusCode = error.code;
        const message = error.message?.toLowerCase() || '';
        return (statusCode === 429 || // Too Many Requests
            statusCode === 503 || // Service Unavailable
            statusCode === 'RATE_LIMIT_EXCEEDED' ||
            message.includes('quota') ||
            message.includes('rate limit') ||
            message.includes('too many requests'));
    }
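
Throttling errors are not swallowed by this adapter: the read and write paths below call `handleThrottling(error)` and then re-throw so a caller can retry (see the "Re-throw for retry at higher level" comment in `saveNodeDirect`). A hedged sketch of such a caller-side retry loop; the attempt count and backoff policy are arbitrary choices, not something the adapter prescribes.

```js
// Retry a storage call when the adapter classifies the failure as throttling.
// `adapter` is an initialized GcsStorage instance.
async function withRetry(adapter, operation, { attempts = 5, baseDelayMs = 250 } = {}) {
    for (let attempt = 0; attempt < attempts; attempt++) {
        try {
            return await operation()
        } catch (error) {
            if (!adapter.isThrottlingError(error) || attempt === attempts - 1) throw error
            // Exponential backoff with a little jitter before the next attempt.
            const delayMs = baseDelayMs * 2 ** attempt + Math.random() * 100
            await new Promise((resolve) => setTimeout(resolve, delayMs))
        }
    }
}

// Example: await withRetry(storage, () => storage.saveNode(node))
```
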
    /**
     * Apply backpressure before starting an operation
     * @returns Request ID for tracking
     */
    async applyBackpressure() {
        const requestId = `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
        await this.backpressure.requestPermission(requestId, 1);
        this.pendingOperations++;
        return requestId;
    }
    /**
     * Release backpressure after completing an operation
     * @param success Whether the operation succeeded
     * @param requestId Request ID from applyBackpressure()
     */
    releaseBackpressure(success = true, requestId) {
        this.pendingOperations = Math.max(0, this.pendingOperations - 1);
        if (requestId) {
            this.backpressure.releasePermission(requestId, success);
        }
    }
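
Every GCS call in this adapter wraps the request in the same acquire/release pattern: `applyBackpressure()` before the call, `releaseBackpressure(true, requestId)` on success, and `releaseBackpressure(false, requestId)` in the catch block, so the adaptive controller sees both outcomes. A condensed sketch of that pattern as a hypothetical helper (the adapter itself inlines it in each method rather than using a helper).

```js
// Hypothetical helper mirroring the pattern used by saveNodeDirect, getNode, etc.
async function withBackpressure(adapter, work) {
    const requestId = await adapter.applyBackpressure()
    try {
        const result = await work()
        adapter.releaseBackpressure(true, requestId)
        return result
    } catch (error) {
        adapter.releaseBackpressure(false, requestId)
        throw error
    }
}
```
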
    /**
     * Check if high-volume mode should be enabled
     */
    checkVolumeMode() {
        if (this.forceHighVolumeMode) {
            return; // Already forced on
        }
        const now = Date.now();
        if (now - this.lastVolumeCheck < this.volumeCheckInterval) {
            return;
        }
        this.lastVolumeCheck = now;
        // Enable high-volume mode if we have many pending operations
        const shouldEnable = this.pendingOperations > 20;
        if (shouldEnable && !this.highVolumeMode) {
            this.highVolumeMode = true;
            prodLog.info('🚀 High-volume mode ENABLED (pending operations:', this.pendingOperations, ')');
        }
        else if (!shouldEnable && this.highVolumeMode && !this.forceHighVolumeMode) {
            this.highVolumeMode = false;
            prodLog.info('🐌 High-volume mode DISABLED (pending operations:', this.pendingOperations, ')');
        }
    }
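
High-volume mode therefore toggles itself once more than 20 operations are pending, re-checked at most once per second, unless it has been pinned on via the environment variable read in the constructor. A minimal example of forcing it; the bucket name is a placeholder.

```js
// Read once in the constructor, so set it before constructing the adapter.
process.env.BRAINY_FORCE_HIGH_VOLUME = 'true'
const storage = new GcsStorage({ bucketName: 'my-brainy-data' })
```
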
    /**
     * Flush noun buffer to GCS
     */
    async flushNounBuffer(items) {
        const writes = Array.from(items.values()).map(async (noun) => {
            try {
                await this.saveNodeDirect(noun);
            }
            catch (error) {
                this.logger.error(`Failed to flush noun ${noun.id}:`, error);
            }
        });
        await Promise.all(writes);
    }
    /**
     * Flush verb buffer to GCS
     */
    async flushVerbBuffer(items) {
        const writes = Array.from(items.values()).map(async (verb) => {
            try {
                await this.saveEdgeDirect(verb);
            }
            catch (error) {
                this.logger.error(`Failed to flush verb ${verb.id}:`, error);
            }
        });
        await Promise.all(writes);
    }
    /**
     * Save a noun to storage (internal implementation)
     */
    async saveNoun_internal(noun) {
        return this.saveNode(noun);
    }
    /**
     * Save a node to storage
     */
    async saveNode(node) {
        await this.ensureInitialized();
        // ALWAYS check if we should use high-volume mode (critical for detection)
        this.checkVolumeMode();
        // Use write buffer in high-volume mode
        if (this.highVolumeMode && this.nounWriteBuffer) {
            this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer (high-volume mode active)`);
            await this.nounWriteBuffer.add(node.id, node);
            return;
        }
        else if (!this.highVolumeMode) {
            this.logger.trace(`📝 DIRECT WRITE: Saving noun ${node.id} directly (high-volume mode inactive)`);
        }
        // Direct write in normal mode
        await this.saveNodeDirect(node);
    }
    /**
     * Save a node directly to GCS (bypass buffer)
     */
    async saveNodeDirect(node) {
        // Apply backpressure before starting operation
        const requestId = await this.applyBackpressure();
        try {
            this.logger.trace(`Saving node ${node.id}`);
            // Convert connections Map to a serializable format
            const serializableNode = {
                ...node,
                connections: Object.fromEntries(Array.from(node.connections.entries()).map(([level, nounIds]) => [
                    level,
                    Array.from(nounIds)
                ]))
            };
            // Get the GCS key with UUID-based sharding
            const key = this.getNounKey(node.id);
            // Save to GCS
            const file = this.bucket.file(key);
            await file.save(JSON.stringify(serializableNode, null, 2), {
                contentType: 'application/json',
                resumable: false // For small objects, non-resumable is faster
            });
            // Update cache
            this.nounCacheManager.set(node.id, node);
            // Increment noun count
            const metadata = await this.getNounMetadata(node.id);
            if (metadata && metadata.type) {
                await this.incrementEntityCountSafe(metadata.type);
            }
            this.logger.trace(`Node ${node.id} saved successfully`);
            this.releaseBackpressure(true, requestId);
        }
        catch (error) {
            this.releaseBackpressure(false, requestId);
            // Handle throttling
            if (this.isThrottlingError(error)) {
                await this.handleThrottling(error);
                throw error; // Re-throw for retry at higher level
            }
            this.logger.error(`Failed to save node ${node.id}:`, error);
            throw new Error(`Failed to save node ${node.id}: ${error}`);
        }
    }
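
HNSW connections live in memory as a `Map<number, Set<string>>` (level to neighbour ids) but are stored as a plain JSON object of arrays: `saveNodeDirect` flattens the map as shown above, and `getNode` below rebuilds it. A small round-trip sketch with made-up ids:

```js
const connections = new Map([
    [0, new Set(['a1', 'b2', 'c3'])],
    [1, new Set(['a1'])]
])

// Serialize the way saveNodeDirect does
const serialized = Object.fromEntries(
    Array.from(connections.entries()).map(([level, ids]) => [level, Array.from(ids)])
)
// -> { '0': ['a1', 'b2', 'c3'], '1': ['a1'] }  (JSON object keys become strings)

// Rebuild the way getNode does
const restored = new Map()
for (const [level, ids] of Object.entries(serialized)) {
    restored.set(Number(level), new Set(ids))
}
```
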
    /**
     * Get a noun from storage (internal implementation)
     */
    async getNoun_internal(id) {
        return this.getNode(id);
    }
    /**
     * Get a node from storage
     */
    async getNode(id) {
        await this.ensureInitialized();
        // Check cache first
        const cached = this.nounCacheManager.get(id);
        if (cached) {
            this.logger.trace(`Cache hit for noun ${id}`);
            return cached;
        }
        // Apply backpressure
        const requestId = await this.applyBackpressure();
        try {
            this.logger.trace(`Getting node ${id}`);
            // Get the GCS key with UUID-based sharding
            const key = this.getNounKey(id);
            // Download from GCS
            const file = this.bucket.file(key);
            const [contents] = await file.download();
            // Parse JSON
            const data = JSON.parse(contents.toString());
            // Convert serialized connections back to Map<number, Set<string>>
            const connections = new Map();
            for (const [level, nounIds] of Object.entries(data.connections || {})) {
                connections.set(Number(level), new Set(nounIds));
            }
            const node = {
                id: data.id,
                vector: data.vector,
                connections,
                level: data.level || 0
            };
            // Update cache
            this.nounCacheManager.set(id, node);
            this.logger.trace(`Successfully retrieved node ${id}`);
            this.releaseBackpressure(true, requestId);
            return node;
        }
        catch (error) {
            this.releaseBackpressure(false, requestId);
            // Check if this is a "not found" error
            if (error.code === 404) {
                this.logger.trace(`Node not found: ${id}`);
                return null;
            }
            // Handle throttling
            if (this.isThrottlingError(error)) {
                await this.handleThrottling(error);
                throw error;
            }
            this.logger.error(`Failed to get node ${id}:`, error);
            throw BrainyError.fromError(error, `getNoun(${id})`);
        }
    }
    /**
     * Delete a noun from storage (internal implementation)
     */
    async deleteNoun_internal(id) {
        await this.ensureInitialized();
        const requestId = await this.applyBackpressure();
        try {
            this.logger.trace(`Deleting noun ${id}`);
            // Get the GCS key
            const key = this.getNounKey(id);
            // Delete from GCS
            const file = this.bucket.file(key);
            await file.delete();
            // Remove from cache
            this.nounCacheManager.delete(id);
            // Decrement noun count
            const metadata = await this.getNounMetadata(id);
            if (metadata && metadata.type) {
                await this.decrementEntityCountSafe(metadata.type);
            }
            this.logger.trace(`Noun ${id} deleted successfully`);
            this.releaseBackpressure(true, requestId);
        }
        catch (error) {
            this.releaseBackpressure(false, requestId);
            if (error.code === 404) {
                // Already deleted
                this.logger.trace(`Noun ${id} not found (already deleted)`);
                return;
            }
            // Handle throttling
            if (this.isThrottlingError(error)) {
                await this.handleThrottling(error);
                throw error;
            }
            this.logger.error(`Failed to delete noun ${id}:`, error);
            throw new Error(`Failed to delete noun ${id}: ${error}`);
        }
    }
    /**
     * Save noun metadata to storage (internal implementation)
     */
    async saveNounMetadata_internal(id, metadata) {
        await this.ensureInitialized();
        try {
            // Use UUID-based sharding for metadata (consistent with noun vectors)
            const shardId = getShardIdFromUuid(id);
            const key = `${this.metadataPrefix}${shardId}/${id}.json`;
            this.logger.trace(`Saving noun metadata for ${id} to key: ${key}`);
            // Save to GCS
            const file = this.bucket.file(key);
            await file.save(JSON.stringify(metadata, null, 2), {
                contentType: 'application/json',
                resumable: false
            });
            this.logger.debug(`Noun metadata for ${id} saved successfully`);
        }
        catch (error) {
            this.logger.error(`Failed to save noun metadata for ${id}:`, error);
            throw new Error(`Failed to save noun metadata for ${id}: ${error}`);
        }
    }
    /**
     * Save metadata to storage (public API - delegates to saveNounMetadata_internal)
     */
    async saveMetadata(id, metadata) {
        return this.saveNounMetadata_internal(id, metadata);
    }
    /**
     * Get metadata from storage (public API - delegates to getNounMetadata)
     */
    async getMetadata(id) {
        return this.getNounMetadata(id);
    }
    /**
     * Get noun metadata from storage
     */
    async getNounMetadata(id) {
        await this.ensureInitialized();
        try {
            // Use UUID-based sharding for metadata
            const shardId = getShardIdFromUuid(id);
            const key = `${this.metadataPrefix}${shardId}/${id}.json`;
            this.logger.trace(`Getting noun metadata for ${id} from key: ${key}`);
            // Download from GCS
            const file = this.bucket.file(key);
            const [contents] = await file.download();
            // Parse JSON
            const metadata = JSON.parse(contents.toString());
            this.logger.trace(`Successfully retrieved noun metadata for ${id}`);
            return metadata;
        }
        catch (error) {
            // Check if this is a "not found" error
            if (error.code === 404) {
                this.logger.trace(`Noun metadata not found for ${id}`);
                return null;
            }
            // For other types of errors, convert to BrainyError
            throw BrainyError.fromError(error, `getNounMetadata(${id})`);
        }
    }
    /**
     * Save verb metadata to storage (internal implementation)
     */
    async saveVerbMetadata_internal(id, metadata) {
        await this.ensureInitialized();
        try {
            const key = `${this.verbMetadataPrefix}${id}.json`;
            this.logger.trace(`Saving verb metadata for ${id} to key: ${key}`);
            // Save to GCS
            const file = this.bucket.file(key);
            await file.save(JSON.stringify(metadata, null, 2), {
                contentType: 'application/json',
                resumable: false
            });
            this.logger.debug(`Verb metadata for ${id} saved successfully`);
        }
        catch (error) {
            this.logger.error(`Failed to save verb metadata for ${id}:`, error);
            throw new Error(`Failed to save verb metadata for ${id}: ${error}`);
        }
    }
    /**
     * Get verb metadata from storage
     */
    async getVerbMetadata(id) {
        await this.ensureInitialized();
        try {
            const key = `${this.verbMetadataPrefix}${id}.json`;
            this.logger.trace(`Getting verb metadata for ${id} from key: ${key}`);
            // Download from GCS
            const file = this.bucket.file(key);
            const [contents] = await file.download();
            // Parse JSON
            const metadata = JSON.parse(contents.toString());
            this.logger.trace(`Successfully retrieved verb metadata for ${id}`);
            return metadata;
        }
        catch (error) {
            // Check if this is a "not found" error
            if (error.code === 404) {
                this.logger.trace(`Verb metadata not found for ${id}`);
                return null;
            }
            // For other types of errors, convert to BrainyError
            throw BrainyError.fromError(error, `getVerbMetadata(${id})`);
        }
    }
    /**
     * Save a verb to storage (internal implementation)
     */
    async saveVerb_internal(verb) {
        return this.saveEdge(verb);
    }
    /**
     * Save an edge to storage
     */
    async saveEdge(edge) {
        await this.ensureInitialized();
        // Check volume mode
        this.checkVolumeMode();
        // Use write buffer in high-volume mode
        if (this.highVolumeMode && this.verbWriteBuffer) {
            this.logger.trace(`📝 BUFFERING: Adding verb ${edge.id} to write buffer`);
            await this.verbWriteBuffer.add(edge.id, edge);
            return;
        }
        // Direct write in normal mode
        await this.saveEdgeDirect(edge);
    }
    /**
     * Save an edge directly to GCS (bypass buffer)
     */
    async saveEdgeDirect(edge) {
        const requestId = await this.applyBackpressure();
        try {
            this.logger.trace(`Saving edge ${edge.id}`);
            // Convert connections Map to serializable format
            const serializableEdge = {
                ...edge,
                connections: Object.fromEntries(Array.from(edge.connections.entries()).map(([level, verbIds]) => [
                    level,
                    Array.from(verbIds)
                ]))
            };
            // Get the GCS key with UUID-based sharding
            const key = this.getVerbKey(edge.id);
            // Save to GCS
            const file = this.bucket.file(key);
            await file.save(JSON.stringify(serializableEdge, null, 2), {
                contentType: 'application/json',
                resumable: false
            });
            // Update cache
            this.verbCacheManager.set(edge.id, edge);
            // Increment verb count
            const metadata = await this.getVerbMetadata(edge.id);
            if (metadata && metadata.type) {
                await this.incrementVerbCount(metadata.type);
            }
            this.logger.trace(`Edge ${edge.id} saved successfully`);
            this.releaseBackpressure(true, requestId);
        }
        catch (error) {
            this.releaseBackpressure(false, requestId);
            if (this.isThrottlingError(error)) {
                await this.handleThrottling(error);
                throw error;
            }
            this.logger.error(`Failed to save edge ${edge.id}:`, error);
            throw new Error(`Failed to save edge ${edge.id}: ${error}`);
        }
    }
    /**
     * Get a verb from storage (internal implementation)
     */
    async getVerb_internal(id) {
        return this.getEdge(id);
    }
    /**
     * Get an edge from storage
     */
    async getEdge(id) {
        await this.ensureInitialized();
        // Check cache first
        const cached = this.verbCacheManager.get(id);
        if (cached) {
            this.logger.trace(`Cache hit for verb ${id}`);
            return cached;
        }
        const requestId = await this.applyBackpressure();
        try {
            this.logger.trace(`Getting edge ${id}`);
            // Get the GCS key with UUID-based sharding
            const key = this.getVerbKey(id);
            // Download from GCS
            const file = this.bucket.file(key);
            const [contents] = await file.download();
            // Parse JSON
            const data = JSON.parse(contents.toString());
            // Convert serialized connections back to Map
            const connections = new Map();
            for (const [level, verbIds] of Object.entries(data.connections || {})) {
                connections.set(Number(level), new Set(verbIds));
            }
            const edge = {
                id: data.id,
                vector: data.vector,
                connections
            };
            // Update cache
            this.verbCacheManager.set(id, edge);
            this.logger.trace(`Successfully retrieved edge ${id}`);
            this.releaseBackpressure(true, requestId);
            return edge;
        }
        catch (error) {
            this.releaseBackpressure(false, requestId);
            // Check if this is a "not found" error
            if (error.code === 404) {
                this.logger.trace(`Edge not found: ${id}`);
                return null;
            }
            if (this.isThrottlingError(error)) {
                await this.handleThrottling(error);
                throw error;
            }
            this.logger.error(`Failed to get edge ${id}:`, error);
            throw BrainyError.fromError(error, `getVerb(${id})`);
        }
    }
    /**
     * Delete a verb from storage (internal implementation)
     */
    async deleteVerb_internal(id) {
        await this.ensureInitialized();
        const requestId = await this.applyBackpressure();
        try {
            this.logger.trace(`Deleting verb ${id}`);
            // Get the GCS key
            const key = this.getVerbKey(id);
            // Delete from GCS
            const file = this.bucket.file(key);
            await file.delete();
            // Remove from cache
            this.verbCacheManager.delete(id);
            // Decrement verb count
            const metadata = await this.getVerbMetadata(id);
            if (metadata && metadata.type) {
                await this.decrementVerbCount(metadata.type);
            }
            this.logger.trace(`Verb ${id} deleted successfully`);
            this.releaseBackpressure(true, requestId);
        }
        catch (error) {
            this.releaseBackpressure(false, requestId);
            if (error.code === 404) {
                // Already deleted
                this.logger.trace(`Verb ${id} not found (already deleted)`);
                return;
            }
            if (this.isThrottlingError(error)) {
                await this.handleThrottling(error);
                throw error;
            }
            this.logger.error(`Failed to delete verb ${id}:`, error);
            throw new Error(`Failed to delete verb ${id}: ${error}`);
        }
    }
    /**
     * Get nouns with pagination
     * Iterates through all UUID-based shards (00-ff) for consistent pagination
     */
    async getNounsWithPagination(options = {}) {
        await this.ensureInitialized();
        const limit = options.limit || 100;
        const cursor = options.cursor;
        // Get paginated nodes
        const result = await this.getNodesWithPagination({
            limit,
            cursor,
            useCache: true
        });
        // Apply filters if provided
        let filteredNodes = result.nodes;
        if (options.filter) {
            // Filter by noun type
            if (options.filter.nounType) {
                const nounTypes = Array.isArray(options.filter.nounType)
                    ? options.filter.nounType
                    : [options.filter.nounType];
                const filteredByType = [];
                for (const node of filteredNodes) {
                    const metadata = await this.getNounMetadata(node.id);
                    if (metadata && nounTypes.includes(metadata.type || metadata.noun)) {
                        filteredByType.push(node);
                    }
                }
                filteredNodes = filteredByType;
            }
            // Additional filter logic can be added here
        }
        return {
            items: filteredNodes,
            totalCount: result.totalCount,
            hasMore: result.hasMore,
            nextCursor: result.nextCursor
        };
    }
    /**
     * Get nodes with pagination (internal implementation)
     * Iterates through UUID-based shards for consistent pagination
     */
    async getNodesWithPagination(options) {
        const limit = options.limit || 100;
        const useCache = options.useCache !== false;
        try {
            const nodes = [];
            // Parse cursor (format: "shardIndex:gcsPageToken")
            let startShardIndex = 0;
            let gcsPageToken;
            if (options.cursor) {
                const parts = options.cursor.split(':', 2);
                startShardIndex = parseInt(parts[0]) || 0;
                gcsPageToken = parts[1] || undefined;
            }
            // Iterate through shards starting from cursor position
            for (let shardIndex = startShardIndex; shardIndex < TOTAL_SHARDS; shardIndex++) {
                const shardId = getShardIdByIndex(shardIndex);
                const shardPrefix = `${this.nounPrefix}${shardId}/`;
                // List objects in this shard
                const [files, , response] = await this.bucket.getFiles({
                    prefix: shardPrefix,
                    maxResults: limit - nodes.length,
                    pageToken: shardIndex === startShardIndex ? gcsPageToken : undefined
                });
                // Extract node IDs from file names
                if (files && files.length > 0) {
                    const nodeIds = files
                        .filter((file) => file && file.name)
                        .map((file) => {
                            // Extract UUID from: entities/nouns/vectors/ab/ab123456-uuid.json
                            let name = file.name;
                            if (name.startsWith(shardPrefix)) {
                                name = name.substring(shardPrefix.length);
                            }
                            if (name.endsWith('.json')) {
                                name = name.substring(0, name.length - 5);
                            }
                            return name;
                        })
                        .filter((id) => id && id.length > 0);
                    // Load nodes
                    for (const id of nodeIds) {
                        const node = await this.getNode(id);
                        if (node) {
                            nodes.push(node);
                        }
                        if (nodes.length >= limit) {
                            break;
                        }
                    }
                }
                // Check if we have enough nodes or if there are more files in current shard
                if (nodes.length >= limit) {
                    const nextCursor = response?.nextPageToken
                        ? `${shardIndex}:${response.nextPageToken}`
                        : shardIndex + 1 < TOTAL_SHARDS
                            ? `${shardIndex + 1}:`
                            : undefined;
                    return {
                        nodes,
                        hasMore: !!nextCursor,
                        nextCursor
                    };
                }
                // If this shard has more pages, create cursor for next page
                if (response?.nextPageToken) {
                    return {
                        nodes,
                        hasMore: true,
                        nextCursor: `${shardIndex}:${response.nextPageToken}`
                    };
                }
                // Continue to next shard
            }
            // No more shards or nodes
            return {
                nodes,
                hasMore: false,
                nextCursor: undefined
            };
        }
        catch (error) {
            this.logger.error('Error in getNodesWithPagination:', error);
            throw new Error(`Failed to get nodes with pagination: ${error}`);
        }
    }
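
Cursors produced here encode both the shard being walked and GCS's own page token as `"shardIndex:gcsPageToken"`, so iteration resumes in the right shard even though the data is spread over 256 prefixes. A sketch of draining all nouns page by page with the pagination API above; `storage` is an initialized GcsStorage instance and the batch size is arbitrary.

```js
let cursor
do {
    const page = await storage.getNounsWithPagination({ limit: 500, cursor })
    for (const node of page.items) {
        // process node.id / node.vector / node.connections here
    }
    cursor = page.nextCursor
} while (cursor)
```
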
    /**
     * Get nouns by noun type (internal implementation)
     */
    async getNounsByNounType_internal(nounType) {
        const result = await this.getNounsWithPagination({
            limit: 10000, // Large limit for backward compatibility
            filter: { nounType }
        });
        return result.items;
    }
    /**
     * Get verbs by source ID (internal implementation)
     */
    async getVerbsBySource_internal(sourceId) {
        // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
        const result = await this.getVerbsWithPagination({
            limit: Number.MAX_SAFE_INTEGER,
            filter: { sourceId: [sourceId] }
        });
        return result.items;
    }
    /**
     * Get verbs by target ID (internal implementation)
     */
    async getVerbsByTarget_internal(targetId) {
        // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
        const result = await this.getVerbsWithPagination({
            limit: Number.MAX_SAFE_INTEGER,
            filter: { targetId: [targetId] }
        });
        return result.items;
    }
    /**
     * Get verbs by type (internal implementation)
     */
    async getVerbsByType_internal(type) {
        // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
        const result = await this.getVerbsWithPagination({
            limit: Number.MAX_SAFE_INTEGER,
            filter: { verbType: type }
        });
        return result.items;
    }
    /**
     * Get verbs with pagination
     */
    async getVerbsWithPagination(options = {}) {
        await this.ensureInitialized();
        const limit = options.limit || 100;
        try {
            // List verbs (simplified - not sharded yet in original implementation)
            const [files, , response] = await this.bucket.getFiles({
                prefix: this.verbPrefix,
                maxResults: limit,
                pageToken: options.cursor
            });
            // If no files, return empty result
            if (!files || files.length === 0) {
                return {
                    items: [],
                    totalCount: 0,
                    hasMore: false,
                    nextCursor: undefined
                };
            }
            // Extract verb IDs and load verbs as HNSW verbs
            const hnswVerbs = [];
            for (const file of files) {
                if (!file.name)
                    continue;
                // Extract UUID from path
                let name = file.name;
                if (name.startsWith(this.verbPrefix)) {
                    name = name.substring(this.verbPrefix.length);
                }
                if (name.endsWith('.json')) {
                    name = name.substring(0, name.length - 5);
                }
                const verb = await this.getEdge(name);
                if (verb) {
                    hnswVerbs.push(verb);
                }
            }
            // Convert HNSWVerbs to GraphVerbs by combining with metadata
            const graphVerbs = [];
            for (const hnswVerb of hnswVerbs) {
                const graphVerb = await this.convertHNSWVerbToGraphVerb(hnswVerb);
                if (graphVerb) {
                    graphVerbs.push(graphVerb);
                }
            }
            // Apply filters
            let filteredVerbs = graphVerbs;
            if (options.filter) {
                filteredVerbs = graphVerbs.filter((graphVerb) => {
                    // Filter by sourceId
                    if (options.filter.sourceId) {
                        const sourceIds = Array.isArray(options.filter.sourceId)
                            ? options.filter.sourceId
                            : [options.filter.sourceId];
                        if (!sourceIds.includes(graphVerb.sourceId)) {
                            return false;
                        }
                    }
                    // Filter by targetId
                    if (options.filter.targetId) {
                        const targetIds = Array.isArray(options.filter.targetId)
                            ? options.filter.targetId
                            : [options.filter.targetId];
                        if (!targetIds.includes(graphVerb.targetId)) {
                            return false;
                        }
                    }
                    // Filter by verbType
                    if (options.filter.verbType) {
                        const verbTypes = Array.isArray(options.filter.verbType)
                            ? options.filter.verbType
                            : [options.filter.verbType];
                        const verbType = graphVerb.verb || graphVerb.type || '';
                        if (!verbTypes.includes(verbType)) {
                            return false;
                        }
                    }
                    return true;
                });
            }
            return {
                items: filteredVerbs,
                hasMore: !!response?.nextPageToken,
                nextCursor: response?.nextPageToken
            };
        }
        catch (error) {
            this.logger.error('Error in getVerbsWithPagination:', error);
            throw new Error(`Failed to get verbs with pagination: ${error}`);
        }
    }
    /**
     * Get nouns with filtering and pagination (public API)
     */
    async getNouns(options) {
        const limit = options?.pagination?.limit || 100;
        const cursor = options?.pagination?.cursor;
        return this.getNounsWithPagination({
            limit,
            cursor,
            filter: options?.filter
        });
    }
    /**
     * Get verbs with filtering and pagination (public API)
     */
    async getVerbs(options) {
        const limit = options?.pagination?.limit || 100;
        const cursor = options?.pagination?.cursor;
        return this.getVerbsWithPagination({
            limit,
            cursor,
            filter: options?.filter
        });
    }
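
The public `getNouns`/`getVerbs` wrappers accept `pagination` and `filter` options and hand them to the paginated implementations above; the filter fields they honour are the ones read there (`nounType` for nouns; `sourceId`, `targetId`, `verbType` for verbs). A usage sketch with placeholder values; `storage` is an initialized GcsStorage instance.

```js
// Nouns of one type, one page at a time
const people = await storage.getNouns({
    pagination: { limit: 100 },
    filter: { nounType: 'person' }
})

// Verbs leaving a specific source node, narrowed by verb type
const edges = await storage.getVerbs({
    pagination: { limit: 100 },
    filter: {
        sourceId: ['ab123456-1234-5678-9abc-def012345678'],
        verbType: 'relatedTo'
    }
})
console.log(edges.items.length, edges.hasMore, edges.nextCursor)
```
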
    /**
     * Clear all data from storage
     */
    async clear() {
        await this.ensureInitialized();
        try {
            this.logger.info('🧹 Clearing all data from GCS bucket...');
            // Helper function to delete all objects with a given prefix
            const deleteObjectsWithPrefix = async (prefix) => {
                const [files] = await this.bucket.getFiles({ prefix });
                if (!files || files.length === 0) {
                    return;
                }
                // Delete each file
                for (const file of files) {
                    await file.delete();
                }
            };
            // Clear all data directories
            await deleteObjectsWithPrefix(this.nounPrefix);
            await deleteObjectsWithPrefix(this.verbPrefix);
            await deleteObjectsWithPrefix(this.metadataPrefix);
            await deleteObjectsWithPrefix(this.verbMetadataPrefix);
            await deleteObjectsWithPrefix(this.systemPrefix);
            // Clear caches
            this.nounCacheManager.clear();
            this.verbCacheManager.clear();
            // Reset counts
            this.totalNounCount = 0;
            this.totalVerbCount = 0;
            this.entityCounts.clear();
            this.verbCounts.clear();
            this.logger.info('✅ All data cleared from GCS');
        }
        catch (error) {
            this.logger.error('Failed to clear GCS storage:', error);
            throw new Error(`Failed to clear GCS storage: ${error}`);
        }
    }
    /**
     * Get storage status
     */
    async getStorageStatus() {
        await this.ensureInitialized();
        try {
            // Get bucket metadata
            const [metadata] = await this.bucket.getMetadata();
            return {
                type: 'gcs-native',
                used: 0, // GCS doesn't provide usage info easily
                quota: null, // No quota in GCS
                details: {
                    bucket: this.bucketName,
                    location: metadata.location,
                    storageClass: metadata.storageClass,
                    created: metadata.timeCreated
                }
            };
        }
        catch (error) {
            this.logger.error('Failed to get storage status:', error);
            return {
                type: 'gcs-native',
                used: 0,
                quota: null
            };
        }
    }
    /**
     * Save statistics data to storage
     */
    async saveStatisticsData(statistics) {
        await this.ensureInitialized();
        try {
            const key = `${this.systemPrefix}${STATISTICS_KEY}.json`;
            this.logger.trace(`Saving statistics to ${key}`);
            const file = this.bucket.file(key);
            await file.save(JSON.stringify(statistics, null, 2), {
                contentType: 'application/json',
                resumable: false
            });
            this.logger.trace('Statistics saved successfully');
        }
        catch (error) {
            this.logger.error('Failed to save statistics:', error);
            throw new Error(`Failed to save statistics: ${error}`);
        }
    }
    /**
     * Get statistics data from storage
     */
    async getStatisticsData() {
        await this.ensureInitialized();
        try {
            const key = `${this.systemPrefix}${STATISTICS_KEY}.json`;
            this.logger.trace(`Getting statistics from ${key}`);
            const file = this.bucket.file(key);
            const [contents] = await file.download();
            const statistics = JSON.parse(contents.toString());
            this.logger.trace('Statistics retrieved successfully');
            return statistics;
        }
        catch (error) {
            if (error.code === 404) {
                this.logger.trace('Statistics not found (creating new)');
                return null;
            }
            this.logger.error('Failed to get statistics:', error);
            return null;
        }
    }
    /**
     * Initialize counts from storage
     */
    async initializeCounts() {
        try {
            const key = `${this.systemPrefix}counts.json`;
            const file = this.bucket.file(key);
            const [contents] = await file.download();
            const counts = JSON.parse(contents.toString());
            this.totalNounCount = counts.totalNounCount || 0;
            this.totalVerbCount = counts.totalVerbCount || 0;
            this.entityCounts = new Map(Object.entries(counts.entityCounts || {}));
            this.verbCounts = new Map(Object.entries(counts.verbCounts || {}));
            prodLog.info(`📊 Loaded counts: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
        }
        catch (error) {
            if (error.code === 404) {
                // No counts file yet - initialize from scan
                prodLog.info('📊 No counts file found - initializing from storage scan...');
                await this.initializeCountsFromScan();
            }
            else {
                this.logger.error('Error loading counts:', error);
            }
        }
    }
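
Counts are persisted as a single JSON document under the system prefix (`counts.json`, written by `persistCounts` further down and read here). Its shape is reconstructed below from the fields accessed in this file; the values are examples, and the actual system directory name comes from `SYSTEM_DIR` in baseStorage.

```js
// <SYSTEM_DIR>/counts.json - field names from initializeCounts/persistCounts; values are examples.
const exampleCounts = {
    totalNounCount: 12450,
    totalVerbCount: 30918,
    entityCounts: { person: 8000, document: 4450 },
    verbCounts: { relatedTo: 30918 },
    lastUpdated: '2025-01-01T00:00:00.000Z'
}
```
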
    /**
     * Initialize counts from storage scan (expensive - only for first-time init)
     */
    async initializeCountsFromScan() {
        try {
            // Count nouns
            const [nounFiles] = await this.bucket.getFiles({ prefix: this.nounPrefix });
            this.totalNounCount = nounFiles?.filter((f) => f.name?.endsWith('.json')).length || 0;
            // Count verbs
            const [verbFiles] = await this.bucket.getFiles({ prefix: this.verbPrefix });
            this.totalVerbCount = verbFiles?.filter((f) => f.name?.endsWith('.json')).length || 0;
            // Save initial counts
            await this.persistCounts();
            prodLog.info(`✅ Initialized counts: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
        }
        catch (error) {
            this.logger.error('Error initializing counts from scan:', error);
        }
    }
    /**
     * Persist counts to storage
     */
    async persistCounts() {
        try {
            const key = `${this.systemPrefix}counts.json`;
            const counts = {
                totalNounCount: this.totalNounCount,
                totalVerbCount: this.totalVerbCount,
                entityCounts: Object.fromEntries(this.entityCounts),
                verbCounts: Object.fromEntries(this.verbCounts),
                lastUpdated: new Date().toISOString()
            };
            const file = this.bucket.file(key);
            await file.save(JSON.stringify(counts, null, 2), {
                contentType: 'application/json',
                resumable: false
            });
        }
        catch (error) {
            this.logger.error('Error persisting counts:', error);
        }
    }
}
//# sourceMappingURL=gcsStorage.js.map