@soulcraft/brainy 3.26.0 → 3.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1181 @@
1
+ /**
2
+ * Google Cloud Storage Adapter (Native)
3
+ * Uses the native @google-cloud/storage library for optimal performance and authentication
4
+ *
5
+ * Supports multiple authentication methods:
6
+ * 1. Application Default Credentials (ADC) - Automatic in Cloud Run/GCE
7
+ * 2. Service Account Key File
8
+ * 3. Service Account Credentials Object
9
+ * 4. HMAC Keys (fallback for backward compatibility)
10
+ */
11
+ import { BaseStorage, SYSTEM_DIR, STATISTICS_KEY, getDirectoryPath } from '../baseStorage.js';
12
+ import { BrainyError } from '../../errors/brainyError.js';
13
+ import { CacheManager } from '../cacheManager.js';
14
+ import { createModuleLogger, prodLog } from '../../utils/logger.js';
15
+ import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
16
+ import { getWriteBuffer } from '../../utils/writeBuffer.js';
17
+ import { getCoalescer } from '../../utils/requestCoalescer.js';
18
+ import { getShardIdFromUuid, getShardIdByIndex, TOTAL_SHARDS } from '../sharding.js';
19
+ /**
20
+ * Native Google Cloud Storage adapter for server environments
21
+ * Uses the @google-cloud/storage library with Application Default Credentials
22
+ *
23
+ * Authentication priority (first match wins, as implemented in init()):
24
+ * 1. Service Account Key File (if keyFilename provided)
25
+ * 2. Service Account Credentials Object (if credentials provided)
26
+ * 3. HMAC Keys (if accessKeyId/secretAccessKey provided)
27
+ * 4. Application Default Credentials (if no credentials provided)
28
+ */
29
+ export class GcsStorage extends BaseStorage {
30
+ /**
31
+ * Initialize the storage adapter
32
+ * @param options Configuration options for Google Cloud Storage
33
+ */
34
+ constructor(options) {
35
+ super();
36
+ this.storage = null;
37
+ this.bucket = null;
38
+ // Statistics caching for better performance
39
+ this.statisticsCache = null;
40
+ // Backpressure and performance management
41
+ this.pendingOperations = 0;
42
+ this.maxConcurrentOperations = 100;
43
+ this.baseBatchSize = 10;
44
+ this.currentBatchSize = 10;
45
+ this.lastMemoryCheck = 0;
46
+ this.memoryCheckInterval = 5000; // Check every 5 seconds
47
+ this.consecutiveErrors = 0;
48
+ this.lastErrorReset = Date.now();
49
+ // Adaptive backpressure for automatic flow control
50
+ this.backpressure = getGlobalBackpressure();
51
+ // Write buffers for bulk operations
52
+ this.nounWriteBuffer = null;
53
+ this.verbWriteBuffer = null;
54
+ // Request coalescer for deduplication
55
+ this.requestCoalescer = null;
56
+ // High-volume mode detection - MUCH more aggressive
57
+ this.highVolumeMode = false;
58
+ this.lastVolumeCheck = 0;
59
+ this.volumeCheckInterval = 1000; // Check every second, not 5
60
+ this.forceHighVolumeMode = false; // Environment variable override
61
+ // Module logger
62
+ this.logger = createModuleLogger('GcsStorage');
63
+ this.bucketName = options.bucketName;
64
+ this.keyFilename = options.keyFilename;
65
+ this.credentials = options.credentials;
66
+ this.accessKeyId = options.accessKeyId;
67
+ this.secretAccessKey = options.secretAccessKey;
68
+ this.readOnly = options.readOnly || false;
69
+ // Set up prefixes for different types of data using entity-based structure
70
+ this.nounPrefix = `${getDirectoryPath('noun', 'vector')}/`;
71
+ this.verbPrefix = `${getDirectoryPath('verb', 'vector')}/`;
72
+ this.metadataPrefix = `${getDirectoryPath('noun', 'metadata')}/`; // Noun metadata
73
+ this.verbMetadataPrefix = `${getDirectoryPath('verb', 'metadata')}/`; // Verb metadata
74
+ this.systemPrefix = `${SYSTEM_DIR}/`; // System data
75
+ // Initialize cache managers
76
+ this.nounCacheManager = new CacheManager(options.cacheConfig);
77
+ this.verbCacheManager = new CacheManager(options.cacheConfig);
78
+ // Check for high-volume mode override
79
+ if (typeof process !== 'undefined' && process.env?.BRAINY_FORCE_HIGH_VOLUME === 'true') {
80
+ this.forceHighVolumeMode = true;
81
+ this.highVolumeMode = true;
82
+ prodLog.info('🚀 High-volume mode FORCED via BRAINY_FORCE_HIGH_VOLUME environment variable');
83
+ }
84
+ }
85
+ /**
86
+ * Initialize the storage adapter
87
+ */
88
+ async init() {
89
+ if (this.isInitialized) {
90
+ return;
91
+ }
92
+ try {
93
+ // Import Google Cloud Storage SDK only when needed
94
+ const { Storage } = await import('@google-cloud/storage');
95
+ // Configure the GCS client based on available credentials
96
+ const clientConfig = {};
97
+ // Priority 1: Service Account Key File
98
+ if (this.keyFilename) {
99
+ clientConfig.keyFilename = this.keyFilename;
100
+ prodLog.info('🔐 GCS: Using Service Account Key File');
101
+ }
102
+ // Priority 2: Service Account Credentials Object
103
+ else if (this.credentials) {
104
+ clientConfig.credentials = this.credentials;
105
+ prodLog.info('🔐 GCS: Using Service Account Credentials');
106
+ }
107
+ // Priority 3: HMAC Keys (S3 compatibility)
108
+ else if (this.accessKeyId && this.secretAccessKey) {
109
+ clientConfig.credentials = {
110
+ client_email: 'hmac-user@example.com',
111
+ private_key: this.secretAccessKey
112
+ };
113
+ prodLog.warn('⚠️ GCS: Using HMAC keys (consider migrating to ADC)');
114
+ }
115
+ // Priority 4: Application Default Credentials (default)
116
+ else {
117
+ // No credentials needed - ADC will be used automatically
118
+ prodLog.info('🔐 GCS: Using Application Default Credentials (ADC)');
119
+ }
120
+ // Create the GCS client
121
+ this.storage = new Storage(clientConfig);
122
+ // Get reference to the bucket
123
+ this.bucket = this.storage.bucket(this.bucketName);
124
+ // Verify bucket exists and is accessible
125
+ const [exists] = await this.bucket.exists();
126
+ if (!exists) {
127
+ throw new Error(`Bucket ${this.bucketName} does not exist or is not accessible`);
128
+ }
129
+ prodLog.info(`✅ Connected to GCS bucket: ${this.bucketName}`);
130
+ // Initialize write buffers for high-volume mode
131
+ const storageId = `gcs-${this.bucketName}`;
132
+ this.nounWriteBuffer = getWriteBuffer(`${storageId}-nouns`, 'noun', async (items) => {
133
+ await this.flushNounBuffer(items);
134
+ });
135
+ this.verbWriteBuffer = getWriteBuffer(`${storageId}-verbs`, 'verb', async (items) => {
136
+ await this.flushVerbBuffer(items);
137
+ });
138
+ // Initialize request coalescer for deduplication
139
+ this.requestCoalescer = getCoalescer(storageId, async (batch) => {
140
+ // Process coalesced operations (placeholder for future optimization)
141
+ this.logger.trace(`Processing coalesced batch: ${batch.length} items`);
142
+ });
143
+ // Initialize counts from storage
144
+ await this.initializeCounts();
145
+ this.isInitialized = true;
146
+ }
147
+ catch (error) {
148
+ this.logger.error('Failed to initialize GCS storage:', error);
149
+ throw new Error(`Failed to initialize GCS storage: ${error}`);
150
+ }
151
+ }
152
+ /**
153
+ * Get the GCS object key for a noun using UUID-based sharding
154
+ *
155
+ * Uses first 2 hex characters of UUID for consistent sharding.
156
+ * Path format: entities/nouns/vectors/{shardId}/{uuid}.json
157
+ *
158
+ * @example
159
+ * getNounKey('ab123456-1234-5678-9abc-def012345678')
160
+ * // returns 'entities/nouns/vectors/ab/ab123456-1234-5678-9abc-def012345678.json'
161
+ */
162
+ getNounKey(id) {
163
+ const shardId = getShardIdFromUuid(id);
164
+ return `${this.nounPrefix}${shardId}/${id}.json`;
165
+ }
166
+ /**
167
+ * Get the GCS object key for a verb using UUID-based sharding
168
+ *
169
+ * Uses first 2 hex characters of UUID for consistent sharding.
170
+ * Path format: entities/verbs/vectors/{shardId}/{uuid}.json
171
+ *
172
+ * @example
173
+ * getVerbKey('cd987654-4321-8765-cba9-fed543210987')
174
+ * // returns 'entities/verbs/vectors/cd/cd987654-4321-8765-cba9-fed543210987.json'
175
+ */
176
+ getVerbKey(id) {
177
+ const shardId = getShardIdFromUuid(id);
178
+ return `${this.verbPrefix}${shardId}/${id}.json`;
179
+ }
180
+ /**
181
+ * Override base class method to detect GCS-specific throttling errors
182
+ */
183
+ isThrottlingError(error) {
184
+ // First check base class detection
185
+ if (super.isThrottlingError(error)) {
186
+ return true;
187
+ }
188
+ // GCS-specific throttling detection
189
+ const statusCode = error.code;
190
+ const message = error.message?.toLowerCase() || '';
191
+ return (statusCode === 429 || // Too Many Requests
192
+ statusCode === 503 || // Service Unavailable
193
+ statusCode === 'RATE_LIMIT_EXCEEDED' ||
194
+ message.includes('quota') ||
195
+ message.includes('rate limit') ||
196
+ message.includes('too many requests'));
197
+ }
198
+ /**
199
+ * Apply backpressure before starting an operation
200
+ * @returns Request ID for tracking
201
+ */
202
+ async applyBackpressure() {
203
+ const requestId = `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
204
+ await this.backpressure.requestPermission(requestId, 1);
205
+ this.pendingOperations++;
206
+ return requestId;
207
+ }
208
+ /**
209
+ * Release backpressure after completing an operation
210
+ * @param success Whether the operation succeeded
211
+ * @param requestId Request ID from applyBackpressure()
212
+ */
213
+ releaseBackpressure(success = true, requestId) {
214
+ this.pendingOperations = Math.max(0, this.pendingOperations - 1);
215
+ if (requestId) {
216
+ this.backpressure.releasePermission(requestId, success);
217
+ }
218
+ }
219
+ /**
220
+ * Check if high-volume mode should be enabled
221
+ */
222
+ checkVolumeMode() {
223
+ if (this.forceHighVolumeMode) {
224
+ return; // Already forced on
225
+ }
226
+ const now = Date.now();
227
+ if (now - this.lastVolumeCheck < this.volumeCheckInterval) {
228
+ return;
229
+ }
230
+ this.lastVolumeCheck = now;
231
+ // Enable high-volume mode if we have many pending operations
232
+ const shouldEnable = this.pendingOperations > 20;
233
+ if (shouldEnable && !this.highVolumeMode) {
234
+ this.highVolumeMode = true;
235
+ prodLog.info('🚀 High-volume mode ENABLED (pending operations:', this.pendingOperations, ')');
236
+ }
237
+ else if (!shouldEnable && this.highVolumeMode && !this.forceHighVolumeMode) {
238
+ this.highVolumeMode = false;
239
+ prodLog.info('🐌 High-volume mode DISABLED (pending operations:', this.pendingOperations, ')');
240
+ }
241
+ }
242
+ /**
243
+ * Flush noun buffer to GCS
244
+ */
245
+ async flushNounBuffer(items) {
246
+ const writes = Array.from(items.values()).map(async (noun) => {
247
+ try {
248
+ await this.saveNodeDirect(noun);
249
+ }
250
+ catch (error) {
251
+ this.logger.error(`Failed to flush noun ${noun.id}:`, error);
252
+ }
253
+ });
254
+ await Promise.all(writes);
255
+ }
256
+ /**
257
+ * Flush verb buffer to GCS
258
+ */
259
+ async flushVerbBuffer(items) {
260
+ const writes = Array.from(items.values()).map(async (verb) => {
261
+ try {
262
+ await this.saveEdgeDirect(verb);
263
+ }
264
+ catch (error) {
265
+ this.logger.error(`Failed to flush verb ${verb.id}:`, error);
266
+ }
267
+ });
268
+ await Promise.all(writes);
269
+ }
270
+ /**
271
+ * Save a noun to storage (internal implementation)
272
+ */
273
+ async saveNoun_internal(noun) {
274
+ return this.saveNode(noun);
275
+ }
276
+ /**
277
+ * Save a node to storage
278
+ */
279
+ async saveNode(node) {
280
+ await this.ensureInitialized();
281
+ // ALWAYS check if we should use high-volume mode (critical for detection)
282
+ this.checkVolumeMode();
283
+ // Use write buffer in high-volume mode
284
+ if (this.highVolumeMode && this.nounWriteBuffer) {
285
+ this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer (high-volume mode active)`);
286
+ await this.nounWriteBuffer.add(node.id, node);
287
+ return;
288
+ }
289
+ else if (!this.highVolumeMode) {
290
+ this.logger.trace(`📝 DIRECT WRITE: Saving noun ${node.id} directly (high-volume mode inactive)`);
291
+ }
292
+ // Direct write in normal mode
293
+ await this.saveNodeDirect(node);
294
+ }
295
+ /**
296
+ * Save a node directly to GCS (bypass buffer)
297
+ */
298
+ async saveNodeDirect(node) {
299
+ // Apply backpressure before starting operation
300
+ const requestId = await this.applyBackpressure();
301
+ try {
302
+ this.logger.trace(`Saving node ${node.id}`);
303
+ // Convert connections Map to a serializable format
304
+ const serializableNode = {
305
+ ...node,
306
+ connections: Object.fromEntries(Array.from(node.connections.entries()).map(([level, nounIds]) => [
307
+ level,
308
+ Array.from(nounIds)
309
+ ]))
310
+ };
311
+ // Get the GCS key with UUID-based sharding
312
+ const key = this.getNounKey(node.id);
313
+ // Save to GCS
314
+ const file = this.bucket.file(key);
315
+ await file.save(JSON.stringify(serializableNode, null, 2), {
316
+ contentType: 'application/json',
317
+ resumable: false // For small objects, non-resumable is faster
318
+ });
319
+ // Update cache
320
+ this.nounCacheManager.set(node.id, node);
321
+ // Increment noun count
322
+ const metadata = await this.getNounMetadata(node.id);
323
+ if (metadata && metadata.type) {
324
+ await this.incrementEntityCountSafe(metadata.type);
325
+ }
326
+ this.logger.trace(`Node ${node.id} saved successfully`);
327
+ this.releaseBackpressure(true, requestId);
328
+ }
329
+ catch (error) {
330
+ this.releaseBackpressure(false, requestId);
331
+ // Handle throttling
332
+ if (this.isThrottlingError(error)) {
333
+ await this.handleThrottling(error);
334
+ throw error; // Re-throw for retry at higher level
335
+ }
336
+ this.logger.error(`Failed to save node ${node.id}:`, error);
337
+ throw new Error(`Failed to save node ${node.id}: ${error}`);
338
+ }
339
+ }
340
+ /**
341
+ * Get a noun from storage (internal implementation)
342
+ */
343
+ async getNoun_internal(id) {
344
+ return this.getNode(id);
345
+ }
346
+ /**
347
+ * Get a node from storage
348
+ */
349
+ async getNode(id) {
350
+ await this.ensureInitialized();
351
+ // Check cache first
352
+ const cached = this.nounCacheManager.get(id);
353
+ if (cached) {
354
+ this.logger.trace(`Cache hit for noun ${id}`);
355
+ return cached;
356
+ }
357
+ // Apply backpressure
358
+ const requestId = await this.applyBackpressure();
359
+ try {
360
+ this.logger.trace(`Getting node ${id}`);
361
+ // Get the GCS key with UUID-based sharding
362
+ const key = this.getNounKey(id);
363
+ // Download from GCS
364
+ const file = this.bucket.file(key);
365
+ const [contents] = await file.download();
366
+ // Parse JSON
367
+ const data = JSON.parse(contents.toString());
368
+ // Convert serialized connections back to Map<number, Set<string>>
369
+ const connections = new Map();
370
+ for (const [level, nounIds] of Object.entries(data.connections || {})) {
371
+ connections.set(Number(level), new Set(nounIds));
372
+ }
373
+ const node = {
374
+ id: data.id,
375
+ vector: data.vector,
376
+ connections,
377
+ level: data.level || 0
378
+ };
379
+ // Update cache
380
+ this.nounCacheManager.set(id, node);
381
+ this.logger.trace(`Successfully retrieved node ${id}`);
382
+ this.releaseBackpressure(true, requestId);
383
+ return node;
384
+ }
385
+ catch (error) {
386
+ this.releaseBackpressure(false, requestId);
387
+ // Check if this is a "not found" error
388
+ if (error.code === 404) {
389
+ this.logger.trace(`Node not found: ${id}`);
390
+ return null;
391
+ }
392
+ // Handle throttling
393
+ if (this.isThrottlingError(error)) {
394
+ await this.handleThrottling(error);
395
+ throw error;
396
+ }
397
+ this.logger.error(`Failed to get node ${id}:`, error);
398
+ throw BrainyError.fromError(error, `getNoun(${id})`);
399
+ }
400
+ }
401
+ /**
402
+ * Delete a noun from storage (internal implementation)
403
+ */
404
+ async deleteNoun_internal(id) {
405
+ await this.ensureInitialized();
406
+ const requestId = await this.applyBackpressure();
407
+ try {
408
+ this.logger.trace(`Deleting noun ${id}`);
409
+ // Get the GCS key
410
+ const key = this.getNounKey(id);
411
+ // Delete from GCS
412
+ const file = this.bucket.file(key);
413
+ await file.delete();
414
+ // Remove from cache
415
+ this.nounCacheManager.delete(id);
416
+ // Decrement noun count
417
+ const metadata = await this.getNounMetadata(id);
418
+ if (metadata && metadata.type) {
419
+ await this.decrementEntityCountSafe(metadata.type);
420
+ }
421
+ this.logger.trace(`Noun ${id} deleted successfully`);
422
+ this.releaseBackpressure(true, requestId);
423
+ }
424
+ catch (error) {
425
+ this.releaseBackpressure(false, requestId);
426
+ if (error.code === 404) {
427
+ // Already deleted
428
+ this.logger.trace(`Noun ${id} not found (already deleted)`);
429
+ return;
430
+ }
431
+ // Handle throttling
432
+ if (this.isThrottlingError(error)) {
433
+ await this.handleThrottling(error);
434
+ throw error;
435
+ }
436
+ this.logger.error(`Failed to delete noun ${id}:`, error);
437
+ throw new Error(`Failed to delete noun ${id}: ${error}`);
438
+ }
439
+ }
440
+ /**
441
+ * Save noun metadata to storage (internal implementation)
442
+ */
443
+ async saveNounMetadata_internal(id, metadata) {
444
+ await this.ensureInitialized();
445
+ try {
446
+ // Use UUID-based sharding for metadata (consistent with noun vectors)
447
+ const shardId = getShardIdFromUuid(id);
448
+ const key = `${this.metadataPrefix}${shardId}/${id}.json`;
449
+ this.logger.trace(`Saving noun metadata for ${id} to key: ${key}`);
450
+ // Save to GCS
451
+ const file = this.bucket.file(key);
452
+ await file.save(JSON.stringify(metadata, null, 2), {
453
+ contentType: 'application/json',
454
+ resumable: false
455
+ });
456
+ this.logger.debug(`Noun metadata for ${id} saved successfully`);
457
+ }
458
+ catch (error) {
459
+ this.logger.error(`Failed to save noun metadata for ${id}:`, error);
460
+ throw new Error(`Failed to save noun metadata for ${id}: ${error}`);
461
+ }
462
+ }
463
+ /**
464
+ * Save metadata to storage (public API - delegates to saveNounMetadata_internal)
465
+ */
466
+ async saveMetadata(id, metadata) {
467
+ return this.saveNounMetadata_internal(id, metadata);
468
+ }
469
+ /**
470
+ * Get metadata from storage (public API - delegates to getNounMetadata)
471
+ */
472
+ async getMetadata(id) {
473
+ return this.getNounMetadata(id);
474
+ }
475
+ /**
476
+ * Get noun metadata from storage
477
+ */
478
+ async getNounMetadata(id) {
479
+ await this.ensureInitialized();
480
+ try {
481
+ // Use UUID-based sharding for metadata
482
+ const shardId = getShardIdFromUuid(id);
483
+ const key = `${this.metadataPrefix}${shardId}/${id}.json`;
484
+ this.logger.trace(`Getting noun metadata for ${id} from key: ${key}`);
485
+ // Download from GCS
486
+ const file = this.bucket.file(key);
487
+ const [contents] = await file.download();
488
+ // Parse JSON
489
+ const metadata = JSON.parse(contents.toString());
490
+ this.logger.trace(`Successfully retrieved noun metadata for ${id}`);
491
+ return metadata;
492
+ }
493
+ catch (error) {
494
+ // Check if this is a "not found" error
495
+ if (error.code === 404) {
496
+ this.logger.trace(`Noun metadata not found for ${id}`);
497
+ return null;
498
+ }
499
+ // For other types of errors, convert to BrainyError
500
+ throw BrainyError.fromError(error, `getNounMetadata(${id})`);
501
+ }
502
+ }
503
+ /**
504
+ * Save verb metadata to storage (internal implementation)
505
+ */
506
+ async saveVerbMetadata_internal(id, metadata) {
507
+ await this.ensureInitialized();
508
+ try {
509
+ const key = `${this.verbMetadataPrefix}${id}.json`;
510
+ this.logger.trace(`Saving verb metadata for ${id} to key: ${key}`);
511
+ // Save to GCS
512
+ const file = this.bucket.file(key);
513
+ await file.save(JSON.stringify(metadata, null, 2), {
514
+ contentType: 'application/json',
515
+ resumable: false
516
+ });
517
+ this.logger.debug(`Verb metadata for ${id} saved successfully`);
518
+ }
519
+ catch (error) {
520
+ this.logger.error(`Failed to save verb metadata for ${id}:`, error);
521
+ throw new Error(`Failed to save verb metadata for ${id}: ${error}`);
522
+ }
523
+ }
524
+ /**
525
+ * Get verb metadata from storage
526
+ */
527
+ async getVerbMetadata(id) {
528
+ await this.ensureInitialized();
529
+ try {
530
+ const key = `${this.verbMetadataPrefix}${id}.json`;
531
+ this.logger.trace(`Getting verb metadata for ${id} from key: ${key}`);
532
+ // Download from GCS
533
+ const file = this.bucket.file(key);
534
+ const [contents] = await file.download();
535
+ // Parse JSON
536
+ const metadata = JSON.parse(contents.toString());
537
+ this.logger.trace(`Successfully retrieved verb metadata for ${id}`);
538
+ return metadata;
539
+ }
540
+ catch (error) {
541
+ // Check if this is a "not found" error
542
+ if (error.code === 404) {
543
+ this.logger.trace(`Verb metadata not found for ${id}`);
544
+ return null;
545
+ }
546
+ // For other types of errors, convert to BrainyError
547
+ throw BrainyError.fromError(error, `getVerbMetadata(${id})`);
548
+ }
549
+ }
550
+ /**
551
+ * Save a verb to storage (internal implementation)
552
+ */
553
+ async saveVerb_internal(verb) {
554
+ return this.saveEdge(verb);
555
+ }
556
+ /**
557
+ * Save an edge to storage
558
+ */
559
+ async saveEdge(edge) {
560
+ await this.ensureInitialized();
561
+ // Check volume mode
562
+ this.checkVolumeMode();
563
+ // Use write buffer in high-volume mode
564
+ if (this.highVolumeMode && this.verbWriteBuffer) {
565
+ this.logger.trace(`📝 BUFFERING: Adding verb ${edge.id} to write buffer`);
566
+ await this.verbWriteBuffer.add(edge.id, edge);
567
+ return;
568
+ }
569
+ // Direct write in normal mode
570
+ await this.saveEdgeDirect(edge);
571
+ }
572
+ /**
573
+ * Save an edge directly to GCS (bypass buffer)
574
+ */
575
+ async saveEdgeDirect(edge) {
576
+ const requestId = await this.applyBackpressure();
577
+ try {
578
+ this.logger.trace(`Saving edge ${edge.id}`);
579
+ // Convert connections Map to serializable format
580
+ const serializableEdge = {
581
+ ...edge,
582
+ connections: Object.fromEntries(Array.from(edge.connections.entries()).map(([level, verbIds]) => [
583
+ level,
584
+ Array.from(verbIds)
585
+ ]))
586
+ };
587
+ // Get the GCS key with UUID-based sharding
588
+ const key = this.getVerbKey(edge.id);
589
+ // Save to GCS
590
+ const file = this.bucket.file(key);
591
+ await file.save(JSON.stringify(serializableEdge, null, 2), {
592
+ contentType: 'application/json',
593
+ resumable: false
594
+ });
595
+ // Update cache
596
+ this.verbCacheManager.set(edge.id, edge);
597
+ // Increment verb count
598
+ const metadata = await this.getVerbMetadata(edge.id);
599
+ if (metadata && metadata.type) {
600
+ await this.incrementVerbCount(metadata.type);
601
+ }
602
+ this.logger.trace(`Edge ${edge.id} saved successfully`);
603
+ this.releaseBackpressure(true, requestId);
604
+ }
605
+ catch (error) {
606
+ this.releaseBackpressure(false, requestId);
607
+ if (this.isThrottlingError(error)) {
608
+ await this.handleThrottling(error);
609
+ throw error;
610
+ }
611
+ this.logger.error(`Failed to save edge ${edge.id}:`, error);
612
+ throw new Error(`Failed to save edge ${edge.id}: ${error}`);
613
+ }
614
+ }
615
+ /**
616
+ * Get a verb from storage (internal implementation)
617
+ */
618
+ async getVerb_internal(id) {
619
+ return this.getEdge(id);
620
+ }
621
+ /**
622
+ * Get an edge from storage
623
+ */
624
+ async getEdge(id) {
625
+ await this.ensureInitialized();
626
+ // Check cache first
627
+ const cached = this.verbCacheManager.get(id);
628
+ if (cached) {
629
+ this.logger.trace(`Cache hit for verb ${id}`);
630
+ return cached;
631
+ }
632
+ const requestId = await this.applyBackpressure();
633
+ try {
634
+ this.logger.trace(`Getting edge ${id}`);
635
+ // Get the GCS key with UUID-based sharding
636
+ const key = this.getVerbKey(id);
637
+ // Download from GCS
638
+ const file = this.bucket.file(key);
639
+ const [contents] = await file.download();
640
+ // Parse JSON
641
+ const data = JSON.parse(contents.toString());
642
+ // Convert serialized connections back to Map
643
+ const connections = new Map();
644
+ for (const [level, verbIds] of Object.entries(data.connections || {})) {
645
+ connections.set(Number(level), new Set(verbIds));
646
+ }
647
+ const edge = {
648
+ id: data.id,
649
+ vector: data.vector,
650
+ connections
651
+ };
652
+ // Update cache
653
+ this.verbCacheManager.set(id, edge);
654
+ this.logger.trace(`Successfully retrieved edge ${id}`);
655
+ this.releaseBackpressure(true, requestId);
656
+ return edge;
657
+ }
658
+ catch (error) {
659
+ this.releaseBackpressure(false, requestId);
660
+ // Check if this is a "not found" error
661
+ if (error.code === 404) {
662
+ this.logger.trace(`Edge not found: ${id}`);
663
+ return null;
664
+ }
665
+ if (this.isThrottlingError(error)) {
666
+ await this.handleThrottling(error);
667
+ throw error;
668
+ }
669
+ this.logger.error(`Failed to get edge ${id}:`, error);
670
+ throw BrainyError.fromError(error, `getVerb(${id})`);
671
+ }
672
+ }
673
+ /**
674
+ * Delete a verb from storage (internal implementation)
675
+ */
676
+ async deleteVerb_internal(id) {
677
+ await this.ensureInitialized();
678
+ const requestId = await this.applyBackpressure();
679
+ try {
680
+ this.logger.trace(`Deleting verb ${id}`);
681
+ // Get the GCS key
682
+ const key = this.getVerbKey(id);
683
+ // Delete from GCS
684
+ const file = this.bucket.file(key);
685
+ await file.delete();
686
+ // Remove from cache
687
+ this.verbCacheManager.delete(id);
688
+ // Decrement verb count
689
+ const metadata = await this.getVerbMetadata(id);
690
+ if (metadata && metadata.type) {
691
+ await this.decrementVerbCount(metadata.type);
692
+ }
693
+ this.logger.trace(`Verb ${id} deleted successfully`);
694
+ this.releaseBackpressure(true, requestId);
695
+ }
696
+ catch (error) {
697
+ this.releaseBackpressure(false, requestId);
698
+ if (error.code === 404) {
699
+ // Already deleted
700
+ this.logger.trace(`Verb ${id} not found (already deleted)`);
701
+ return;
702
+ }
703
+ if (this.isThrottlingError(error)) {
704
+ await this.handleThrottling(error);
705
+ throw error;
706
+ }
707
+ this.logger.error(`Failed to delete verb ${id}:`, error);
708
+ throw new Error(`Failed to delete verb ${id}: ${error}`);
709
+ }
710
+ }
711
+ /**
712
+ * Get nouns with pagination
713
+ * Iterates through all UUID-based shards (00-ff) for consistent pagination
714
+ */
715
+ async getNounsWithPagination(options = {}) {
716
+ await this.ensureInitialized();
717
+ const limit = options.limit || 100;
718
+ const cursor = options.cursor;
719
+ // Get paginated nodes
720
+ const result = await this.getNodesWithPagination({
721
+ limit,
722
+ cursor,
723
+ useCache: true
724
+ });
725
+ // Apply filters if provided
726
+ let filteredNodes = result.nodes;
727
+ if (options.filter) {
728
+ // Filter by noun type
729
+ if (options.filter.nounType) {
730
+ const nounTypes = Array.isArray(options.filter.nounType)
731
+ ? options.filter.nounType
732
+ : [options.filter.nounType];
733
+ const filteredByType = [];
734
+ for (const node of filteredNodes) {
735
+ const metadata = await this.getNounMetadata(node.id);
736
+ if (metadata && nounTypes.includes(metadata.type || metadata.noun)) {
737
+ filteredByType.push(node);
738
+ }
739
+ }
740
+ filteredNodes = filteredByType;
741
+ }
742
+ // Additional filter logic can be added here
743
+ }
744
+ return {
745
+ items: filteredNodes,
746
+ totalCount: result.totalCount,
747
+ hasMore: result.hasMore,
748
+ nextCursor: result.nextCursor
749
+ };
750
+ }
751
+ /**
752
+ * Get nodes with pagination (internal implementation)
753
+ * Iterates through UUID-based shards for consistent pagination
754
+ */
755
+ async getNodesWithPagination(options) {
756
+ const limit = options.limit || 100;
757
+ const useCache = options.useCache !== false;
758
+ try {
759
+ const nodes = [];
760
+ // Parse cursor (format: "shardIndex:gcsPageToken")
761
+ let startShardIndex = 0;
762
+ let gcsPageToken;
763
+ if (options.cursor) {
764
+ const parts = options.cursor.split(':', 2);
765
+ startShardIndex = parseInt(parts[0]) || 0;
766
+ gcsPageToken = parts[1] || undefined;
767
+ }
768
+ // Iterate through shards starting from cursor position
769
+ for (let shardIndex = startShardIndex; shardIndex < TOTAL_SHARDS; shardIndex++) {
770
+ const shardId = getShardIdByIndex(shardIndex);
771
+ const shardPrefix = `${this.nounPrefix}${shardId}/`;
772
+ // List objects in this shard
773
+ const [files, , response] = await this.bucket.getFiles({
774
+ prefix: shardPrefix,
775
+ maxResults: limit - nodes.length,
776
+ pageToken: shardIndex === startShardIndex ? gcsPageToken : undefined
777
+ });
778
+ // Extract node IDs from file names
779
+ if (files && files.length > 0) {
780
+ const nodeIds = files
781
+ .filter((file) => file && file.name)
782
+ .map((file) => {
783
+ // Extract UUID from: entities/nouns/vectors/ab/ab123456-uuid.json
784
+ let name = file.name;
785
+ if (name.startsWith(shardPrefix)) {
786
+ name = name.substring(shardPrefix.length);
787
+ }
788
+ if (name.endsWith('.json')) {
789
+ name = name.substring(0, name.length - 5);
790
+ }
791
+ return name;
792
+ })
793
+ .filter((id) => id && id.length > 0);
794
+ // Load nodes
795
+ for (const id of nodeIds) {
796
+ const node = await this.getNode(id);
797
+ if (node) {
798
+ nodes.push(node);
799
+ }
800
+ if (nodes.length >= limit) {
801
+ break;
802
+ }
803
+ }
804
+ }
805
+ // Check if we have enough nodes or if there are more files in current shard
806
+ if (nodes.length >= limit) {
807
+ const nextCursor = response?.nextPageToken
808
+ ? `${shardIndex}:${response.nextPageToken}`
809
+ : shardIndex + 1 < TOTAL_SHARDS
810
+ ? `${shardIndex + 1}:`
811
+ : undefined;
812
+ return {
813
+ nodes,
814
+ hasMore: !!nextCursor,
815
+ nextCursor
816
+ };
817
+ }
818
+ // If this shard has more pages, create cursor for next page
819
+ if (response?.nextPageToken) {
820
+ return {
821
+ nodes,
822
+ hasMore: true,
823
+ nextCursor: `${shardIndex}:${response.nextPageToken}`
824
+ };
825
+ }
826
+ // Continue to next shard
827
+ }
828
+ // No more shards or nodes
829
+ return {
830
+ nodes,
831
+ hasMore: false,
832
+ nextCursor: undefined
833
+ };
834
+ }
835
+ catch (error) {
836
+ this.logger.error('Error in getNodesWithPagination:', error);
837
+ throw new Error(`Failed to get nodes with pagination: ${error}`);
838
+ }
839
+ }
840
+ /**
841
+ * Get nouns by noun type (internal implementation)
842
+ */
843
+ async getNounsByNounType_internal(nounType) {
844
+ const result = await this.getNounsWithPagination({
845
+ limit: 10000, // Large limit for backward compatibility
846
+ filter: { nounType }
847
+ });
848
+ return result.items;
849
+ }
850
+ /**
851
+ * Get verbs by source ID (internal implementation)
852
+ */
853
+ async getVerbsBySource_internal(sourceId) {
854
+ // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
855
+ const result = await this.getVerbsWithPagination({
856
+ limit: Number.MAX_SAFE_INTEGER,
857
+ filter: { sourceId: [sourceId] }
858
+ });
859
+ return result.items;
860
+ }
861
+ /**
862
+ * Get verbs by target ID (internal implementation)
863
+ */
864
+ async getVerbsByTarget_internal(targetId) {
865
+ // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
866
+ const result = await this.getVerbsWithPagination({
867
+ limit: Number.MAX_SAFE_INTEGER,
868
+ filter: { targetId: [targetId] }
869
+ });
870
+ return result.items;
871
+ }
872
+ /**
873
+ * Get verbs by type (internal implementation)
874
+ */
875
+ async getVerbsByType_internal(type) {
876
+ // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
877
+ const result = await this.getVerbsWithPagination({
878
+ limit: Number.MAX_SAFE_INTEGER,
879
+ filter: { verbType: type }
880
+ });
881
+ return result.items;
882
+ }
883
+ /**
884
+ * Get verbs with pagination
885
+ */
886
+ async getVerbsWithPagination(options = {}) {
887
+ await this.ensureInitialized();
888
+ const limit = options.limit || 100;
889
+ try {
890
+ // List verbs (simplified - not sharded yet in original implementation)
891
+ const [files, , response] = await this.bucket.getFiles({
892
+ prefix: this.verbPrefix,
893
+ maxResults: limit,
894
+ pageToken: options.cursor
895
+ });
896
+ // If no files, return empty result
897
+ if (!files || files.length === 0) {
898
+ return {
899
+ items: [],
900
+ totalCount: 0,
901
+ hasMore: false,
902
+ nextCursor: undefined
903
+ };
904
+ }
905
+ // Extract verb IDs and load verbs as HNSW verbs
906
+ const hnswVerbs = [];
907
+ for (const file of files) {
908
+ if (!file.name)
909
+ continue;
910
+ // Extract UUID from path
911
+ let name = file.name;
912
+ if (name.startsWith(this.verbPrefix)) {
913
+ name = name.substring(this.verbPrefix.length);
914
+ }
915
+ if (name.endsWith('.json')) {
916
+ name = name.substring(0, name.length - 5);
917
+ }
918
+ const verb = await this.getEdge(name);
919
+ if (verb) {
920
+ hnswVerbs.push(verb);
921
+ }
922
+ }
923
+ // Convert HNSWVerbs to GraphVerbs by combining with metadata
924
+ const graphVerbs = [];
925
+ for (const hnswVerb of hnswVerbs) {
926
+ const graphVerb = await this.convertHNSWVerbToGraphVerb(hnswVerb);
927
+ if (graphVerb) {
928
+ graphVerbs.push(graphVerb);
929
+ }
930
+ }
931
+ // Apply filters
932
+ let filteredVerbs = graphVerbs;
933
+ if (options.filter) {
934
+ filteredVerbs = graphVerbs.filter((graphVerb) => {
935
+ // Filter by sourceId
936
+ if (options.filter.sourceId) {
937
+ const sourceIds = Array.isArray(options.filter.sourceId)
938
+ ? options.filter.sourceId
939
+ : [options.filter.sourceId];
940
+ if (!sourceIds.includes(graphVerb.sourceId)) {
941
+ return false;
942
+ }
943
+ }
944
+ // Filter by targetId
945
+ if (options.filter.targetId) {
946
+ const targetIds = Array.isArray(options.filter.targetId)
947
+ ? options.filter.targetId
948
+ : [options.filter.targetId];
949
+ if (!targetIds.includes(graphVerb.targetId)) {
950
+ return false;
951
+ }
952
+ }
953
+ // Filter by verbType
954
+ if (options.filter.verbType) {
955
+ const verbTypes = Array.isArray(options.filter.verbType)
956
+ ? options.filter.verbType
957
+ : [options.filter.verbType];
958
+ const verbType = graphVerb.verb || graphVerb.type || '';
959
+ if (!verbTypes.includes(verbType)) {
960
+ return false;
961
+ }
962
+ }
963
+ return true;
964
+ });
965
+ }
966
+ return {
967
+ items: filteredVerbs,
968
+ hasMore: !!response?.nextPageToken,
969
+ nextCursor: response?.nextPageToken
970
+ };
971
+ }
972
+ catch (error) {
973
+ this.logger.error('Error in getVerbsWithPagination:', error);
974
+ throw new Error(`Failed to get verbs with pagination: ${error}`);
975
+ }
976
+ }
977
+ /**
978
+ * Get nouns with filtering and pagination (public API)
979
+ */
980
+ async getNouns(options) {
981
+ const limit = options?.pagination?.limit || 100;
982
+ const cursor = options?.pagination?.cursor;
983
+ return this.getNounsWithPagination({
984
+ limit,
985
+ cursor,
986
+ filter: options?.filter
987
+ });
988
+ }
989
+ /**
990
+ * Get verbs with filtering and pagination (public API)
991
+ */
992
+ async getVerbs(options) {
993
+ const limit = options?.pagination?.limit || 100;
994
+ const cursor = options?.pagination?.cursor;
995
+ return this.getVerbsWithPagination({
996
+ limit,
997
+ cursor,
998
+ filter: options?.filter
999
+ });
1000
+ }
1001
+ /**
1002
+ * Clear all data from storage
1003
+ */
1004
+ async clear() {
1005
+ await this.ensureInitialized();
1006
+ try {
1007
+ this.logger.info('🧹 Clearing all data from GCS bucket...');
1008
+ // Helper function to delete all objects with a given prefix
1009
+ const deleteObjectsWithPrefix = async (prefix) => {
1010
+ const [files] = await this.bucket.getFiles({ prefix });
1011
+ if (!files || files.length === 0) {
1012
+ return;
1013
+ }
1014
+ // Delete each file
1015
+ for (const file of files) {
1016
+ await file.delete();
1017
+ }
1018
+ };
1019
+ // Clear all data directories
1020
+ await deleteObjectsWithPrefix(this.nounPrefix);
1021
+ await deleteObjectsWithPrefix(this.verbPrefix);
1022
+ await deleteObjectsWithPrefix(this.metadataPrefix);
1023
+ await deleteObjectsWithPrefix(this.verbMetadataPrefix);
1024
+ await deleteObjectsWithPrefix(this.systemPrefix);
1025
+ // Clear caches
1026
+ this.nounCacheManager.clear();
1027
+ this.verbCacheManager.clear();
1028
+ // Reset counts
1029
+ this.totalNounCount = 0;
1030
+ this.totalVerbCount = 0;
1031
+ this.entityCounts.clear();
1032
+ this.verbCounts.clear();
1033
+ this.logger.info('✅ All data cleared from GCS');
1034
+ }
1035
+ catch (error) {
1036
+ this.logger.error('Failed to clear GCS storage:', error);
1037
+ throw new Error(`Failed to clear GCS storage: ${error}`);
1038
+ }
1039
+ }
1040
+ /**
1041
+ * Get storage status
1042
+ */
1043
+ async getStorageStatus() {
1044
+ await this.ensureInitialized();
1045
+ try {
1046
+ // Get bucket metadata
1047
+ const [metadata] = await this.bucket.getMetadata();
1048
+ return {
1049
+ type: 'gcs-native',
1050
+ used: 0, // GCS doesn't provide usage info easily
1051
+ quota: null, // No quota in GCS
1052
+ details: {
1053
+ bucket: this.bucketName,
1054
+ location: metadata.location,
1055
+ storageClass: metadata.storageClass,
1056
+ created: metadata.timeCreated
1057
+ }
1058
+ };
1059
+ }
1060
+ catch (error) {
1061
+ this.logger.error('Failed to get storage status:', error);
1062
+ return {
1063
+ type: 'gcs-native',
1064
+ used: 0,
1065
+ quota: null
1066
+ };
1067
+ }
1068
+ }
1069
+ /**
1070
+ * Save statistics data to storage
1071
+ */
1072
+ async saveStatisticsData(statistics) {
1073
+ await this.ensureInitialized();
1074
+ try {
1075
+ const key = `${this.systemPrefix}${STATISTICS_KEY}.json`;
1076
+ this.logger.trace(`Saving statistics to ${key}`);
1077
+ const file = this.bucket.file(key);
1078
+ await file.save(JSON.stringify(statistics, null, 2), {
1079
+ contentType: 'application/json',
1080
+ resumable: false
1081
+ });
1082
+ this.logger.trace('Statistics saved successfully');
1083
+ }
1084
+ catch (error) {
1085
+ this.logger.error('Failed to save statistics:', error);
1086
+ throw new Error(`Failed to save statistics: ${error}`);
1087
+ }
1088
+ }
1089
+ /**
1090
+ * Get statistics data from storage
1091
+ */
1092
+ async getStatisticsData() {
1093
+ await this.ensureInitialized();
1094
+ try {
1095
+ const key = `${this.systemPrefix}${STATISTICS_KEY}.json`;
1096
+ this.logger.trace(`Getting statistics from ${key}`);
1097
+ const file = this.bucket.file(key);
1098
+ const [contents] = await file.download();
1099
+ const statistics = JSON.parse(contents.toString());
1100
+ this.logger.trace('Statistics retrieved successfully');
1101
+ return statistics;
1102
+ }
1103
+ catch (error) {
1104
+ if (error.code === 404) {
1105
+ this.logger.trace('Statistics not found (creating new)');
1106
+ return null;
1107
+ }
1108
+ this.logger.error('Failed to get statistics:', error);
1109
+ return null;
1110
+ }
1111
+ }
1112
+ /**
1113
+ * Initialize counts from storage
1114
+ */
1115
+ async initializeCounts() {
1116
+ try {
1117
+ const key = `${this.systemPrefix}counts.json`;
1118
+ const file = this.bucket.file(key);
1119
+ const [contents] = await file.download();
1120
+ const counts = JSON.parse(contents.toString());
1121
+ this.totalNounCount = counts.totalNounCount || 0;
1122
+ this.totalVerbCount = counts.totalVerbCount || 0;
1123
+ this.entityCounts = new Map(Object.entries(counts.entityCounts || {}));
1124
+ this.verbCounts = new Map(Object.entries(counts.verbCounts || {}));
1125
+ prodLog.info(`📊 Loaded counts: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
1126
+ }
1127
+ catch (error) {
1128
+ if (error.code === 404) {
1129
+ // No counts file yet - initialize from scan
1130
+ prodLog.info('📊 No counts file found - initializing from storage scan...');
1131
+ await this.initializeCountsFromScan();
1132
+ }
1133
+ else {
1134
+ this.logger.error('Error loading counts:', error);
1135
+ }
1136
+ }
1137
+ }
1138
+ /**
1139
+ * Initialize counts from storage scan (expensive - only for first-time init)
1140
+ */
1141
+ async initializeCountsFromScan() {
1142
+ try {
1143
+ // Count nouns
1144
+ const [nounFiles] = await this.bucket.getFiles({ prefix: this.nounPrefix });
1145
+ this.totalNounCount = nounFiles?.filter((f) => f.name?.endsWith('.json')).length || 0;
1146
+ // Count verbs
1147
+ const [verbFiles] = await this.bucket.getFiles({ prefix: this.verbPrefix });
1148
+ this.totalVerbCount = verbFiles?.filter((f) => f.name?.endsWith('.json')).length || 0;
1149
+ // Save initial counts
1150
+ await this.persistCounts();
1151
+ prodLog.info(`✅ Initialized counts: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
1152
+ }
1153
+ catch (error) {
1154
+ this.logger.error('Error initializing counts from scan:', error);
1155
+ }
1156
+ }
1157
+ /**
1158
+ * Persist counts to storage
1159
+ */
1160
+ async persistCounts() {
1161
+ try {
1162
+ const key = `${this.systemPrefix}counts.json`;
1163
+ const counts = {
1164
+ totalNounCount: this.totalNounCount,
1165
+ totalVerbCount: this.totalVerbCount,
1166
+ entityCounts: Object.fromEntries(this.entityCounts),
1167
+ verbCounts: Object.fromEntries(this.verbCounts),
1168
+ lastUpdated: new Date().toISOString()
1169
+ };
1170
+ const file = this.bucket.file(key);
1171
+ await file.save(JSON.stringify(counts, null, 2), {
1172
+ contentType: 'application/json',
1173
+ resumable: false
1174
+ });
1175
+ }
1176
+ catch (error) {
1177
+ this.logger.error('Error persisting counts:', error);
1178
+ }
1179
+ }
1180
+ }
1181
+ //# sourceMappingURL=gcsStorage.js.map