@soulcraft/brainy 3.25.2 → 3.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@
3
3
  * Provides persistent storage for the vector database using the Origin Private File System API
4
4
  */
5
5
  import { BaseStorage, NOUNS_DIR, VERBS_DIR, METADATA_DIR, NOUN_METADATA_DIR, VERB_METADATA_DIR, INDEX_DIR } from '../baseStorage.js';
6
+ import { getShardIdFromUuid } from '../sharding.js';
6
7
  import '../../types/fileSystemTypes.js';
7
8
  /**
8
9
  * Helper function to safely get a file from a FileSystemHandle
@@ -145,8 +146,14 @@ export class OPFSStorage extends BaseStorage {
145
146
  ...noun,
146
147
  connections: this.mapToObject(noun.connections, (set) => Array.from(set))
147
148
  };
148
- // Create or get the file for this noun
149
- const fileHandle = await this.nounsDir.getFileHandle(`${noun.id}.json`, {
149
+ // Use UUID-based sharding for nouns
150
+ const shardId = getShardIdFromUuid(noun.id);
151
+ // Get or create the shard directory
152
+ const shardDir = await this.nounsDir.getDirectoryHandle(shardId, {
153
+ create: true
154
+ });
155
+ // Create or get the file in the shard directory
156
+ const fileHandle = await shardDir.getFileHandle(`${noun.id}.json`, {
150
157
  create: true
151
158
  });
152
159
  // Write the noun data to the file
@@ -165,8 +172,12 @@ export class OPFSStorage extends BaseStorage {
165
172
  async getNoun_internal(id) {
166
173
  await this.ensureInitialized();
167
174
  try {
168
- // Get the file handle for this noun
169
- const fileHandle = await this.nounsDir.getFileHandle(`${id}.json`);
175
+ // Use UUID-based sharding for nouns
176
+ const shardId = getShardIdFromUuid(id);
177
+ // Get the shard directory
178
+ const shardDir = await this.nounsDir.getDirectoryHandle(shardId);
179
+ // Get the file handle from the shard directory
180
+ const fileHandle = await shardDir.getFileHandle(`${id}.json`);
170
181
  // Read the noun data from the file
171
182
  const file = await fileHandle.getFile();
172
183
  const text = await file.text();
@@ -205,34 +216,40 @@ export class OPFSStorage extends BaseStorage {
205
216
  await this.ensureInitialized();
206
217
  const nodes = [];
207
218
  try {
208
- // Iterate through all files in the nouns directory
209
- for await (const [name, handle] of this.nounsDir.entries()) {
210
- if (handle.kind === 'file') {
211
- try {
212
- // Read the node data from the file
213
- const file = await safeGetFile(handle);
214
- const text = await file.text();
215
- const data = JSON.parse(text);
216
- // Get the metadata to check the noun type
217
- const metadata = await this.getMetadata(data.id);
218
- // Include the node if its noun type matches the requested type
219
- if (metadata && metadata.noun === nounType) {
220
- // Convert serialized connections back to Map<number, Set<string>>
221
- const connections = new Map();
222
- for (const [level, nodeIds] of Object.entries(data.connections)) {
223
- connections.set(Number(level), new Set(nodeIds));
219
+ // Iterate through all shard directories
220
+ for await (const [shardName, shardHandle] of this.nounsDir.entries()) {
221
+ if (shardHandle.kind === 'directory') {
222
+ const shardDir = shardHandle;
223
+ // Iterate through all files in this shard
224
+ for await (const [fileName, fileHandle] of shardDir.entries()) {
225
+ if (fileHandle.kind === 'file') {
226
+ try {
227
+ // Read the node data from the file
228
+ const file = await safeGetFile(fileHandle);
229
+ const text = await file.text();
230
+ const data = JSON.parse(text);
231
+ // Get the metadata to check the noun type
232
+ const metadata = await this.getMetadata(data.id);
233
+ // Include the node if its noun type matches the requested type
234
+ if (metadata && metadata.noun === nounType) {
235
+ // Convert serialized connections back to Map<number, Set<string>>
236
+ const connections = new Map();
237
+ for (const [level, nodeIds] of Object.entries(data.connections)) {
238
+ connections.set(Number(level), new Set(nodeIds));
239
+ }
240
+ nodes.push({
241
+ id: data.id,
242
+ vector: data.vector,
243
+ connections,
244
+ level: data.level || 0
245
+ });
246
+ }
247
+ }
248
+ catch (error) {
249
+ console.error(`Error reading node file ${shardName}/${fileName}:`, error);
224
250
  }
225
- nodes.push({
226
- id: data.id,
227
- vector: data.vector,
228
- connections,
229
- level: data.level || 0
230
- });
231
251
  }
232
252
  }
233
- catch (error) {
234
- console.error(`Error reading node file ${name}:`, error);
235
- }
236
253
  }
237
254
  }
238
255
  }
@@ -253,7 +270,12 @@ export class OPFSStorage extends BaseStorage {
253
270
  async deleteNode(id) {
254
271
  await this.ensureInitialized();
255
272
  try {
256
- await this.nounsDir.removeEntry(`${id}.json`);
273
+ // Use UUID-based sharding for nouns
274
+ const shardId = getShardIdFromUuid(id);
275
+ // Get the shard directory
276
+ const shardDir = await this.nounsDir.getDirectoryHandle(shardId);
277
+ // Delete the file from the shard directory
278
+ await shardDir.removeEntry(`${id}.json`);
257
279
  }
258
280
  catch (error) {
259
281
  // Ignore NotFoundError, which means the file doesn't exist
@@ -280,8 +302,14 @@ export class OPFSStorage extends BaseStorage {
280
302
  ...edge,
281
303
  connections: this.mapToObject(edge.connections, (set) => Array.from(set))
282
304
  };
283
- // Create or get the file for this verb
284
- const fileHandle = await this.verbsDir.getFileHandle(`${edge.id}.json`, {
305
+ // Use UUID-based sharding for verbs
306
+ const shardId = getShardIdFromUuid(edge.id);
307
+ // Get or create the shard directory
308
+ const shardDir = await this.verbsDir.getDirectoryHandle(shardId, {
309
+ create: true
310
+ });
311
+ // Create or get the file in the shard directory
312
+ const fileHandle = await shardDir.getFileHandle(`${edge.id}.json`, {
285
313
  create: true
286
314
  });
287
315
  // Write the verb data to the file
@@ -306,8 +334,12 @@ export class OPFSStorage extends BaseStorage {
306
334
  async getEdge(id) {
307
335
  await this.ensureInitialized();
308
336
  try {
309
- // Get the file handle for this edge
310
- const fileHandle = await this.verbsDir.getFileHandle(`${id}.json`);
337
+ // Use UUID-based sharding for verbs
338
+ const shardId = getShardIdFromUuid(id);
339
+ // Get the shard directory
340
+ const shardDir = await this.verbsDir.getDirectoryHandle(shardId);
341
+ // Get the file handle from the shard directory
342
+ const fileHandle = await shardDir.getFileHandle(`${id}.json`);
311
343
  // Read the edge data from the file
312
344
  const file = await fileHandle.getFile();
313
345
  const text = await file.text();
@@ -345,37 +377,43 @@ export class OPFSStorage extends BaseStorage {
345
377
  await this.ensureInitialized();
346
378
  const allEdges = [];
347
379
  try {
348
- // Iterate through all files in the verbs directory
349
- for await (const [name, handle] of this.verbsDir.entries()) {
350
- if (handle.kind === 'file') {
351
- try {
352
- // Read the edge data from the file
353
- const file = await safeGetFile(handle);
354
- const text = await file.text();
355
- const data = JSON.parse(text);
356
- // Convert serialized connections back to Map<number, Set<string>>
357
- const connections = new Map();
358
- for (const [level, nodeIds] of Object.entries(data.connections)) {
359
- connections.set(Number(level), new Set(nodeIds));
380
+ // Iterate through all shard directories
381
+ for await (const [shardName, shardHandle] of this.verbsDir.entries()) {
382
+ if (shardHandle.kind === 'directory') {
383
+ const shardDir = shardHandle;
384
+ // Iterate through all files in this shard
385
+ for await (const [fileName, fileHandle] of shardDir.entries()) {
386
+ if (fileHandle.kind === 'file') {
387
+ try {
388
+ // Read the edge data from the file
389
+ const file = await safeGetFile(fileHandle);
390
+ const text = await file.text();
391
+ const data = JSON.parse(text);
392
+ // Convert serialized connections back to Map<number, Set<string>>
393
+ const connections = new Map();
394
+ for (const [level, nodeIds] of Object.entries(data.connections)) {
395
+ connections.set(Number(level), new Set(nodeIds));
396
+ }
397
+ // Create default timestamp if not present
398
+ const defaultTimestamp = {
399
+ seconds: Math.floor(Date.now() / 1000),
400
+ nanoseconds: (Date.now() % 1000) * 1000000
401
+ };
402
+ // Create default createdBy if not present
403
+ const defaultCreatedBy = {
404
+ augmentation: 'unknown',
405
+ version: '1.0'
406
+ };
407
+ allEdges.push({
408
+ id: data.id,
409
+ vector: data.vector,
410
+ connections
411
+ });
412
+ }
413
+ catch (error) {
414
+ console.error(`Error reading edge file ${shardName}/${fileName}:`, error);
415
+ }
360
416
  }
361
- // Create default timestamp if not present
362
- const defaultTimestamp = {
363
- seconds: Math.floor(Date.now() / 1000),
364
- nanoseconds: (Date.now() % 1000) * 1000000
365
- };
366
- // Create default createdBy if not present
367
- const defaultCreatedBy = {
368
- augmentation: 'unknown',
369
- version: '1.0'
370
- };
371
- allEdges.push({
372
- id: data.id,
373
- vector: data.vector,
374
- connections
375
- });
376
- }
377
- catch (error) {
378
- console.error(`Error reading edge file ${name}:`, error);
379
417
  }
380
418
  }
381
419
  }
@@ -457,7 +495,12 @@ export class OPFSStorage extends BaseStorage {
457
495
  async deleteEdge(id) {
458
496
  await this.ensureInitialized();
459
497
  try {
460
- await this.verbsDir.removeEntry(`${id}.json`);
498
+ // Use UUID-based sharding for verbs
499
+ const shardId = getShardIdFromUuid(id);
500
+ // Get the shard directory
501
+ const shardDir = await this.verbsDir.getDirectoryHandle(shardId);
502
+ // Delete the file from the shard directory
503
+ await shardDir.removeEntry(`${id}.json`);
461
504
  }
462
505
  catch (error) {
463
506
  // Ignore NotFoundError, which means the file doesn't exist
@@ -542,8 +585,13 @@ export class OPFSStorage extends BaseStorage {
542
585
  */
543
586
  async saveVerbMetadata_internal(id, metadata) {
544
587
  await this.ensureInitialized();
588
+ // Use UUID-based sharding for metadata (consistent with verb vectors)
589
+ const shardId = getShardIdFromUuid(id);
590
+ // Get or create the shard directory
591
+ const shardDir = await this.verbMetadataDir.getDirectoryHandle(shardId, { create: true });
592
+ // Create or get the file in the shard directory
545
593
  const fileName = `${id}.json`;
546
- const fileHandle = await this.verbMetadataDir.getFileHandle(fileName, { create: true });
594
+ const fileHandle = await shardDir.getFileHandle(fileName, { create: true });
547
595
  const writable = await fileHandle.createWritable();
548
596
  await writable.write(JSON.stringify(metadata, null, 2));
549
597
  await writable.close();
@@ -553,9 +601,14 @@ export class OPFSStorage extends BaseStorage {
553
601
  */
554
602
  async getVerbMetadata(id) {
555
603
  await this.ensureInitialized();
604
+ // Use UUID-based sharding for metadata (consistent with verb vectors)
605
+ const shardId = getShardIdFromUuid(id);
556
606
  const fileName = `${id}.json`;
557
607
  try {
558
- const fileHandle = await this.verbMetadataDir.getFileHandle(fileName);
608
+ // Get the shard directory
609
+ const shardDir = await this.verbMetadataDir.getDirectoryHandle(shardId);
610
+ // Get the file from the shard directory
611
+ const fileHandle = await shardDir.getFileHandle(fileName);
559
612
  const file = await safeGetFile(fileHandle);
560
613
  const text = await file.text();
561
614
  return JSON.parse(text);
@@ -572,8 +625,13 @@ export class OPFSStorage extends BaseStorage {
572
625
  */
573
626
  async saveNounMetadata_internal(id, metadata) {
574
627
  await this.ensureInitialized();
628
+ // Use UUID-based sharding for metadata (consistent with noun vectors)
629
+ const shardId = getShardIdFromUuid(id);
630
+ // Get or create the shard directory
631
+ const shardDir = await this.nounMetadataDir.getDirectoryHandle(shardId, { create: true });
632
+ // Create or get the file in the shard directory
575
633
  const fileName = `${id}.json`;
576
- const fileHandle = await this.nounMetadataDir.getFileHandle(fileName, { create: true });
634
+ const fileHandle = await shardDir.getFileHandle(fileName, { create: true });
577
635
  const writable = await fileHandle.createWritable();
578
636
  await writable.write(JSON.stringify(metadata, null, 2));
579
637
  await writable.close();
@@ -583,9 +641,14 @@ export class OPFSStorage extends BaseStorage {
583
641
  */
584
642
  async getNounMetadata(id) {
585
643
  await this.ensureInitialized();
644
+ // Use UUID-based sharding for metadata (consistent with noun vectors)
645
+ const shardId = getShardIdFromUuid(id);
586
646
  const fileName = `${id}.json`;
587
647
  try {
588
- const fileHandle = await this.nounMetadataDir.getFileHandle(fileName);
648
+ // Get the shard directory
649
+ const shardDir = await this.nounMetadataDir.getDirectoryHandle(shardId);
650
+ // Get the file from the shard directory
651
+ const fileHandle = await shardDir.getFileHandle(fileName);
589
652
  const file = await safeGetFile(fileHandle);
590
653
  const text = await file.text();
591
654
  return JSON.parse(text);
@@ -1117,12 +1180,19 @@ export class OPFSStorage extends BaseStorage {
1117
1180
  await this.ensureInitialized();
1118
1181
  const limit = options.limit || 100;
1119
1182
  const cursor = options.cursor;
1120
- // Get all noun files
1183
+ // Get all noun files from all shards
1121
1184
  const nounFiles = [];
1122
1185
  if (this.nounsDir) {
1123
- for await (const [name, handle] of this.nounsDir.entries()) {
1124
- if (handle.kind === 'file' && name.endsWith('.json')) {
1125
- nounFiles.push(name);
1186
+ // Iterate through all shard directories
1187
+ for await (const [shardName, shardHandle] of this.nounsDir.entries()) {
1188
+ if (shardHandle.kind === 'directory') {
1189
+ // Iterate through files in this shard
1190
+ const shardDir = shardHandle;
1191
+ for await (const [fileName, fileHandle] of shardDir.entries()) {
1192
+ if (fileHandle.kind === 'file' && fileName.endsWith('.json')) {
1193
+ nounFiles.push(`${shardName}/${fileName}`);
1194
+ }
1195
+ }
1126
1196
  }
1127
1197
  }
1128
1198
  }
@@ -1141,7 +1211,8 @@ export class OPFSStorage extends BaseStorage {
1141
1211
  // Load nouns from files
1142
1212
  const items = [];
1143
1213
  for (const fileName of pageFiles) {
1144
- const id = fileName.replace('.json', '');
1214
+ // fileName is in format "shard/uuid.json", extract just the UUID
1215
+ const id = fileName.split('/')[1].replace('.json', '');
1145
1216
  const noun = await this.getNoun_internal(id);
1146
1217
  if (noun) {
1147
1218
  // Apply filters if provided
@@ -1205,12 +1276,19 @@ export class OPFSStorage extends BaseStorage {
1205
1276
  await this.ensureInitialized();
1206
1277
  const limit = options.limit || 100;
1207
1278
  const cursor = options.cursor;
1208
- // Get all verb files
1279
+ // Get all verb files from all shards
1209
1280
  const verbFiles = [];
1210
1281
  if (this.verbsDir) {
1211
- for await (const [name, handle] of this.verbsDir.entries()) {
1212
- if (handle.kind === 'file' && name.endsWith('.json')) {
1213
- verbFiles.push(name);
1282
+ // Iterate through all shard directories
1283
+ for await (const [shardName, shardHandle] of this.verbsDir.entries()) {
1284
+ if (shardHandle.kind === 'directory') {
1285
+ // Iterate through files in this shard
1286
+ const shardDir = shardHandle;
1287
+ for await (const [fileName, fileHandle] of shardDir.entries()) {
1288
+ if (fileHandle.kind === 'file' && fileName.endsWith('.json')) {
1289
+ verbFiles.push(`${shardName}/${fileName}`);
1290
+ }
1291
+ }
1214
1292
  }
1215
1293
  }
1216
1294
  }
@@ -1229,7 +1307,8 @@ export class OPFSStorage extends BaseStorage {
1229
1307
  // Load verbs from files and convert to GraphVerb
1230
1308
  const items = [];
1231
1309
  for (const fileName of pageFiles) {
1232
- const id = fileName.replace('.json', '');
1310
+ // fileName is in format "shard/uuid.json", extract just the UUID
1311
+ const id = fileName.split('/')[1].replace('.json', '');
1233
1312
  const hnswVerb = await this.getVerb_internal(id);
1234
1313
  if (hnswVerb) {
1235
1314
  // Convert HNSWVerb to GraphVerb
@@ -1330,16 +1409,26 @@ export class OPFSStorage extends BaseStorage {
1330
1409
  */
1331
1410
  async initializeCountsFromScan() {
1332
1411
  try {
1333
- // Count nouns
1412
+ // Count nouns across all shards
1334
1413
  let nounCount = 0;
1335
- for await (const [,] of this.nounsDir.entries()) {
1336
- nounCount++;
1414
+ for await (const [shardName, shardHandle] of this.nounsDir.entries()) {
1415
+ if (shardHandle.kind === 'directory') {
1416
+ const shardDir = shardHandle;
1417
+ for await (const [,] of shardDir.entries()) {
1418
+ nounCount++;
1419
+ }
1420
+ }
1337
1421
  }
1338
1422
  this.totalNounCount = nounCount;
1339
- // Count verbs
1423
+ // Count verbs across all shards
1340
1424
  let verbCount = 0;
1341
- for await (const [,] of this.verbsDir.entries()) {
1342
- verbCount++;
1425
+ for await (const [shardName, shardHandle] of this.verbsDir.entries()) {
1426
+ if (shardHandle.kind === 'directory') {
1427
+ const shardDir = shardHandle;
1428
+ for await (const [,] of shardDir.entries()) {
1429
+ verbCount++;
1430
+ }
1431
+ }
1343
1432
  }
1344
1433
  this.totalVerbCount = verbCount;
1345
1434
  // Save initial counts
@@ -73,7 +73,6 @@ export declare class S3CompatibleStorage extends BaseStorage {
73
73
  private nounWriteBuffer;
74
74
  private verbWriteBuffer;
75
75
  private coordinator?;
76
- private shardManager?;
77
76
  private cacheSync?;
78
77
  private readWriteSeparation?;
79
78
  private requestCoalescer;
@@ -112,7 +111,9 @@ export declare class S3CompatibleStorage extends BaseStorage {
112
111
  init(): Promise<void>;
113
112
  /**
114
113
  * Set distributed components for multi-node coordination
115
- * Zero-config: Automatically optimizes based on components provided
114
+ *
115
+ * Note: Sharding is always enabled via UUID-based prefixes (00-ff).
116
+ * ShardManager is no longer required - sharding is deterministic based on UUID.
116
117
  */
117
118
  setDistributedComponents(components: {
118
119
  coordinator?: any;
@@ -121,11 +122,25 @@ export declare class S3CompatibleStorage extends BaseStorage {
121
122
  readWriteSeparation?: any;
122
123
  }): void;
123
124
  /**
124
- * Get the S3 key for a noun, using sharding if available
125
+ * Get the S3 key for a noun using UUID-based sharding
126
+ *
127
+ * Uses first 2 hex characters of UUID for consistent sharding.
128
+ * Path format: entities/nouns/vectors/{shardId}/{uuid}.json
129
+ *
130
+ * @example
131
+ * getNounKey('ab123456-1234-5678-9abc-def012345678')
132
+ * // returns 'entities/nouns/vectors/ab/ab123456-1234-5678-9abc-def012345678.json'
125
133
  */
126
134
  private getNounKey;
127
135
  /**
128
- * Get the S3 key for a verb, using sharding if available
136
+ * Get the S3 key for a verb using UUID-based sharding
137
+ *
138
+ * Uses first 2 hex characters of UUID for consistent sharding.
139
+ * Path format: verbs/{shardId}/{uuid}.json
140
+ *
141
+ * @example
142
+ * getVerbKey('cd987654-4321-8765-cba9-fed543210987')
143
+ * // returns 'verbs/cd/cd987654-4321-8765-cba9-fed543210987.json'
129
144
  */
130
145
  private getVerbKey;
131
146
  /**
@@ -221,9 +236,23 @@ export declare class S3CompatibleStorage extends BaseStorage {
221
236
  */
222
237
  protected getAllNodes(): Promise<HNSWNode[]>;
223
238
  /**
224
- * Get nodes with pagination
239
+ * Get nodes with pagination using UUID-based sharding
240
+ *
241
+ * Iterates through 256 UUID-based shards (00-ff) to retrieve nodes.
242
+ * Cursor format: "shardIndex:s3ContinuationToken" to support pagination across shards.
243
+ *
225
244
  * @param options Pagination options
226
245
  * @returns Promise that resolves to a paginated result of nodes
246
+ *
247
+ * @example
248
+ * // First page
249
+ * const page1 = await getNodesWithPagination({ limit: 100 })
250
+ * // page1.nodes contains up to 100 nodes
251
+ * // page1.nextCursor might be "5:some-s3-token" (currently in shard 05)
252
+ *
253
+ * // Next page
254
+ * const page2 = await getNodesWithPagination({ limit: 100, cursor: page1.nextCursor })
255
+ * // Continues from where page1 left off
227
256
  */
228
257
  protected getNodesWithPagination(options?: {
229
258
  limit?: number;
@@ -234,6 +263,10 @@ export declare class S3CompatibleStorage extends BaseStorage {
234
263
  hasMore: boolean;
235
264
  nextCursor?: string;
236
265
  }>;
266
+ /**
267
+ * Load nodes by IDs efficiently using cache or direct fetch
268
+ */
269
+ private loadNodesByIds;
237
270
  /**
238
271
  * Get nouns by noun type (internal implementation)
239
272
  * @param nounType The noun type to filter by
@@ -517,6 +550,11 @@ export declare class S3CompatibleStorage extends BaseStorage {
517
550
  hasMore: boolean;
518
551
  nextCursor?: string;
519
552
  }>;
553
+ /**
554
+ * Estimate total noun count by listing objects across all shards
555
+ * This is more efficient than loading all nouns
556
+ */
557
+ private estimateTotalNounCount;
520
558
  /**
521
559
  * Initialize counts from S3 storage
522
560
  */