@soulcraft/brainy 3.25.2 → 3.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/storage/adapters/fileSystemStorage.js +7 -2
- package/dist/storage/adapters/gcsStorage.d.ts +334 -0
- package/dist/storage/adapters/gcsStorage.js +1181 -0
- package/dist/storage/adapters/opfsStorage.js +174 -85
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +43 -5
- package/dist/storage/adapters/s3CompatibleStorage.js +191 -86
- package/dist/storage/sharding.d.ts +103 -0
- package/dist/storage/sharding.js +137 -0
- package/dist/storage/storageFactory.d.ts +31 -4
- package/dist/storage/storageFactory.js +33 -4
- package/package.json +2 -1
|
@@ -13,6 +13,7 @@ import { getGlobalSocketManager } from '../../utils/adaptiveSocketManager.js';
|
|
|
13
13
|
import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
|
|
14
14
|
import { getWriteBuffer } from '../../utils/writeBuffer.js';
|
|
15
15
|
import { getCoalescer } from '../../utils/requestCoalescer.js';
|
|
16
|
+
import { getShardIdFromUuid, getShardIdByIndex, TOTAL_SHARDS } from '../sharding.js';
|
|
16
17
|
// Export R2Storage as an alias for S3CompatibleStorage
|
|
17
18
|
export { S3CompatibleStorage as R2Storage };
|
|
18
19
|
/**
|
|
@@ -69,6 +70,8 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
69
70
|
// Write buffers for bulk operations
|
|
70
71
|
this.nounWriteBuffer = null;
|
|
71
72
|
this.verbWriteBuffer = null;
|
|
73
|
+
// Note: Sharding is always enabled via UUID-based prefixes (00-ff)
|
|
74
|
+
// ShardManager is no longer used - sharding is deterministic
|
|
72
75
|
// Request coalescer for deduplication
|
|
73
76
|
this.requestCoalescer = null;
|
|
74
77
|
// High-volume mode detection - MUCH more aggressive
|
|
@@ -242,17 +245,16 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
242
245
|
}
|
|
243
246
|
/**
|
|
244
247
|
* Set distributed components for multi-node coordination
|
|
245
|
-
*
|
|
248
|
+
*
|
|
249
|
+
* Note: Sharding is always enabled via UUID-based prefixes (00-ff).
|
|
250
|
+
* ShardManager is no longer required - sharding is deterministic based on UUID.
|
|
246
251
|
*/
|
|
247
252
|
setDistributedComponents(components) {
|
|
248
253
|
this.coordinator = components.coordinator;
|
|
249
|
-
this.shardManager = components.shardManager;
|
|
250
254
|
this.cacheSync = components.cacheSync;
|
|
251
255
|
this.readWriteSeparation = components.readWriteSeparation;
|
|
252
|
-
//
|
|
253
|
-
|
|
254
|
-
console.log(`🎯 S3 Storage: Sharding enabled with ${this.shardManager.config?.shardCount || 64} shards`);
|
|
255
|
-
}
|
|
256
|
+
// Note: UUID-based sharding is always active (256 shards: 00-ff)
|
|
257
|
+
console.log(`🎯 S3 Storage: UUID-based sharding active (256 shards: 00-ff)`);
|
|
256
258
|
if (this.coordinator) {
|
|
257
259
|
console.log(`🤝 S3 Storage: Distributed coordination active (node: ${this.coordinator.nodeId})`);
|
|
258
260
|
}
|
|
@@ -264,24 +266,32 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
264
266
|
}
|
|
265
267
|
}
|
|
266
268
|
/**
|
|
267
|
-
* Get the S3 key for a noun
|
|
269
|
+
* Get the S3 key for a noun using UUID-based sharding
|
|
270
|
+
*
|
|
271
|
+
* Uses first 2 hex characters of UUID for consistent sharding.
|
|
272
|
+
* Path format: entities/nouns/vectors/{shardId}/{uuid}.json
|
|
273
|
+
*
|
|
274
|
+
* @example
|
|
275
|
+
* getNounKey('ab123456-1234-5678-9abc-def012345678')
|
|
276
|
+
* // returns 'entities/nouns/vectors/ab/ab123456-1234-5678-9abc-def012345678.json'
|
|
268
277
|
*/
|
|
269
278
|
getNounKey(id) {
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
return `shards/${shardId}/${this.nounPrefix}${id}.json`;
|
|
273
|
-
}
|
|
274
|
-
return `${this.nounPrefix}${id}.json`;
|
|
279
|
+
const shardId = getShardIdFromUuid(id);
|
|
280
|
+
return `${this.nounPrefix}${shardId}/${id}.json`;
|
|
275
281
|
}
|
|
276
282
|
/**
|
|
277
|
-
* Get the S3 key for a verb
|
|
283
|
+
* Get the S3 key for a verb using UUID-based sharding
|
|
284
|
+
*
|
|
285
|
+
* Uses first 2 hex characters of UUID for consistent sharding.
|
|
286
|
+
* Path format: verbs/{shardId}/{uuid}.json
|
|
287
|
+
*
|
|
288
|
+
* @example
|
|
289
|
+
* getVerbKey('cd987654-4321-8765-cba9-fed543210987')
|
|
290
|
+
* // returns 'verbs/cd/cd987654-4321-8765-cba9-fed543210987.json'
|
|
278
291
|
*/
|
|
279
292
|
getVerbKey(id) {
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
return `shards/${shardId}/${this.verbPrefix}${id}.json`;
|
|
283
|
-
}
|
|
284
|
-
return `${this.verbPrefix}${id}.json`;
|
|
293
|
+
const shardId = getShardIdFromUuid(id);
|
|
294
|
+
return `${this.verbPrefix}${shardId}/${id}.json`;
|
|
285
295
|
}
|
|
286
296
|
/**
|
|
287
297
|
* Override base class method to detect S3-specific throttling errors
|
|
@@ -775,7 +785,8 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
775
785
|
try {
|
|
776
786
|
// Import the GetObjectCommand only when needed
|
|
777
787
|
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
778
|
-
|
|
788
|
+
// Use getNounKey() to properly handle sharding
|
|
789
|
+
const key = this.getNounKey(id);
|
|
779
790
|
this.logger.trace(`Getting node ${id} from key: ${key}`);
|
|
780
791
|
// Try to get the node from the nouns directory
|
|
781
792
|
const response = await this.s3Client.send(new GetObjectCommand({
|
|
@@ -853,86 +864,97 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
853
864
|
}
|
|
854
865
|
}
|
|
855
866
|
/**
|
|
856
|
-
* Get nodes with pagination
|
|
867
|
+
* Get nodes with pagination using UUID-based sharding
|
|
868
|
+
*
|
|
869
|
+
* Iterates through 256 UUID-based shards (00-ff) to retrieve nodes.
|
|
870
|
+
* Cursor format: "shardIndex:s3ContinuationToken" to support pagination across shards.
|
|
871
|
+
*
|
|
857
872
|
* @param options Pagination options
|
|
858
873
|
* @returns Promise that resolves to a paginated result of nodes
|
|
874
|
+
*
|
|
875
|
+
* @example
|
|
876
|
+
* // First page
|
|
877
|
+
* const page1 = await getNodesWithPagination({ limit: 100 })
|
|
878
|
+
* // page1.nodes contains up to 100 nodes
|
|
879
|
+
* // page1.nextCursor might be "5:some-s3-token" (currently in shard 05)
|
|
880
|
+
*
|
|
881
|
+
* // Next page
|
|
882
|
+
* const page2 = await getNodesWithPagination({ limit: 100, cursor: page1.nextCursor })
|
|
883
|
+
* // Continues from where page1 left off
|
|
859
884
|
*/
|
|
860
885
|
async getNodesWithPagination(options = {}) {
|
|
861
886
|
await this.ensureInitialized();
|
|
862
887
|
const limit = options.limit || 100;
|
|
863
888
|
const useCache = options.useCache !== false;
|
|
864
889
|
try {
|
|
865
|
-
// Import the ListObjectsV2Command and GetObjectCommand only when needed
|
|
866
890
|
const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
|
|
867
|
-
// List objects with pagination
|
|
868
|
-
const listResponse = await this.s3Client.send(new ListObjectsV2Command({
|
|
869
|
-
Bucket: this.bucketName,
|
|
870
|
-
Prefix: this.nounPrefix,
|
|
871
|
-
MaxKeys: limit,
|
|
872
|
-
ContinuationToken: options.cursor
|
|
873
|
-
}));
|
|
874
|
-
// If listResponse is null/undefined or there are no objects, return an empty result
|
|
875
|
-
if (!listResponse ||
|
|
876
|
-
!listResponse.Contents ||
|
|
877
|
-
listResponse.Contents.length === 0) {
|
|
878
|
-
return {
|
|
879
|
-
nodes: [],
|
|
880
|
-
hasMore: false
|
|
881
|
-
};
|
|
882
|
-
}
|
|
883
|
-
// Extract node IDs from the keys
|
|
884
|
-
const nodeIds = listResponse.Contents
|
|
885
|
-
.filter((object) => object && object.Key)
|
|
886
|
-
.map((object) => object.Key.replace(this.nounPrefix, '').replace('.json', ''));
|
|
887
|
-
// Use the cache manager to get nodes efficiently
|
|
888
891
|
const nodes = [];
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
//
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
return null;
|
|
892
|
+
// Parse cursor (format: "shardIndex:s3ContinuationToken")
|
|
893
|
+
let startShardIndex = 0;
|
|
894
|
+
let s3ContinuationToken;
|
|
895
|
+
if (options.cursor) {
|
|
896
|
+
const parts = options.cursor.split(':', 2);
|
|
897
|
+
startShardIndex = parseInt(parts[0]) || 0;
|
|
898
|
+
s3ContinuationToken = parts[1] || undefined;
|
|
899
|
+
}
|
|
900
|
+
// Iterate through shards starting from cursor position
|
|
901
|
+
for (let shardIndex = startShardIndex; shardIndex < TOTAL_SHARDS; shardIndex++) {
|
|
902
|
+
const shardId = getShardIdByIndex(shardIndex);
|
|
903
|
+
const shardPrefix = `${this.nounPrefix}${shardId}/`;
|
|
904
|
+
// List objects in this shard
|
|
905
|
+
const listResponse = await this.s3Client.send(new ListObjectsV2Command({
|
|
906
|
+
Bucket: this.bucketName,
|
|
907
|
+
Prefix: shardPrefix,
|
|
908
|
+
MaxKeys: limit - nodes.length,
|
|
909
|
+
ContinuationToken: shardIndex === startShardIndex ? s3ContinuationToken : undefined
|
|
910
|
+
}));
|
|
911
|
+
// Extract node IDs from keys
|
|
912
|
+
if (listResponse.Contents && listResponse.Contents.length > 0) {
|
|
913
|
+
const nodeIds = listResponse.Contents
|
|
914
|
+
.filter((obj) => obj && obj.Key)
|
|
915
|
+
.map((obj) => {
|
|
916
|
+
// Extract UUID from: entities/nouns/vectors/ab/ab123456-uuid.json
|
|
917
|
+
let key = obj.Key;
|
|
918
|
+
if (key.startsWith(shardPrefix)) {
|
|
919
|
+
key = key.substring(shardPrefix.length);
|
|
918
920
|
}
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
for (const node of batchNodes) {
|
|
922
|
-
if (node) {
|
|
923
|
-
nodes.push(node);
|
|
921
|
+
if (key.endsWith('.json')) {
|
|
922
|
+
key = key.substring(0, key.length - 5);
|
|
924
923
|
}
|
|
925
|
-
|
|
924
|
+
return key;
|
|
925
|
+
});
|
|
926
|
+
// Load nodes for this shard (use direct loading for pagination scans)
|
|
927
|
+
const shardNodes = await this.loadNodesByIds(nodeIds, false);
|
|
928
|
+
nodes.push(...shardNodes);
|
|
929
|
+
}
|
|
930
|
+
// Check if we've reached the limit
|
|
931
|
+
if (nodes.length >= limit) {
|
|
932
|
+
const hasMore = !!listResponse.IsTruncated || shardIndex < TOTAL_SHARDS - 1;
|
|
933
|
+
const nextCursor = listResponse.IsTruncated
|
|
934
|
+
? `${shardIndex}:${listResponse.NextContinuationToken}`
|
|
935
|
+
: shardIndex < TOTAL_SHARDS - 1
|
|
936
|
+
? `${shardIndex + 1}:`
|
|
937
|
+
: undefined;
|
|
938
|
+
return {
|
|
939
|
+
nodes: nodes.slice(0, limit),
|
|
940
|
+
hasMore,
|
|
941
|
+
nextCursor
|
|
942
|
+
};
|
|
943
|
+
}
|
|
944
|
+
// This shard has more data but the limit was not reached (e.g. some listed nodes failed to load): return a cursor that resumes in this same shard
|
|
945
|
+
if (listResponse.IsTruncated) {
|
|
946
|
+
return {
|
|
947
|
+
nodes,
|
|
948
|
+
hasMore: true,
|
|
949
|
+
nextCursor: `${shardIndex}:${listResponse.NextContinuationToken}`
|
|
950
|
+
};
|
|
926
951
|
}
|
|
927
952
|
}
|
|
928
|
-
//
|
|
929
|
-
const hasMore = !!listResponse.IsTruncated;
|
|
930
|
-
// Set next cursor if there are more nodes
|
|
931
|
-
const nextCursor = listResponse.NextContinuationToken;
|
|
953
|
+
// All shards exhausted
|
|
932
954
|
return {
|
|
933
955
|
nodes,
|
|
934
|
-
hasMore,
|
|
935
|
-
nextCursor
|
|
956
|
+
hasMore: false,
|
|
957
|
+
nextCursor: undefined
|
|
936
958
|
};
|
|
937
959
|
}
|
|
938
960
|
catch (error) {
|
|
@@ -943,6 +965,43 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
943
965
|
};
|
|
944
966
|
}
|
|
945
967
|
}
|
|
968
|
+
/**
|
|
969
|
+
* Load nodes by IDs efficiently using cache or direct fetch
|
|
970
|
+
*/
|
|
971
|
+
async loadNodesByIds(nodeIds, useCache) {
|
|
972
|
+
const nodes = [];
|
|
973
|
+
if (useCache) {
|
|
974
|
+
const cachedNodes = await this.nounCacheManager.getMany(nodeIds);
|
|
975
|
+
for (const id of nodeIds) {
|
|
976
|
+
const node = cachedNodes.get(id);
|
|
977
|
+
if (node) {
|
|
978
|
+
nodes.push(node);
|
|
979
|
+
}
|
|
980
|
+
}
|
|
981
|
+
}
|
|
982
|
+
else {
|
|
983
|
+
// Load directly in batches
|
|
984
|
+
const batchSize = 50;
|
|
985
|
+
for (let i = 0; i < nodeIds.length; i += batchSize) {
|
|
986
|
+
const batch = nodeIds.slice(i, i + batchSize);
|
|
987
|
+
const batchNodes = await Promise.all(batch.map(async (id) => {
|
|
988
|
+
try {
|
|
989
|
+
return await this.getNoun_internal(id);
|
|
990
|
+
}
|
|
991
|
+
catch (error) {
|
|
992
|
+
this.logger.warn(`Failed to load node ${id}:`, error);
|
|
993
|
+
return null;
|
|
994
|
+
}
|
|
995
|
+
}));
|
|
996
|
+
for (const node of batchNodes) {
|
|
997
|
+
if (node) {
|
|
998
|
+
nodes.push(node);
|
|
999
|
+
}
|
|
1000
|
+
}
|
|
1001
|
+
}
|
|
1002
|
+
}
|
|
1003
|
+
return nodes;
|
|
1004
|
+
}
|
|
946
1005
|
/**
|
|
947
1006
|
* Get nouns by noun type (internal implementation)
|
|
948
1007
|
* @param nounType The noun type to filter by
|
|
@@ -1098,7 +1157,7 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1098
1157
|
try {
|
|
1099
1158
|
// Import the GetObjectCommand only when needed
|
|
1100
1159
|
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1101
|
-
const key =
|
|
1160
|
+
const key = this.getVerbKey(id);
|
|
1102
1161
|
this.logger.trace(`Getting edge ${id} from key: ${key}`);
|
|
1103
1162
|
// Try to get the edge from the verbs directory
|
|
1104
1163
|
const response = await this.s3Client.send(new GetObjectCommand({
|
|
@@ -1572,7 +1631,9 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1572
1631
|
try {
|
|
1573
1632
|
// Import the PutObjectCommand only when needed
|
|
1574
1633
|
const { PutObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1575
|
-
|
|
1634
|
+
// Use UUID-based sharding for metadata (consistent with noun vectors)
|
|
1635
|
+
const shardId = getShardIdFromUuid(id);
|
|
1636
|
+
const key = `${this.metadataPrefix}${shardId}/${id}.json`;
|
|
1576
1637
|
const body = JSON.stringify(metadata, null, 2);
|
|
1577
1638
|
this.logger.trace(`Saving noun metadata for ${id} to key: ${key}`);
|
|
1578
1639
|
// Save the noun metadata to S3-compatible storage
|
|
@@ -1701,7 +1762,9 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1701
1762
|
try {
|
|
1702
1763
|
// Import the GetObjectCommand only when needed
|
|
1703
1764
|
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1704
|
-
|
|
1765
|
+
// Use UUID-based sharding for metadata (consistent with noun vectors)
|
|
1766
|
+
const shardId = getShardIdFromUuid(id);
|
|
1767
|
+
const key = `${this.metadataPrefix}${shardId}/${id}.json`;
|
|
1705
1768
|
this.logger.trace(`Getting noun metadata for ${id} from key: ${key}`);
|
|
1706
1769
|
// Try to get the noun metadata
|
|
1707
1770
|
const response = await this.s3Client.send(new GetObjectCommand({
|
|
@@ -2698,12 +2761,54 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
2698
2761
|
filteredNodes = filteredByMetadata;
|
|
2699
2762
|
}
|
|
2700
2763
|
}
|
|
2764
|
+
// Calculate total count efficiently
|
|
2765
|
+
// For the first page (no cursor), we can estimate total count
|
|
2766
|
+
let totalCount;
|
|
2767
|
+
if (!cursor) {
|
|
2768
|
+
try {
|
|
2769
|
+
totalCount = await this.estimateTotalNounCount();
|
|
2770
|
+
}
|
|
2771
|
+
catch (error) {
|
|
2772
|
+
this.logger.warn('Failed to estimate total noun count:', error);
|
|
2773
|
+
// totalCount remains undefined
|
|
2774
|
+
}
|
|
2775
|
+
}
|
|
2701
2776
|
return {
|
|
2702
2777
|
items: filteredNodes,
|
|
2778
|
+
totalCount,
|
|
2703
2779
|
hasMore: result.hasMore,
|
|
2704
2780
|
nextCursor: result.nextCursor
|
|
2705
2781
|
};
|
|
2706
2782
|
}
|
|
2783
|
+
/**
|
|
2784
|
+
* Estimate total noun count by listing objects across all shards
|
|
2785
|
+
* This is more efficient than loading all nouns
|
|
2786
|
+
*/
|
|
2787
|
+
async estimateTotalNounCount() {
|
|
2788
|
+
const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
|
|
2789
|
+
let totalCount = 0;
|
|
2790
|
+
// Count across all UUID-based shards (00-ff)
|
|
2791
|
+
for (let shardIndex = 0; shardIndex < TOTAL_SHARDS; shardIndex++) {
|
|
2792
|
+
const shardId = getShardIdByIndex(shardIndex);
|
|
2793
|
+
const shardPrefix = `${this.nounPrefix}${shardId}/`;
|
|
2794
|
+
let shardCursor;
|
|
2795
|
+
let hasMore = true;
|
|
2796
|
+
while (hasMore) {
|
|
2797
|
+
const listResponse = await this.s3Client.send(new ListObjectsV2Command({
|
|
2798
|
+
Bucket: this.bucketName,
|
|
2799
|
+
Prefix: shardPrefix,
|
|
2800
|
+
MaxKeys: 1000,
|
|
2801
|
+
ContinuationToken: shardCursor
|
|
2802
|
+
}));
|
|
2803
|
+
if (listResponse.Contents) {
|
|
2804
|
+
totalCount += listResponse.Contents.length;
|
|
2805
|
+
}
|
|
2806
|
+
hasMore = !!listResponse.IsTruncated;
|
|
2807
|
+
shardCursor = listResponse.NextContinuationToken;
|
|
2808
|
+
}
|
|
2809
|
+
}
|
|
2810
|
+
return totalCount;
|
|
2811
|
+
}
|
|
2707
2812
|
/**
|
|
2708
2813
|
* Initialize counts from S3 storage
|
|
2709
2814
|
*/
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified UUID-based sharding for all storage adapters
|
|
3
|
+
*
|
|
4
|
+
* Uses first 2 hex characters of UUID for consistent, predictable sharding
|
|
5
|
+
* that scales from hundreds to millions of entities without configuration.
|
|
6
|
+
*
|
|
7
|
+
* Sharding characteristics:
|
|
8
|
+
* - 256 buckets (00-ff)
|
|
9
|
+
* - Deterministic (same UUID always maps to same shard)
|
|
10
|
+
* - No configuration required
|
|
11
|
+
* - Works across all storage types (filesystem, S3, GCS, memory)
|
|
12
|
+
* - Efficient for list operations and pagination
|
|
13
|
+
*/
|
|
14
|
+
/**
|
|
15
|
+
* Extract shard ID from UUID
|
|
16
|
+
*
|
|
17
|
+
* Uses first 2 hex characters of the UUID as the shard ID.
|
|
18
|
+
* This provides 256 evenly-distributed buckets (00-ff).
|
|
19
|
+
*
|
|
20
|
+
* @param uuid - UUID string (with or without hyphens)
|
|
21
|
+
* @returns 2-character hex shard ID (00-ff)
|
|
22
|
+
*
|
|
23
|
+
* @example
|
|
24
|
+
* ```typescript
|
|
25
|
+
* getShardIdFromUuid('ab123456-1234-5678-9abc-def012345678') // returns 'ab'
|
|
26
|
+
* getShardIdFromUuid('cd987654-4321-8765-cba9-fed543210987') // returns 'cd'
|
|
27
|
+
* getShardIdFromUuid('00000000-0000-0000-0000-000000000000') // returns '00'
|
|
28
|
+
* ```
|
|
29
|
+
*/
|
|
30
|
+
export declare function getShardIdFromUuid(uuid: string): string;
|
|
31
|
+
/**
|
|
32
|
+
* Get all possible shard IDs (00-ff)
|
|
33
|
+
*
|
|
34
|
+
* Returns array of 256 shard IDs in ascending order.
|
|
35
|
+
* Useful for iterating through all shards during pagination.
|
|
36
|
+
*
|
|
37
|
+
* @returns Array of 256 shard IDs
|
|
38
|
+
*
|
|
39
|
+
* @example
|
|
40
|
+
* ```typescript
|
|
41
|
+
* const shards = getAllShardIds()
|
|
42
|
+
* // ['00', '01', '02', ..., 'fd', 'fe', 'ff']
|
|
43
|
+
*
|
|
44
|
+
* for (const shardId of shards) {
|
|
45
|
+
* const prefix = `entities/nouns/vectors/${shardId}/`
|
|
46
|
+
* // List objects with this prefix
|
|
47
|
+
* }
|
|
48
|
+
* ```
|
|
49
|
+
*/
|
|
50
|
+
export declare function getAllShardIds(): string[];
|
|
51
|
+
/**
|
|
52
|
+
* Get shard ID for a given index (0-255)
|
|
53
|
+
*
|
|
54
|
+
* @param index - Shard index (0-255)
|
|
55
|
+
* @returns 2-character hex shard ID
|
|
56
|
+
*
|
|
57
|
+
* @example
|
|
58
|
+
* ```typescript
|
|
59
|
+
* getShardIdByIndex(0) // '00'
|
|
60
|
+
* getShardIdByIndex(15) // '0f'
|
|
61
|
+
* getShardIdByIndex(255) // 'ff'
|
|
62
|
+
* ```
|
|
63
|
+
*/
|
|
64
|
+
export declare function getShardIdByIndex(index: number): string;
|
|
65
|
+
/**
|
|
66
|
+
* Get shard index from shard ID (0-255)
|
|
67
|
+
*
|
|
68
|
+
* @param shardId - 2-character hex shard ID
|
|
69
|
+
* @returns Shard index (0-255)
|
|
70
|
+
*
|
|
71
|
+
* @example
|
|
72
|
+
* ```typescript
|
|
73
|
+
* getShardIndexFromId('00') // 0
|
|
74
|
+
* getShardIndexFromId('0f') // 15
|
|
75
|
+
* getShardIndexFromId('ff') // 255
|
|
76
|
+
* ```
|
|
77
|
+
*/
|
|
78
|
+
export declare function getShardIndexFromId(shardId: string): number;
|
|
79
|
+
/**
|
|
80
|
+
* Total number of shards in the system
|
|
81
|
+
*/
|
|
82
|
+
export declare const TOTAL_SHARDS = 256;
|
|
83
|
+
/**
|
|
84
|
+
* Shard configuration (read-only)
|
|
85
|
+
*/
|
|
86
|
+
export declare const SHARD_CONFIG: {
|
|
87
|
+
/**
|
|
88
|
+
* Total number of shards (256)
|
|
89
|
+
*/
|
|
90
|
+
readonly count: 256;
|
|
91
|
+
/**
|
|
92
|
+
* Number of hex characters used for sharding (2)
|
|
93
|
+
*/
|
|
94
|
+
readonly prefixLength: 2;
|
|
95
|
+
/**
|
|
96
|
+
* Sharding method description
|
|
97
|
+
*/
|
|
98
|
+
readonly method: "uuid-prefix";
|
|
99
|
+
/**
|
|
100
|
+
* Whether sharding is always enabled
|
|
101
|
+
*/
|
|
102
|
+
readonly alwaysEnabled: true;
|
|
103
|
+
};
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified UUID-based sharding for all storage adapters
|
|
3
|
+
*
|
|
4
|
+
* Uses first 2 hex characters of UUID for consistent, predictable sharding
|
|
5
|
+
* that scales from hundreds to millions of entities without configuration.
|
|
6
|
+
*
|
|
7
|
+
* Sharding characteristics:
|
|
8
|
+
* - 256 buckets (00-ff)
|
|
9
|
+
* - Deterministic (same UUID always maps to same shard)
|
|
10
|
+
* - No configuration required
|
|
11
|
+
* - Works across all storage types (filesystem, S3, GCS, memory)
|
|
12
|
+
* - Efficient for list operations and pagination
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Extract shard ID from UUID
 *
 * Uses the first 2 characters of the UUID (after lower-casing and removing
 * hyphens) as the shard ID, giving 256 possible buckets (00-ff).
 *
 * Validation is intentionally light: the normalized value must be exactly
 * 32 characters long and its 2-character prefix must be hex. The remaining
 * 30 characters are not checked.
 *
 * @param uuid - UUID string (with or without hyphens, any letter case)
 * @returns 2-character lowercase hex shard ID (00-ff)
 * @throws {Error} If uuid is missing/empty, is not 32 characters long once
 *   hyphens are removed, or does not start with 2 hex characters
 * @throws {TypeError} If uuid is a non-empty value that is not a string
 *
 * @example
 * ```typescript
 * getShardIdFromUuid('ab123456-1234-5678-9abc-def012345678') // returns 'ab'
 * getShardIdFromUuid('cd987654-4321-8765-cba9-fed543210987') // returns 'cd'
 * getShardIdFromUuid('00000000-0000-0000-0000-000000000000') // returns '00'
 * ```
 */
export function getShardIdFromUuid(uuid) {
    if (!uuid) {
        throw new Error('UUID is required for sharding');
    }
    if (typeof uuid !== 'string') {
        // Without this guard a number or object fails below with an opaque
        // "uuid.toLowerCase is not a function"; keep the TypeError class but
        // say what was actually wrong.
        throw new TypeError(`UUID must be a string for sharding (got ${typeof uuid})`);
    }
    // Remove hyphens and convert to lowercase
    const normalized = uuid.toLowerCase().replace(/-/g, '');
    // Length check only: 32 characters expected once hyphens are stripped
    if (normalized.length !== 32) {
        throw new Error(`Invalid UUID format: ${uuid} (expected 32 hex chars, got ${normalized.length})`);
    }
    // Extract first 2 characters
    const shardId = normalized.substring(0, 2);
    // The prefix becomes part of a storage key, so it must be valid hex
    if (!/^[0-9a-f]{2}$/.test(shardId)) {
        throw new Error(`Invalid UUID prefix: ${shardId} (expected 2 hex chars)`);
    }
    return shardId;
}
|
|
48
|
+
/**
 * List every shard ID, from '00' to 'ff'.
 *
 * Builds a fresh array of 256 two-character lowercase hex strings in
 * ascending numeric order on each call.
 *
 * @returns Array of 256 shard IDs
 *
 * @example
 * ```typescript
 * const shards = getAllShardIds()
 * // ['00', '01', '02', ..., 'fd', 'fe', 'ff']
 *
 * for (const shardId of shards) {
 *   const prefix = `entities/nouns/vectors/${shardId}/`
 *   // List objects with this prefix
 * }
 * ```
 */
export function getAllShardIds() {
    return Array.from({ length: 256 }, (_unused, position) => position.toString(16).padStart(2, '0'));
}
|
|
74
|
+
/**
 * Get shard ID for a given index (0-255)
 *
 * @param index - Shard index; must be an integer in the range 0-255
 * @returns 2-character lowercase hex shard ID
 * @throws {Error} If index is not an integer or lies outside 0-255
 *
 * @example
 * ```typescript
 * getShardIdByIndex(0)   // '00'
 * getShardIdByIndex(15)  // '0f'
 * getShardIdByIndex(255) // 'ff'
 * ```
 */
export function getShardIdByIndex(index) {
    // A plain range comparison lets NaN and fractions through (NaN compares
    // false both ways; 1.5 would render as '1.8'), which would yield shard
    // IDs that do not exist. Require a real integer first.
    if (!Number.isInteger(index) || index < 0 || index > 255) {
        throw new Error(`Shard index out of range: ${index} (expected 0-255)`);
    }
    return index.toString(16).padStart(2, '0');
}
|
|
93
|
+
/**
 * Get shard index from shard ID (0-255)
 *
 * The ID is matched case-insensitively, so 'FF' and 'ff' both map to 255.
 *
 * @param shardId - 2-character hex shard ID (any letter case)
 * @returns Shard index (0-255)
 * @throws {Error} If shardId is not exactly 2 hex characters
 *
 * @example
 * ```typescript
 * getShardIndexFromId('00') // 0
 * getShardIndexFromId('0f') // 15
 * getShardIndexFromId('ff') // 255
 * getShardIndexFromId('FF') // 255
 * ```
 */
export function getShardIndexFromId(shardId) {
    // Lower-case before validating so upper-case hex is accepted, matching
    // the case-insensitive handling in getShardIdFromUuid.
    const normalized = String(shardId).toLowerCase();
    if (!/^[0-9a-f]{2}$/.test(normalized)) {
        throw new Error(`Invalid shard ID: ${shardId} (expected 2 hex chars)`);
    }
    return parseInt(normalized, 16);
}
|
|
112
|
+
/**
 * Total number of shards in the system
 */
export const TOTAL_SHARDS = 256;
/**
 * Shard configuration (read-only)
 *
 * Frozen so the "read-only" contract is enforced at runtime: an attempted
 * write throws in strict-mode code (which includes ES modules) instead of
 * silently changing the shared configuration.
 */
export const SHARD_CONFIG = Object.freeze({
    /**
     * Total number of shards (256)
     */
    count: TOTAL_SHARDS,
    /**
     * Number of hex characters used for sharding (2)
     */
    prefixLength: 2,
    /**
     * Sharding method description
     */
    method: 'uuid-prefix',
    /**
     * Whether sharding is always enabled
     */
    alwaysEnabled: true
});
|
|
137
|
+
//# sourceMappingURL=sharding.js.map
|