@soulcraft/brainy 4.11.1 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +163 -2
- package/README.md +37 -0
- package/dist/augmentations/brainyAugmentation.d.ts +76 -0
- package/dist/augmentations/brainyAugmentation.js +126 -0
- package/dist/brainy.d.ts +161 -0
- package/dist/brainy.js +451 -0
- package/dist/cli/commands/cow.d.ts +60 -0
- package/dist/cli/commands/cow.js +444 -0
- package/dist/cli/index.js +50 -0
- package/dist/hnsw/hnswIndex.d.ts +41 -0
- package/dist/hnsw/hnswIndex.js +96 -1
- package/dist/hnsw/typeAwareHNSWIndex.d.ts +9 -0
- package/dist/hnsw/typeAwareHNSWIndex.js +22 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +10 -0
- package/dist/neural/signals/PatternSignal.js +7 -1
- package/dist/storage/baseStorage.d.ts +21 -0
- package/dist/storage/baseStorage.js +108 -0
- package/dist/storage/cow/BlobStorage.d.ts +231 -0
- package/dist/storage/cow/BlobStorage.js +435 -0
- package/dist/storage/cow/CommitLog.d.ts +199 -0
- package/dist/storage/cow/CommitLog.js +363 -0
- package/dist/storage/cow/CommitObject.d.ts +276 -0
- package/dist/storage/cow/CommitObject.js +431 -0
- package/dist/storage/cow/RefManager.d.ts +213 -0
- package/dist/storage/cow/RefManager.js +409 -0
- package/dist/storage/cow/TreeObject.d.ts +177 -0
- package/dist/storage/cow/TreeObject.js +293 -0
- package/dist/storage/storageFactory.d.ts +7 -0
- package/dist/storage/storageFactory.js +91 -74
- package/dist/types/brainy.types.d.ts +2 -0
- package/package.json +1 -1
package/dist/hnsw/hnswIndex.js
CHANGED

@@ -13,7 +13,6 @@ const DEFAULT_CONFIG = {
     ml: 16 // Max level
 };
 export class HNSWIndex {
-    // Always-adaptive caching (v3.36.0+) - no "mode" concept, system adapts automatically
     constructor(config = {}, distanceFunction = euclideanDistance, options = {}) {
         this.nouns = new Map();
         this.entryPointId = null;
@@ -24,6 +23,11 @@ export class HNSWIndex {
         this.dimension = null;
         this.useParallelization = true; // Whether to use parallelization for performance-critical operations
         this.storage = null; // Storage adapter for HNSW persistence (v3.35.0+)
+        // Always-adaptive caching (v3.36.0+) - no "mode" concept, system adapts automatically
+        // COW (Copy-on-Write) support - v5.0.0
+        this.cowEnabled = false;
+        this.cowModifiedNodes = new Set();
+        this.cowParent = null;
         this.config = { ...DEFAULT_CONFIG, ...config };
         this.distanceFunction = distanceFunction;
         this.useParallelization =
@@ -46,6 +50,87 @@ export class HNSWIndex {
     getUseParallelization() {
         return this.useParallelization;
     }
+    /**
+     * Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
+     *
+     * Snowflake-style instant fork: O(1) shallow copy of Maps, lazy deep copy on write.
+     *
+     * @param parent - Parent HNSW index to copy from
+     *
+     * Performance:
+     * - Fork time: <10ms for 1M+ nodes (just copies Map references)
+     * - Memory: Shared reads, only modified nodes duplicated (~10-20% overhead)
+     * - Reads: Same speed as parent (shared data structures)
+     *
+     * @example
+     * ```typescript
+     * const parent = new HNSWIndex(config)
+     * // ... parent has 1M nodes ...
+     *
+     * const fork = new HNSWIndex(config)
+     * fork.enableCOW(parent) // <10ms - instant!
+     *
+     * // Reads share data
+     * await fork.search(query) // Fast, uses parent's data
+     *
+     * // Writes trigger COW
+     * await fork.addItem(newItem) // Deep copies only modified nodes
+     * ```
+     */
+    enableCOW(parent) {
+        this.cowEnabled = true;
+        this.cowParent = parent;
+        // Shallow copy Maps - O(1) per Map, just copies references
+        // All nodes/connections are shared until first write
+        this.nouns = new Map(parent.nouns);
+        this.highLevelNodes = new Map();
+        for (const [level, nodeSet] of parent.highLevelNodes.entries()) {
+            this.highLevelNodes.set(level, new Set(nodeSet));
+        }
+        // Copy scalar values
+        this.entryPointId = parent.entryPointId;
+        this.maxLevel = parent.maxLevel;
+        this.dimension = parent.dimension;
+        // Share cache (COW at cache level)
+        this.unifiedCache = parent.unifiedCache;
+        // Share config and distance function
+        this.config = parent.config;
+        this.distanceFunction = parent.distanceFunction;
+        this.useParallelization = parent.useParallelization;
+        prodLog.info(`HNSW COW enabled: ${parent.nouns.size} nodes shallow copied`);
+    }
+    /**
+     * Ensure node is copied before modification (lazy COW)
+     *
+     * Deep copies a node only when first modified. Subsequent modifications
+     * use the already-copied node.
+     *
+     * @param nodeId - Node ID to ensure is copied
+     * @private
+     */
+    ensureCOW(nodeId) {
+        if (!this.cowEnabled)
+            return;
+        if (this.cowModifiedNodes.has(nodeId))
+            return; // Already copied
+        const original = this.nouns.get(nodeId);
+        if (!original)
+            return;
+        // Deep copy connections Map (separate Map + Sets for each level)
+        const connectionsCopy = new Map();
+        for (const [level, ids] of original.connections.entries()) {
+            connectionsCopy.set(level, new Set(ids));
+        }
+        // Deep copy node
+        const nodeCopy = {
+            id: original.id,
+            vector: [...original.vector], // Deep copy vector array
+            connections: connectionsCopy,
+            level: original.level
+        };
+        this.nouns.set(nodeId, nodeCopy);
+        this.cowModifiedNodes.add(nodeId);
+    }
     /**
      * Calculate distances between a query vector and multiple vectors in parallel
      * This is used to optimize performance for search operations
@@ -186,6 +271,8 @@ export class HNSWIndex {
                 // Skip neighbors that don't exist (expected during rapid additions/deletions)
                 continue;
             }
+            // COW: Ensure neighbor is copied before modification
+            this.ensureCOW(neighborId);
             noun.connections.get(level).add(neighborId);
             // Add reverse connection
             if (!neighbor.connections.has(level)) {
@@ -392,10 +479,14 @@ export class HNSWIndex {
         if (!this.nouns.has(id)) {
             return false;
         }
+        // COW: Ensure node is copied before modification
+        this.ensureCOW(id);
         const noun = this.nouns.get(id);
         // Remove connections to this noun from all neighbors
         for (const [level, connections] of noun.connections.entries()) {
             for (const neighborId of connections) {
+                // COW: Ensure neighbor is copied before modification
+                this.ensureCOW(neighborId);
                 const neighbor = this.nouns.get(neighborId);
                 if (!neighbor) {
                     // Skip neighbors that don't exist (expected during rapid additions/deletions)
@@ -412,6 +503,8 @@ export class HNSWIndex {
         for (const [nounId, otherNoun] of this.nouns.entries()) {
             if (nounId === id)
                 continue; // Skip the noun being removed
+            // COW: Ensure noun is copied before modification
+            this.ensureCOW(nounId);
             for (const [level, connections] of otherNoun.connections.entries()) {
                 if (connections.has(id)) {
                     connections.delete(id);
@@ -1109,6 +1202,8 @@ export class HNSWIndex {
      * Ensure a noun doesn't have too many connections at a given level
      */
     async pruneConnections(noun, level) {
+        // COW: Ensure noun is copied before modification
+        this.ensureCOW(noun.id);
         const connections = noun.connections.get(level);
         if (connections.size <= this.config.M) {
             return;
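The COW mechanics above are easier to see stripped of HNSW details. The following is a minimal illustrative sketch (the `Node` and `CowNodeMap` names are not part of the package) of the same lazy-copy discipline over a plain node Map; it shows why every mutation path in the hunks above calls `ensureCOW()` on the neighbor as well, not just on the node being edited:

```typescript
// Illustrative reduction of enableCOW()/ensureCOW() to a bare node Map.
interface Node {
    id: string;
    connections: Set<string>;
}

class CowNodeMap {
    private nodes: Map<string, Node>;
    private modified = new Set<string>();

    constructor(parent: Map<string, Node>) {
        this.nodes = new Map(parent); // reference-only copy, no node cloning
    }

    // Lazy COW guard: deep-copy a node the first time it is written, so the
    // parent's instance is never mutated through the shared reference.
    private ensureCopied(id: string): void {
        if (this.modified.has(id)) return;
        const original = this.nodes.get(id);
        if (!original) return;
        this.nodes.set(id, { id: original.id, connections: new Set(original.connections) });
        this.modified.add(id);
    }

    connect(a: string, b: string): void {
        this.ensureCopied(a); // both endpoints are mutated,
        this.ensureCopied(b); // so both need private copies
        this.nodes.get(a)?.connections.add(b);
        this.nodes.get(b)?.connections.add(a);
    }
}
```

Skipping the guard on either endpoint would add the reverse edge to a node the parent still owns, silently corrupting the parent index.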
package/dist/hnsw/typeAwareHNSWIndex.d.ts
CHANGED

@@ -54,6 +54,15 @@ export declare class TypeAwareHNSWIndex {
         useParallelization?: boolean;
         storage?: BaseStorage;
     });
+    /**
+     * Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
+     *
+     * Propagates enableCOW() to all underlying type-specific HNSW indexes.
+     * Each index performs O(1) shallow copy of its own data structures.
+     *
+     * @param parent - Parent TypeAwareHNSWIndex to copy from
+     */
+    enableCOW(parent: TypeAwareHNSWIndex): void;
     /**
      * Get or create HNSW index for a specific type (lazy initialization)
      *
package/dist/hnsw/typeAwareHNSWIndex.js
CHANGED

@@ -49,6 +49,28 @@ export class TypeAwareHNSWIndex {
             : true;
         prodLog.info('TypeAwareHNSWIndex initialized (Phase 2: Type-Aware HNSW)');
     }
+    /**
+     * Enable COW (Copy-on-Write) mode - Instant fork via shallow copy
+     *
+     * Propagates enableCOW() to all underlying type-specific HNSW indexes.
+     * Each index performs O(1) shallow copy of its own data structures.
+     *
+     * @param parent - Parent TypeAwareHNSWIndex to copy from
+     */
+    enableCOW(parent) {
+        // Shallow copy indexes Map
+        this.indexes = new Map(parent.indexes);
+        // Enable COW on each underlying type-specific index
+        for (const [type, parentIndex] of parent.indexes.entries()) {
+            const childIndex = new HNSWIndex(this.config, this.distanceFunction, {
+                useParallelization: this.useParallelization,
+                storage: this.storage || undefined
+            });
+            childIndex.enableCOW(parentIndex);
+            this.indexes.set(type, childIndex);
+        }
+        prodLog.info(`TypeAwareHNSWIndex COW enabled: ${parent.indexes.size} type-specific indexes shallow copied`);
+    }
     /**
      * Get or create HNSW index for a specific type (lazy initialization)
      *
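A hedged usage sketch of the propagated fork; the constructor options are abbreviated here and assumed optional (only `useParallelization` and `storage` appear in the declaration diff above):

```typescript
// Hypothetical usage; constructor options beyond those shown in the
// declaration diff are assumptions, not confirmed package API.
const parent = new TypeAwareHNSWIndex({ useParallelization: true });
// ... parent accumulates per-type indexes as items are added ...

const fork = new TypeAwareHNSWIndex({ useParallelization: true });
fork.enableCOW(parent); // each type-specific index is shallow-copied instantly
```

Note the design choice in the implementation: each parent index is wrapped in a fresh `HNSWIndex` before forking, so the parent's per-type indexes are never reachable, let alone mutable, through the child's `indexes` Map.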
package/dist/index.d.ts
CHANGED

@@ -29,6 +29,12 @@ export { UniversalSentenceEncoder, TransformerEmbedding, createEmbeddingFunction
 import { OPFSStorage, MemoryStorage, R2Storage, S3CompatibleStorage, createStorage } from './storage/storageFactory.js';
 export { OPFSStorage, MemoryStorage, R2Storage, S3CompatibleStorage, createStorage };
 export { FileSystemStorage } from './storage/adapters/fileSystemStorage.js';
+import { CommitLog } from './storage/cow/CommitLog.js';
+import { CommitObject, CommitBuilder } from './storage/cow/CommitObject.js';
+import { BlobStorage } from './storage/cow/BlobStorage.js';
+import { RefManager } from './storage/cow/RefManager.js';
+import { TreeObject } from './storage/cow/TreeObject.js';
+export { CommitLog, CommitObject, CommitBuilder, BlobStorage, RefManager, TreeObject };
 import { Pipeline, pipeline, augmentationPipeline, ExecutionMode, PipelineOptions, PipelineResult, createPipeline, createStreamingPipeline, StreamlinedExecutionMode, StreamlinedPipelineOptions, StreamlinedPipelineResult } from './pipeline.js';
 export { Pipeline, pipeline, augmentationPipeline, ExecutionMode, createPipeline, createStreamingPipeline, StreamlinedExecutionMode, };
 export type { PipelineOptions, PipelineResult, StreamlinedPipelineOptions, StreamlinedPipelineResult };
package/dist/index.js
CHANGED

@@ -67,6 +67,16 @@ import { OPFSStorage, MemoryStorage, R2Storage, S3CompatibleStorage, createStora
 export { OPFSStorage, MemoryStorage, R2Storage, S3CompatibleStorage, createStorage };
 // FileSystemStorage is exported separately to avoid browser build issues
 export { FileSystemStorage } from './storage/adapters/fileSystemStorage.js';
+// Export COW (Copy-on-Write) infrastructure for v5.0.0
+// Enables premium augmentations to implement temporal features
+import { CommitLog } from './storage/cow/CommitLog.js';
+import { CommitObject, CommitBuilder } from './storage/cow/CommitObject.js';
+import { BlobStorage } from './storage/cow/BlobStorage.js';
+import { RefManager } from './storage/cow/RefManager.js';
+import { TreeObject } from './storage/cow/TreeObject.js';
+export {
+// COW infrastructure
+CommitLog, CommitObject, CommitBuilder, BlobStorage, RefManager, TreeObject };
 // Export unified pipeline
 import { Pipeline, pipeline, augmentationPipeline, ExecutionMode, createPipeline, createStreamingPipeline, StreamlinedExecutionMode } from './pipeline.js';
 // Sequential pipeline removed - use unified pipeline instead
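With both entry points re-exporting the COW primitives, consumers can pull them from the package root. A minimal import sketch, limited to the named exports that appear verbatim in the diff above:

```typescript
// These named exports come straight from the index.js/index.d.ts diff above.
import {
    BlobStorage,
    CommitLog,
    CommitObject,
    CommitBuilder,
    RefManager,
    TreeObject
} from '@soulcraft/brainy';
```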
package/dist/neural/signals/PatternSignal.js
CHANGED

@@ -73,6 +73,11 @@ export class PatternSignal {
             /\b[A-Z][a-z]+,\s*[A-Z]{2}\b/, // City, State format (e.g., "Paris, FR")
             /\b(?:street|avenue|road|boulevard|lane|drive)\b/i
         ]);
+        // Location patterns - MEDIUM PRIORITY (city/country format - requires more context)
+        // v4.11.2: Lower priority to avoid matching person names with commas
+        this.addPatterns(NounType.Location, 0.75, [
+            /\b[A-Z][a-z]+,\s*(?:Japan|China|France|Germany|Italy|Spain|Canada|Mexico|Brazil|India|Australia|Russia|UK|USA)\b/
+        ]);
         // Event patterns - HIGH PRIORITY (specific event keywords)
         this.addPatterns(NounType.Event, 0.84, [
             /\b(?:conference|summit|symposium|workshop|seminar|webinar)\b/i,
@@ -109,7 +114,8 @@ export class PatternSignal {
         ]);
         // Technology patterns (Thing type)
         this.addPatterns(NounType.Thing, 0.82, [
-            /\b(?:JavaScript|TypeScript|Python|Java|
+            /\b(?:JavaScript|TypeScript|Python|Java|Go|Rust|Swift|Kotlin)\b/,
+            /\bC\+\+(?!\w)/, // v4.11.2: Special handling for C++ (word boundary doesn't work with +)
             /\b(?:React|Vue|Angular|Node|Express|Django|Flask|Rails)\b/,
             /\b(?:AWS|Azure|GCP|Docker|Kubernetes|Git|GitHub|GitLab)\b/,
             /\b(?:API|SDK|CLI|IDE|framework|library|package|module)\b/i,
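The C++ fix is worth unpacking: `\b` only matches where a word character meets a non-word character, and `+` is itself a non-word character, so `\bC\+\+\b` can never match at the end of `C++` unless a word character happens to follow. Standard JavaScript regex semantics make the difference easy to verify:

```typescript
// '\b' after '+' would need a following word character, defeating the match:
/\bC\+\+\b/.test('I know C++');     // false - no boundary after the final '+'
// The negative lookahead forbids a following word character instead:
/\bC\+\+(?!\w)/.test('I know C++'); // true
/\bC\+\+(?!\w)/.test('C++x');       // false - embedded matches still rejected
```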
package/dist/storage/baseStorage.d.ts
CHANGED

@@ -5,6 +5,9 @@
 import { GraphAdjacencyIndex } from '../graph/graphAdjacencyIndex.js';
 import { GraphVerb, HNSWNoun, HNSWVerb, NounMetadata, VerbMetadata, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../coreTypes.js';
 import { BaseStorageAdapter } from './adapters/baseStorageAdapter.js';
+import { RefManager } from './cow/RefManager.js';
+import { BlobStorage } from './cow/BlobStorage.js';
+import { CommitLog } from './cow/CommitLog.js';
 /**
  * Storage adapter batch configuration profile
  * Each storage adapter declares its optimal batch behavior for rate limiting
@@ -48,6 +51,11 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
     protected isInitialized: boolean;
     protected graphIndex?: GraphAdjacencyIndex;
     protected readOnly: boolean;
+    refManager?: RefManager;
+    blobStorage?: BlobStorage;
+    commitLog?: CommitLog;
+    currentBranch: string;
+    protected cowEnabled: boolean;
     /**
      * Analyze a storage key to determine its routing and path
      * @param id - The key to analyze (UUID or system key)
@@ -65,6 +73,19 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
      * Ensure the storage adapter is initialized
      */
     protected ensureInitialized(): Promise<void>;
+    /**
+     * Initialize COW (Copy-on-Write) support
+     * Creates RefManager and BlobStorage for instant fork() capability
+     *
+     * @param options - COW initialization options
+     * @param options.branch - Initial branch name (default: 'main')
+     * @param options.enableCompression - Enable zstd compression for blobs (default: true)
+     * @returns Promise that resolves when COW is initialized
+     */
+    protected initializeCOW(options?: {
+        branch?: string;
+        enableCompression?: boolean;
+    }): Promise<void>;
     /**
      * Save a noun to storage (v4.0.0: vector only, metadata saved separately)
      * @param noun Pure HNSW vector data (no metadata)
package/dist/storage/baseStorage.js
CHANGED

@@ -7,6 +7,9 @@ import { BaseStorageAdapter } from './adapters/baseStorageAdapter.js';
 import { validateNounType, validateVerbType } from '../utils/typeValidation.js';
 import { NounType } from '../types/graphTypes.js';
 import { getShardIdFromUuid } from './sharding.js';
+import { RefManager } from './cow/RefManager.js';
+import { BlobStorage } from './cow/BlobStorage.js';
+import { CommitLog } from './cow/CommitLog.js';
 // Clean directory structure (v4.7.2+)
 // All storage adapters use this consistent structure
 export const NOUNS_METADATA_DIR = 'entities/nouns/metadata';
@@ -38,6 +41,8 @@ export class BaseStorage extends BaseStorageAdapter {
         super(...arguments);
         this.isInitialized = false;
         this.readOnly = false;
+        this.currentBranch = 'main';
+        this.cowEnabled = false;
     }
     /**
      * Analyze a storage key to determine its routing and path
@@ -119,6 +124,109 @@ export class BaseStorage extends BaseStorageAdapter {
             await this.init();
         }
     }
+    /**
+     * Initialize COW (Copy-on-Write) support
+     * Creates RefManager and BlobStorage for instant fork() capability
+     *
+     * @param options - COW initialization options
+     * @param options.branch - Initial branch name (default: 'main')
+     * @param options.enableCompression - Enable zstd compression for blobs (default: true)
+     * @returns Promise that resolves when COW is initialized
+     */
+    async initializeCOW(options) {
+        if (this.cowEnabled) {
+            // Already initialized
+            return;
+        }
+        // Set current branch
+        this.currentBranch = options?.branch || 'main';
+        // Create COWStorageAdapter bridge
+        // This adapts BaseStorage's methods to the simple key-value interface
+        const cowAdapter = {
+            get: async (key) => {
+                try {
+                    const data = await this.readObjectFromPath(`_cow/${key}`);
+                    if (data === null) {
+                        return undefined;
+                    }
+                    // Convert to Buffer
+                    if (Buffer.isBuffer(data)) {
+                        return data;
+                    }
+                    return Buffer.from(JSON.stringify(data));
+                }
+                catch (error) {
+                    return undefined;
+                }
+            },
+            put: async (key, data) => {
+                // Store as Buffer (for blob data) or parse JSON (for metadata)
+                let obj;
+                try {
+                    // Try to parse as JSON first (for metadata)
+                    obj = JSON.parse(data.toString());
+                }
+                catch {
+                    // Not JSON, store as binary (base64 encoded for JSON storage)
+                    obj = { _binary: true, data: data.toString('base64') };
+                }
+                await this.writeObjectToPath(`_cow/${key}`, obj);
+            },
+            delete: async (key) => {
+                try {
+                    await this.deleteObjectFromPath(`_cow/${key}`);
+                }
+                catch (error) {
+                    // Ignore if doesn't exist
+                }
+            },
+            list: async (prefix) => {
+                try {
+                    const paths = await this.listObjectsUnderPath(`_cow/${prefix}`);
+                    // Remove _cow/ prefix and return relative keys
+                    return paths.map(p => p.replace(/^_cow\//, ''));
+                }
+                catch (error) {
+                    return [];
+                }
+            }
+        };
+        // Initialize RefManager
+        this.refManager = new RefManager(cowAdapter);
+        // Initialize BlobStorage
+        this.blobStorage = new BlobStorage(cowAdapter, {
+            enableCompression: options?.enableCompression !== false
+        });
+        // Initialize CommitLog
+        this.commitLog = new CommitLog(this.blobStorage, this.refManager);
+        // Check if main branch exists, create if not
+        const mainRef = await this.refManager.getRef('main');
+        if (!mainRef) {
+            // Create initial commit (empty tree)
+            const emptyTreeHash = '0000000000000000000000000000000000000000000000000000000000000000';
+            await this.refManager.createBranch('main', emptyTreeHash, {
+                description: 'Initial branch',
+                author: 'system'
+            });
+        }
+        // Set HEAD to current branch
+        const currentRef = await this.refManager.getRef(this.currentBranch);
+        if (currentRef) {
+            await this.refManager.setHead(this.currentBranch);
+        }
+        else {
+            // Branch doesn't exist, create it from main
+            const mainCommit = await this.refManager.resolveRef('main');
+            if (mainCommit) {
+                await this.refManager.createBranch(this.currentBranch, mainCommit, {
+                    description: `Branch created from main`,
+                    author: 'system'
+                });
+                await this.refManager.setHead(this.currentBranch);
+            }
+        }
+        this.cowEnabled = true;
+    }
     /**
      * Save a noun to storage (v4.0.0: vector only, metadata saved separately)
      * @param noun Pure HNSW vector data (no metadata)
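The bridge above targets the `COWStorageAdapter` interface declared in `BlobStorage.d.ts` (next file). For tests or experiments, a trivial in-memory implementation of the same four-method contract suffices; this is a sketch, and the import path assumes the interface type is reachable from the package root:

```typescript
import type { COWStorageAdapter } from '@soulcraft/brainy'; // assumed export path

// Minimal in-memory adapter satisfying the four-method contract.
class MemoryCOWAdapter implements COWStorageAdapter {
    private store = new Map<string, Buffer>();

    async get(key: string): Promise<Buffer | undefined> {
        return this.store.get(key);
    }

    async put(key: string, data: Buffer): Promise<void> {
        this.store.set(key, data);
    }

    async delete(key: string): Promise<void> {
        this.store.delete(key);
    }

    async list(prefix: string): Promise<string[]> {
        return [...this.store.keys()].filter(k => k.startsWith(prefix));
    }
}
```

The JSON/base64 round-trip in the bridge exists because BaseStorage persists JSON objects rather than raw bytes; an adapter with native binary storage, like the one above, can skip that encoding entirely.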
package/dist/storage/cow/BlobStorage.d.ts
ADDED

@@ -0,0 +1,231 @@
+/**
+ * BlobStorage: Content-Addressable Blob Storage for COW (Copy-on-Write)
+ *
+ * State-of-the-art implementation featuring:
+ * - Content-addressable: SHA-256 hashing
+ * - Type-aware chunking: Separate vectors, metadata, relationships
+ * - Compression: zstd for JSON, optimized for vectors
+ * - LRU caching: Hot blob performance
+ * - Streaming: Multipart upload for large blobs
+ * - Batch operations: Parallel I/O
+ * - Integrity: Cryptographic verification
+ * - Observability: Metrics and tracing
+ *
+ * @module storage/cow/BlobStorage
+ */
+/**
+ * Simple key-value storage interface for COW primitives
+ * This will be implemented by BaseStorage when COW is integrated
+ */
+export interface COWStorageAdapter {
+    get(key: string): Promise<Buffer | undefined>;
+    put(key: string, data: Buffer): Promise<void>;
+    delete(key: string): Promise<void>;
+    list(prefix: string): Promise<string[]>;
+}
+/**
+ * Blob metadata stored alongside blob data
+ */
+export interface BlobMetadata {
+    hash: string;
+    size: number;
+    compressedSize: number;
+    compression: 'none' | 'zstd';
+    type: 'vector' | 'metadata' | 'tree' | 'commit' | 'raw';
+    createdAt: number;
+    refCount: number;
+}
+/**
+ * Blob write options
+ */
+export interface BlobWriteOptions {
+    compression?: 'none' | 'zstd' | 'auto';
+    type?: 'vector' | 'metadata' | 'tree' | 'commit' | 'raw';
+    skipVerification?: boolean;
+}
+/**
+ * Blob read options
+ */
+export interface BlobReadOptions {
+    skipDecompression?: boolean;
+    skipCache?: boolean;
+}
+/**
+ * Blob statistics for observability
+ */
+export interface BlobStats {
+    totalBlobs: number;
+    totalSize: number;
+    compressedSize: number;
+    cacheHits: number;
+    cacheMisses: number;
+    compressionRatio: number;
+    avgBlobSize: number;
+    dedupSavings: number;
+}
+/**
+ * State-of-the-art content-addressable blob storage
+ *
+ * Features:
+ * - Content addressing via SHA-256
+ * - Type-aware compression (zstd, vector-optimized)
+ * - LRU caching with memory limits
+ * - Streaming for large blobs
+ * - Batch operations
+ * - Integrity verification
+ * - Observability metrics
+ */
+export declare class BlobStorage {
+    private adapter;
+    private cache;
+    private cacheMaxSize;
+    private currentCacheSize;
+    private stats;
+    private zstdCompress?;
+    private zstdDecompress?;
+    private readonly CACHE_MAX_SIZE;
+    private readonly MULTIPART_THRESHOLD;
+    private readonly COMPRESSION_THRESHOLD;
+    constructor(adapter: COWStorageAdapter, options?: {
+        cacheMaxSize?: number;
+        enableCompression?: boolean;
+    });
+    /**
+     * Lazy load zstd compression module
+     * (Avoids loading if not needed)
+     */
+    private initCompression;
+    /**
+     * Compute SHA-256 hash of data
+     *
+     * @param data - Data to hash
+     * @returns SHA-256 hash as hex string
+     */
+    static hash(data: Buffer): string;
+    /**
+     * Write a blob to storage
+     *
+     * Features:
+     * - Content-addressable: hash determines storage key
+     * - Deduplication: existing blob not rewritten
+     * - Compression: auto-compress based on type
+     * - Multipart: for large blobs (>5MB)
+     * - Verification: hash verification
+     * - Caching: write-through cache
+     *
+     * @param data - Blob data to write
+     * @param options - Write options
+     * @returns Blob hash
+     */
+    write(data: Buffer, options?: BlobWriteOptions): Promise<string>;
+    /**
+     * Read a blob from storage
+     *
+     * Features:
+     * - Cache lookup first (LRU)
+     * - Decompression (if compressed)
+     * - Verification (optional hash check)
+     * - Streaming for large blobs
+     *
+     * @param hash - Blob hash
+     * @param options - Read options
+     * @returns Blob data
+     */
+    read(hash: string, options?: BlobReadOptions): Promise<Buffer>;
+    /**
+     * Check if blob exists
+     *
+     * @param hash - Blob hash
+     * @returns True if blob exists
+     */
+    has(hash: string): Promise<boolean>;
+    /**
+     * Delete a blob from storage
+     *
+     * Features:
+     * - Reference counting: only delete if refCount = 0
+     * - Cascade: delete metadata too
+     * - Cache invalidation
+     *
+     * @param hash - Blob hash
+     */
+    delete(hash: string): Promise<void>;
+    /**
+     * Get blob metadata without reading full blob
+     *
+     * @param hash - Blob hash
+     * @returns Blob metadata
+     */
+    getMetadata(hash: string): Promise<BlobMetadata | undefined>;
+    /**
+     * Batch write multiple blobs in parallel
+     *
+     * @param blobs - Array of [data, options] tuples
+     * @returns Array of blob hashes
+     */
+    writeBatch(blobs: Array<[Buffer, BlobWriteOptions?]>): Promise<string[]>;
+    /**
+     * Batch read multiple blobs in parallel
+     *
+     * @param hashes - Array of blob hashes
+     * @param options - Read options
+     * @returns Array of blob data
+     */
+    readBatch(hashes: string[], options?: BlobReadOptions): Promise<Buffer[]>;
+    /**
+     * List all blobs (for garbage collection, debugging)
+     *
+     * @returns Array of blob hashes
+     */
+    listBlobs(): Promise<string[]>;
+    /**
+     * Get storage statistics
+     *
+     * @returns Blob statistics
+     */
+    getStats(): BlobStats;
+    /**
+     * Clear cache (useful for testing, memory pressure)
+     */
+    clearCache(): void;
+    /**
+     * Garbage collect unreferenced blobs
+     *
+     * @param referencedHashes - Set of hashes that should be kept
+     * @returns Number of blobs deleted
+     */
+    garbageCollect(referencedHashes: Set<string>): Promise<number>;
+    /**
+     * Select compression strategy based on data and options
+     */
+    private selectCompression;
+    /**
+     * Write large blob using multipart upload
+     * (Future enhancement: stream to adapter if supported)
+     */
+    private writeMultipart;
+    /**
+     * Increment reference count for a blob
+     */
+    private incrementRefCount;
+    /**
+     * Decrement reference count for a blob
+     */
+    private decrementRefCount;
+    /**
+     * Add blob to LRU cache
+     */
+    private addToCache;
+    /**
+     * Get blob from cache
+     */
+    private getFromCache;
+    /**
+     * Remove blob from cache
+     */
+    private removeFromCache;
+    /**
+     * Evict least recently used entry from cache
+     */
+    private evictLRU;
+}
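Content addressing is the load-bearing idea here: because the storage key is the SHA-256 of the bytes, identical blobs collapse to a single copy and every read can be verified against its own key. A minimal sketch of that write path, assuming only the `COWStorageAdapter` interface above (the `hashBlob`/`writeOnce` helpers are illustrative, not package APIs; `BlobStorage.hash()` and `write()` are the real ones):

```typescript
import { createHash } from 'node:crypto';
import type { COWStorageAdapter } from '@soulcraft/brainy'; // assumed export path

// Illustrative helper mirroring BlobStorage.hash().
function hashBlob(data: Buffer): string {
    return createHash('sha256').update(data).digest('hex');
}

async function writeOnce(adapter: COWStorageAdapter, data: Buffer): Promise<string> {
    const hash = hashBlob(data);
    const key = `blobs/${hash}`;
    // Deduplication falls out of the addressing scheme: if the key exists,
    // its content is by definition identical, so nothing is rewritten.
    if ((await adapter.get(key)) === undefined) {
        await adapter.put(key, data);
    }
    return hash;
}
```

This is also why `delete()` above is gated on a reference count rather than a caller's say-so: many commits and trees may legitimately point at the same hash.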
|