@soulcraft/brainy 4.9.1 → 4.9.2
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- package/CHANGELOG.md +5 -0
- package/dist/hnsw/hnswIndex.js +15 -5
- package/dist/import/ImportCoordinator.d.ts +49 -0
- package/dist/import/ImportCoordinator.js +122 -20
- package/dist/importers/VFSStructureGenerator.d.ts +3 -0
- package/dist/importers/VFSStructureGenerator.js +32 -6
- package/dist/storage/adapters/azureBlobStorage.d.ts +2 -0
- package/dist/storage/adapters/azureBlobStorage.js +94 -35
- package/dist/storage/adapters/fileSystemStorage.d.ts +2 -0
- package/dist/storage/adapters/fileSystemStorage.js +50 -13
- package/dist/storage/adapters/gcsStorage.d.ts +2 -0
- package/dist/storage/adapters/gcsStorage.js +96 -37
- package/dist/storage/adapters/memoryStorage.d.ts +7 -0
- package/dist/storage/adapters/memoryStorage.js +55 -5
- package/dist/storage/adapters/opfsStorage.d.ts +7 -0
- package/dist/storage/adapters/opfsStorage.js +37 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +2 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +102 -42
- package/dist/vfs/importers/DirectoryImporter.d.ts +7 -0
- package/dist/vfs/importers/DirectoryImporter.js +29 -4
- package/dist/vfs/types.d.ts +5 -0
- package/package.json +1 -1
package/dist/storage/adapters/s3CompatibleStorage.js
CHANGED

@@ -3074,51 +3074,74 @@ export class S3CompatibleStorage extends BaseStorage {
      */
     async saveHNSWData(nounId, hnswData) {
         await this.ensureInitialized();
-
-
-
-
-
+        const { PutObjectCommand, GetObjectCommand } = await import('@aws-sdk/client-s3');
+        // CRITICAL FIX (v4.7.3): Must preserve existing node data (id, vector) when updating HNSW metadata
+        // Previous implementation overwrote the entire file, destroying vector data
+        // Now we READ the existing node, UPDATE only connections/level, then WRITE back the complete node
+        // CRITICAL FIX (v4.10.1): Optimistic locking with ETags to prevent race conditions
+        // Uses S3 IfMatch preconditions - retries with exponential backoff on conflicts
+        // Prevents data corruption when multiple entities connect to same neighbor simultaneously
+        const shard = getShardIdFromUuid(nounId);
+        const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
+        const maxRetries = 5;
+        for (let attempt = 0; attempt < maxRetries; attempt++) {
             try {
-                //
-
-
-
-
-
-
+                // Get current ETag and data
+                let currentETag;
+                let existingNode = {};
+                try {
+                    const getResponse = await this.s3Client.send(new GetObjectCommand({
+                        Bucket: this.bucketName,
+                        Key: key
+                    }));
+                    const existingData = await getResponse.Body.transformToString();
+                    existingNode = JSON.parse(existingData);
+                    currentETag = getResponse.ETag;
+                }
+                catch (error) {
+                    // File doesn't exist yet - will create new
+                    if (error.name !== 'NoSuchKey' && error.Code !== 'NoSuchKey') {
+                        throw error;
+                    }
+                }
                 // Preserve id and vector, update only HNSW graph metadata
                 const updatedNode = {
-                    ...existingNode,
+                    ...existingNode, // Preserve all existing fields (id, vector, etc.)
                     level: hnswData.level,
                     connections: hnswData.connections
                 };
+                // ATOMIC WRITE: Use ETag precondition
+                // If currentETag exists, only write if ETag matches (no concurrent modification)
+                // If no ETag, only write if file doesn't exist (IfNoneMatch: *)
                 await this.s3Client.send(new PutObjectCommand({
                     Bucket: this.bucketName,
                     Key: key,
                     Body: JSON.stringify(updatedNode, null, 2),
-                    ContentType: 'application/json'
+                    ContentType: 'application/json',
+                    ...(currentETag
+                        ? { IfMatch: currentETag }
+                        : { IfNoneMatch: '*' }) // Only create if doesn't exist
                 }));
+                // Success! Exit retry loop
+                return;
             }
             catch (error) {
-                //
-                if (error.name === '
-
-
-
-
-
-
-
-                throw error;
+                // Precondition failed - concurrent modification detected
+                if (error.name === 'PreconditionFailed' || error.Code === 'PreconditionFailed') {
+                    if (attempt === maxRetries - 1) {
+                        this.logger.error(`Max retries (${maxRetries}) exceeded for ${nounId} - concurrent modification conflict`);
+                        throw new Error(`Failed to save HNSW data for ${nounId}: max retries exceeded due to concurrent modifications`);
+                    }
+                    // Exponential backoff: 50ms, 100ms, 200ms, 400ms, 800ms
+                    const backoffMs = 50 * Math.pow(2, attempt);
+                    await new Promise(resolve => setTimeout(resolve, backoffMs));
+                    continue;
                 }
+                // Other error - rethrow
+                this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
+                throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
             }
         }
-        catch (error) {
-            this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
-            throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
-        }
     }
     /**
      * Get HNSW graph data for a noun
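The loop above is a standard optimistic-concurrency pattern rather than anything Brainy-specific: S3 conditional writes reject a PutObject with HTTP 412 (PreconditionFailed) when the object's ETag no longer matches IfMatch, or when the object already exists and IfNoneMatch: '*' was given. A minimal standalone sketch of the same read-merge-write cycle, with assumed names (conditionalMerge, bucket, key, patch) that are not part of this package:

import { S3Client, GetObjectCommand, PutObjectCommand } from '@aws-sdk/client-s3';

const s3 = new S3Client({});

async function conditionalMerge(bucket: string, key: string, patch: Record<string, unknown>, maxRetries = 5): Promise<void> {
    for (let attempt = 0; attempt < maxRetries; attempt++) {
        // READ: fetch the current document and its ETag (the version fingerprint).
        let etag: string | undefined;
        let current: Record<string, unknown> = {};
        try {
            const res = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
            current = JSON.parse(await res.Body!.transformToString());
            etag = res.ETag;
        }
        catch (err: any) {
            // A missing object just means this will be the first write.
            if (err.name !== 'NoSuchKey' && err.Code !== 'NoSuchKey') throw err;
        }
        // MODIFY: overlay the patch on whatever is already stored.
        const next = { ...current, ...patch };
        try {
            // WRITE: succeeds only if nobody replaced the object in between.
            await s3.send(new PutObjectCommand({
                Bucket: bucket,
                Key: key,
                Body: JSON.stringify(next),
                ContentType: 'application/json',
                ...(etag ? { IfMatch: etag } : { IfNoneMatch: '*' })
            }));
            return;
        }
        catch (err: any) {
            if (err.name !== 'PreconditionFailed' && err.Code !== 'PreconditionFailed') throw err;
            // Conflict: back off (50, 100, 200, 400, 800 ms), then re-read and re-merge.
            await new Promise(resolve => setTimeout(resolve, 50 * Math.pow(2, attempt)));
        }
    }
    throw new Error(`conditionalMerge: gave up on ${key} after ${maxRetries} conflicts`);
}

Bounding the retries is a deliberate choice: under sustained contention an unbounded loop could spin indefinitely, and surfacing the conflict after five attempts lets the caller decide whether to re-queue the update. Re-reading inside the loop also matters, since the merge must be recomputed against the latest state or the retry would just replay the stale write.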
@@ -3153,22 +3176,59 @@ export class S3CompatibleStorage extends BaseStorage {
     /**
      * Save HNSW system data (entry point, max level)
      * Storage path: system/hnsw-system.json
+     *
+     * CRITICAL FIX (v4.10.1): Optimistic locking with ETags to prevent race conditions
      */
     async saveHNSWSystem(systemData) {
         await this.ensureInitialized();
-
-
-
-
-
-
-
-
-
-
-
-
-
+        const { PutObjectCommand, HeadObjectCommand } = await import('@aws-sdk/client-s3');
+        const key = `${this.systemPrefix}hnsw-system.json`;
+        const maxRetries = 5;
+        for (let attempt = 0; attempt < maxRetries; attempt++) {
+            try {
+                // Get current ETag (use HEAD to avoid downloading data)
+                let currentETag;
+                try {
+                    const headResponse = await this.s3Client.send(new HeadObjectCommand({
+                        Bucket: this.bucketName,
+                        Key: key
+                    }));
+                    currentETag = headResponse.ETag;
+                }
+                catch (error) {
+                    // File doesn't exist yet
+                    if (error.name !== 'NotFound' && error.name !== 'NoSuchKey' && error.Code !== 'NoSuchKey') {
+                        throw error;
+                    }
+                }
+                // ATOMIC WRITE: Use ETag precondition
+                await this.s3Client.send(new PutObjectCommand({
+                    Bucket: this.bucketName,
+                    Key: key,
+                    Body: JSON.stringify(systemData, null, 2),
+                    ContentType: 'application/json',
+                    ...(currentETag
+                        ? { IfMatch: currentETag }
+                        : { IfNoneMatch: '*' })
+                }));
+                // Success!
+                return;
+            }
+            catch (error) {
+                // Precondition failed - concurrent modification
+                if (error.name === 'PreconditionFailed' || error.Code === 'PreconditionFailed') {
+                    if (attempt === maxRetries - 1) {
+                        this.logger.error(`Max retries (${maxRetries}) exceeded for HNSW system data`);
+                        throw new Error('Failed to save HNSW system data: max retries exceeded due to concurrent modifications');
+                    }
+                    const backoffMs = 50 * Math.pow(2, attempt);
+                    await new Promise(resolve => setTimeout(resolve, backoffMs));
+                    continue;
+                }
+                // Other error - rethrow
+                this.logger.error('Failed to save HNSW system data:', error);
+                throw new Error(`Failed to save HNSW system data: ${error}`);
+            }
+        }
     }
     /**
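saveHNSWSystem differs from saveHNSWData in one useful way: it replaces the whole document instead of merging into it, so only the version fingerprint is needed and a HEAD request avoids downloading the body. The error name changes too: a HEAD against a missing key surfaces as a 404 NotFound rather than NoSuchKey, which is why the catch above checks both names. A sketch of that lookup (currentETag is an illustrative helper, not package API):

import { S3Client, HeadObjectCommand } from '@aws-sdk/client-s3';

async function currentETag(s3: S3Client, bucket: string, key: string): Promise<string | undefined> {
    try {
        const head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key }));
        return head.ETag;
    }
    catch (err: any) {
        // HEAD reports a missing object as 'NotFound', not 'NoSuchKey'.
        if (err.name === 'NotFound') return undefined;
        throw err;
    }
}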
package/dist/vfs/importers/DirectoryImporter.d.ts
CHANGED

@@ -19,6 +19,9 @@ export interface ImportOptions {
     extractMetadata?: boolean;
     showProgress?: boolean;
     filter?: (path: string) => boolean;
+    importId?: string;
+    projectId?: string;
+    customMetadata?: Record<string, any>;
 }
 export interface ImportResult {
     imported: string[];
@@ -47,6 +50,10 @@ export declare class DirectoryImporter {
      * Import a directory or file into VFS
      */
    import(sourcePath: string, options?: ImportOptions): Promise<ImportResult>;
+    /**
+     * Derive project ID from target path
+     */
+    private deriveProjectId;
     /**
      * Import with progress tracking (generator)
      */
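All three new ImportOptions fields are optional, so existing call sites keep compiling. A hypothetical call site, assuming an already constructed DirectoryImporter and a deep import path that may not match the published exports:

// Illustrative usage only; importer construction is elided.
import type { DirectoryImporter } from '@soulcraft/brainy/dist/vfs/importers/DirectoryImporter.js';

async function importDocs(importer: DirectoryImporter) {
    const result = await importer.import('./docs', {
        targetPath: '/knowledge-base',
        importId: 'docs-2024-12',         // omit to get a generated UUID
        projectId: 'knowledge-base',      // omit to derive it from targetPath
        customMetadata: { team: 'docs' }  // merged into each entry's metadata
    });
    console.log(`${result.imported.length} imported, ${result.failed.length} failed`);
}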
package/dist/vfs/importers/DirectoryImporter.js
CHANGED

@@ -9,6 +9,7 @@
  */
 import { promises as fs } from 'fs';
 import * as path from 'path';
+import { v4 as uuidv4 } from '../../universal/uuid.js';
 export class DirectoryImporter {
     constructor(vfs, brain) {
         this.vfs = vfs;
@@ -19,6 +20,18 @@ export class DirectoryImporter {
      */
     async import(sourcePath, options = {}) {
         const startTime = Date.now();
+        // v4.10.0: Generate tracking metadata
+        const importId = options.importId || uuidv4();
+        const projectId = options.projectId || this.deriveProjectId(options.targetPath || '/');
+        const trackingMetadata = {
+            importIds: [importId],
+            projectId,
+            importedAt: Date.now(),
+            importSource: sourcePath,
+            ...(options.customMetadata || {})
+        };
+        // Store tracking metadata in options for use in helper methods
+        const enhancedOptions = { ...options, _trackingMetadata: trackingMetadata };
         const result = {
             imported: [],
             failed: [],
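Concretely, a call like import('/Users/me/docs', { targetPath: '/kb' }) would build tracking metadata along these lines (the UUID, timestamp, and paths are placeholders):

// Illustrative shape only, not a literal value from the package.
const trackingMetadata = {
    importIds: ['3f2b8c1e-5a47-4d09-9e21-7c6a0d4f8b13'], // one UUID per import run
    projectId: 'kb',                                      // deriveProjectId('/kb')
    importedAt: 1733097600000,                            // Date.now() at import start
    importSource: '/Users/me/docs'
    // ...plus any options.customMetadata keys
};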
@@ -34,7 +47,7 @@ export class DirectoryImporter {
             await this.importFile(sourcePath, options.targetPath || '/', result);
         }
         else if (stats.isDirectory()) {
-            await this.importDirectory(sourcePath,
+            await this.importDirectory(sourcePath, enhancedOptions, result);
         }
     }
     catch (error) {
@@ -46,6 +59,13 @@ export class DirectoryImporter {
         result.duration = Date.now() - startTime;
         return result;
     }
+    /**
+     * Derive project ID from target path
+     */
+    deriveProjectId(targetPath) {
+        const segments = targetPath.split('/').filter(s => s.length > 0);
+        return segments.length > 0 ? segments[0] : 'default_project';
+    }
     /**
      * Import with progress tracking (generator)
      */
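So the project ID is simply the first non-empty segment of the target path, with a fallback for the root. Restated outside the class for illustration:

function deriveProjectId(targetPath: string): string {
    const segments = targetPath.split('/').filter(s => s.length > 0);
    return segments.length > 0 ? segments[0] : 'default_project';
}

deriveProjectId('/knowledge-base/docs'); // => 'knowledge-base'
deriveProjectId('/');                    // => 'default_project'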
@@ -125,9 +145,13 @@ export class DirectoryImporter {
         };
         await collectDirs(sourcePath, targetPath);
         // Create all directories
+        const trackingMetadata = options._trackingMetadata || {};
         for (const dirPath of dirsToCreate) {
             try {
-                await this.vfs.mkdir(dirPath, {
+                await this.vfs.mkdir(dirPath, {
+                    recursive: true,
+                    metadata: trackingMetadata // v4.10.0: Add tracking metadata
+                });
                 result.directoriesCreated++;
             }
             catch (error) {
@@ -207,14 +231,15 @@ export class DirectoryImporter {
             }
         }
         // Write to VFS
+        const trackingMetadata = options._trackingMetadata || {};
         await this.vfs.writeFile(vfsPath, content, {
             generateEmbedding: options.generateEmbeddings,
             extractMetadata: options.extractMetadata,
             metadata: {
                 originalPath: filePath,
-                importedAt: Date.now(),
                 originalSize: stats.size,
-                originalModified: stats.mtime.getTime()
+                originalModified: stats.mtime.getTime(),
+                ...trackingMetadata // v4.10.0: Add tracking metadata
             }
         });
         return { vfsPath, size: stats.size };
package/dist/vfs/types.d.ts
CHANGED
@@ -54,6 +54,11 @@ export interface VFSMetadata {
     dependencies?: string[];
     exports?: string[];
     language?: string;
+    importIds?: string[];
+    projectId?: string;
+    importedAt?: number;
+    importFormat?: string;
+    importSource?: string;
     lineCount?: number;
     wordCount?: number;
     charset?: string;
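With these optional fields on VFSMetadata, import provenance becomes queryable from any entry's metadata. A sketch of filtering by import run; the entry shape, helper name, and deep import path are all illustrative, only the VFSMetadata fields come from the diff:

import type { VFSMetadata } from '@soulcraft/brainy/dist/vfs/types.js';

// Illustrative helper: keep only entries written by a given import run.
function entriesFromImport(
    entries: Array<{ path: string; metadata?: VFSMetadata }>,
    importId: string
) {
    return entries.filter(e => e.metadata?.importIds?.includes(importId));
}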
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "4.9.1",
+  "version": "4.9.2",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",