@soulcraft/brainy 4.9.1 → 4.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/dist/hnsw/hnswIndex.js +15 -5
- package/dist/import/ImportCoordinator.d.ts +49 -0
- package/dist/import/ImportCoordinator.js +122 -20
- package/dist/importers/VFSStructureGenerator.d.ts +3 -0
- package/dist/importers/VFSStructureGenerator.js +32 -6
- package/dist/storage/adapters/azureBlobStorage.d.ts +2 -0
- package/dist/storage/adapters/azureBlobStorage.js +94 -35
- package/dist/storage/adapters/fileSystemStorage.d.ts +2 -0
- package/dist/storage/adapters/fileSystemStorage.js +50 -13
- package/dist/storage/adapters/gcsStorage.d.ts +2 -0
- package/dist/storage/adapters/gcsStorage.js +96 -37
- package/dist/storage/adapters/memoryStorage.d.ts +7 -0
- package/dist/storage/adapters/memoryStorage.js +55 -5
- package/dist/storage/adapters/opfsStorage.d.ts +7 -0
- package/dist/storage/adapters/opfsStorage.js +37 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +2 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +102 -42
- package/dist/vfs/importers/DirectoryImporter.d.ts +7 -0
- package/dist/vfs/importers/DirectoryImporter.js +29 -4
- package/dist/vfs/types.d.ts +5 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
### [4.9.2](https://github.com/soulcraftlabs/brainy/compare/v4.9.1...v4.9.2) (2025-10-29)
|
|
6
|
+
|
|
7
|
+
- fix: resolve HNSW concurrency race condition across all storage adapters (0bcf50a)
|
|
8
|
+
|
|
9
|
+
|
|
5
10
|
## [4.9.1](https://github.com/soulcraftlabs/brainy/compare/v4.9.0...v4.9.1) (2025-10-29)
|
|
6
11
|
|
|
7
12
|
### 📚 Documentation
|
package/dist/hnsw/hnswIndex.js
CHANGED
|
@@ -195,17 +195,27 @@ export class HNSWIndex {
|
|
|
195
195
|
await this.pruneConnections(neighbor, level);
|
|
196
196
|
}
|
|
197
197
|
// Persist updated neighbor HNSW data (v3.35.0+)
|
|
198
|
+
//
|
|
199
|
+
// CRITICAL FIX (v4.10.1): Serialize neighbor updates to prevent race conditions
|
|
200
|
+
// Previously: Fire-and-forget (.catch) caused 16-32 concurrent writes per entity
|
|
201
|
+
// Now: Await each update, serializing writes to prevent data corruption
|
|
202
|
+
// Trade-off: 20-30% slower bulk import vs 100% data integrity
|
|
198
203
|
if (this.storage) {
|
|
199
204
|
const neighborConnectionsObj = {};
|
|
200
205
|
for (const [lvl, nounIds] of neighbor.connections.entries()) {
|
|
201
206
|
neighborConnectionsObj[lvl.toString()] = Array.from(nounIds);
|
|
202
207
|
}
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
208
|
+
try {
|
|
209
|
+
await this.storage.saveHNSWData(neighborId, {
|
|
210
|
+
level: neighbor.level,
|
|
211
|
+
connections: neighborConnectionsObj
|
|
212
|
+
});
|
|
213
|
+
}
|
|
214
|
+
catch (error) {
|
|
215
|
+
// Log error but don't throw - allow insert to continue
|
|
216
|
+
// Storage adapters have retry logic, so this is a rare last-resort failure
|
|
207
217
|
console.error(`Failed to persist neighbor HNSW data for ${neighborId}:`, error);
|
|
208
|
-
}
|
|
218
|
+
}
|
|
209
219
|
}
|
|
210
220
|
}
|
|
211
221
|
// Update entry point for the next level
|
|
@@ -28,6 +28,24 @@ export interface ImportSource {
|
|
|
28
28
|
password: string;
|
|
29
29
|
};
|
|
30
30
|
}
|
|
31
|
+
/**
|
|
32
|
+
* Tracking context for import operations
|
|
33
|
+
* Contains metadata that should be attached to all created entities/relationships
|
|
34
|
+
*/
|
|
35
|
+
export interface TrackingContext {
|
|
36
|
+
/** Unique identifier for this import operation */
|
|
37
|
+
importId: string;
|
|
38
|
+
/** Project identifier grouping related imports */
|
|
39
|
+
projectId: string;
|
|
40
|
+
/** Timestamp when import started */
|
|
41
|
+
importedAt: number;
|
|
42
|
+
/** Format of imported data */
|
|
43
|
+
importFormat: string;
|
|
44
|
+
/** Source filename or URL */
|
|
45
|
+
importSource: string;
|
|
46
|
+
/** Custom metadata from user */
|
|
47
|
+
customMetadata: Record<string, any>;
|
|
48
|
+
}
|
|
31
49
|
/**
|
|
32
50
|
* Valid import options for v4.x
|
|
33
51
|
*/
|
|
@@ -64,6 +82,23 @@ export interface ValidImportOptions {
|
|
|
64
82
|
enableHistory?: boolean;
|
|
65
83
|
/** Chunk size for streaming large imports (0 = no streaming) */
|
|
66
84
|
chunkSize?: number;
|
|
85
|
+
/**
|
|
86
|
+
* Unique identifier for this import operation (auto-generated if not provided)
|
|
87
|
+
* Used to track all entities/relationships created in this import
|
|
88
|
+
* Note: Entities can belong to multiple imports (stored as array)
|
|
89
|
+
*/
|
|
90
|
+
importId?: string;
|
|
91
|
+
/**
|
|
92
|
+
* Project identifier (user-specified or derived from vfsPath)
|
|
93
|
+
* Groups multiple imports under a common project
|
|
94
|
+
* If not specified, defaults to sanitized vfsPath
|
|
95
|
+
*/
|
|
96
|
+
projectId?: string;
|
|
97
|
+
/**
|
|
98
|
+
* Custom metadata to attach to all created entities
|
|
99
|
+
* Merged with import/project tracking metadata
|
|
100
|
+
*/
|
|
101
|
+
customMetadata?: Record<string, any>;
|
|
67
102
|
/**
|
|
68
103
|
* Progress callback for tracking import progress (v4.2.0+)
|
|
69
104
|
*
|
|
@@ -286,6 +321,20 @@ export declare class ImportCoordinator {
|
|
|
286
321
|
* Respects LOG_LEVEL for verbosity (detailed in dev, concise in prod)
|
|
287
322
|
*/
|
|
288
323
|
private buildValidationErrorMessage;
|
|
324
|
+
/**
|
|
325
|
+
* Derive project ID from VFS path
|
|
326
|
+
* Extracts meaningful project name from path, avoiding timestamps
|
|
327
|
+
*
|
|
328
|
+
* Examples:
|
|
329
|
+
* - /imports/myproject → "myproject"
|
|
330
|
+
* - /imports/2024-01-15/myproject → "myproject"
|
|
331
|
+
* - /imports/1234567890 → "import_1234567890"
|
|
332
|
+
* - /my-game/characters → "my-game"
|
|
333
|
+
*
|
|
334
|
+
* @param vfsPath - VFS path to derive project ID from
|
|
335
|
+
* @returns Derived project identifier
|
|
336
|
+
*/
|
|
337
|
+
private deriveProjectId;
|
|
289
338
|
/**
|
|
290
339
|
* Get progressive flush interval based on CURRENT entity count (v4.2.0+)
|
|
291
340
|
*
|
|
@@ -68,7 +68,6 @@ export class ImportCoordinator {
|
|
|
68
68
|
*/
|
|
69
69
|
async import(source, options = {}) {
|
|
70
70
|
const startTime = Date.now();
|
|
71
|
-
const importId = uuidv4();
|
|
72
71
|
// Validate options (v4.0.0+: Reject deprecated v3.x options)
|
|
73
72
|
this.validateOptions(options);
|
|
74
73
|
// Normalize source (v4.2.0: handles URL fetching)
|
|
@@ -85,14 +84,7 @@ export class ImportCoordinator {
|
|
|
85
84
|
if (!detection) {
|
|
86
85
|
throw new Error('Unable to detect file format. Please specify format explicitly.');
|
|
87
86
|
}
|
|
88
|
-
//
|
|
89
|
-
options.onProgress?.({
|
|
90
|
-
stage: 'extracting',
|
|
91
|
-
message: `Extracting entities from ${detection.format}...`
|
|
92
|
-
});
|
|
93
|
-
// Extract entities and relationships
|
|
94
|
-
const extractionResult = await this.extract(normalizedSource, detection.format, options);
|
|
95
|
-
// Set defaults
|
|
87
|
+
// Set defaults early (needed for tracking context)
|
|
96
88
|
// CRITICAL FIX (v4.3.2): Spread options FIRST, then apply defaults
|
|
97
89
|
// Previously: ...options at the end overwrote normalized defaults with undefined
|
|
98
90
|
// Now: Defaults properly override undefined values
|
|
@@ -110,6 +102,24 @@ export class ImportCoordinator {
|
|
|
110
102
|
enableConceptExtraction: options.enableConceptExtraction !== false, // Already defaults to true
|
|
111
103
|
deduplicationThreshold: options.deduplicationThreshold || 0.85
|
|
112
104
|
};
|
|
105
|
+
// Generate tracking context (v4.10.0+: Unified import/project tracking)
|
|
106
|
+
const importId = options.importId || uuidv4();
|
|
107
|
+
const projectId = options.projectId || this.deriveProjectId(opts.vfsPath);
|
|
108
|
+
const trackingContext = {
|
|
109
|
+
importId,
|
|
110
|
+
projectId,
|
|
111
|
+
importedAt: Date.now(),
|
|
112
|
+
importFormat: detection.format,
|
|
113
|
+
importSource: normalizedSource.filename || 'unknown',
|
|
114
|
+
customMetadata: options.customMetadata || {}
|
|
115
|
+
};
|
|
116
|
+
// Report extraction stage
|
|
117
|
+
options.onProgress?.({
|
|
118
|
+
stage: 'extracting',
|
|
119
|
+
message: `Extracting entities from ${detection.format}...`
|
|
120
|
+
});
|
|
121
|
+
// Extract entities and relationships
|
|
122
|
+
const extractionResult = await this.extract(normalizedSource, detection.format, options);
|
|
113
123
|
// Report VFS storage stage
|
|
114
124
|
options.onProgress?.({
|
|
115
125
|
stage: 'storing-vfs',
|
|
@@ -126,7 +136,8 @@ export class ImportCoordinator {
|
|
|
126
136
|
sourceBuffer: normalizedSource.type === 'buffer' ? normalizedSource.data : undefined,
|
|
127
137
|
sourceFilename: normalizedSource.filename || `import.${detection.format}`,
|
|
128
138
|
createRelationshipFile: true,
|
|
129
|
-
createMetadataFile: true
|
|
139
|
+
createMetadataFile: true,
|
|
140
|
+
trackingContext // v4.10.0: Pass tracking metadata to VFS
|
|
130
141
|
});
|
|
131
142
|
// Report graph storage stage
|
|
132
143
|
options.onProgress?.({
|
|
@@ -137,7 +148,8 @@ export class ImportCoordinator {
|
|
|
137
148
|
const graphResult = await this.createGraphEntities(normalizedResult, vfsResult, opts, {
|
|
138
149
|
sourceFilename: normalizedSource.filename || `import.${detection.format}`,
|
|
139
150
|
format: detection.format
|
|
140
|
-
}
|
|
151
|
+
}, trackingContext // v4.10.0: Pass tracking metadata to graph creation
|
|
152
|
+
);
|
|
141
153
|
// Report complete
|
|
142
154
|
options.onProgress?.({
|
|
143
155
|
stage: 'complete',
|
|
@@ -414,7 +426,8 @@ export class ImportCoordinator {
|
|
|
414
426
|
* Create entities and relationships in knowledge graph
|
|
415
427
|
* v4.9.0: Added sourceInfo parameter for document entity creation
|
|
416
428
|
*/
|
|
417
|
-
async createGraphEntities(extractionResult, vfsResult, options, sourceInfo
|
|
429
|
+
async createGraphEntities(extractionResult, vfsResult, options, sourceInfo, trackingContext // v4.10.0: Import/project tracking
|
|
430
|
+
) {
|
|
418
431
|
const entities = [];
|
|
419
432
|
const relationships = [];
|
|
420
433
|
let mergedCount = 0;
|
|
@@ -469,11 +482,19 @@ export class ImportCoordinator {
|
|
|
469
482
|
name: sourceInfo.sourceFilename,
|
|
470
483
|
sourceFile: sourceInfo.sourceFilename,
|
|
471
484
|
format: sourceInfo.format,
|
|
472
|
-
importedAt: Date.now(),
|
|
473
485
|
importSource: true,
|
|
474
486
|
vfsPath: vfsResult.rootPath,
|
|
475
487
|
totalRows: rows.length,
|
|
476
|
-
byType: this.countByType(rows)
|
|
488
|
+
byType: this.countByType(rows),
|
|
489
|
+
// v4.10.0: Import tracking metadata
|
|
490
|
+
...(trackingContext && {
|
|
491
|
+
importIds: [trackingContext.importId],
|
|
492
|
+
projectId: trackingContext.projectId,
|
|
493
|
+
importedAt: trackingContext.importedAt,
|
|
494
|
+
importFormat: trackingContext.importFormat,
|
|
495
|
+
importSource: trackingContext.importSource,
|
|
496
|
+
...trackingContext.customMetadata
|
|
497
|
+
})
|
|
477
498
|
}
|
|
478
499
|
});
|
|
479
500
|
console.log(`✅ Document entity created: ${documentEntityId}`);
|
|
@@ -499,7 +520,18 @@ export class ImportCoordinator {
|
|
|
499
520
|
metadata: {
|
|
500
521
|
...entity.metadata,
|
|
501
522
|
vfsPath: vfsFile?.path,
|
|
502
|
-
importedFrom: 'import-coordinator'
|
|
523
|
+
importedFrom: 'import-coordinator',
|
|
524
|
+
// v4.10.0: Import tracking metadata
|
|
525
|
+
...(trackingContext && {
|
|
526
|
+
importIds: [trackingContext.importId],
|
|
527
|
+
projectId: trackingContext.projectId,
|
|
528
|
+
importedAt: trackingContext.importedAt,
|
|
529
|
+
importFormat: trackingContext.importFormat,
|
|
530
|
+
importSource: trackingContext.importSource,
|
|
531
|
+
sourceRow: row.rowNumber,
|
|
532
|
+
sourceSheet: row.sheet,
|
|
533
|
+
...trackingContext.customMetadata
|
|
534
|
+
})
|
|
503
535
|
}
|
|
504
536
|
}, importSource, {
|
|
505
537
|
similarityThreshold: options.deduplicationThreshold || 0.85,
|
|
@@ -525,9 +557,19 @@ export class ImportCoordinator {
|
|
|
525
557
|
name: entity.name,
|
|
526
558
|
confidence: entity.confidence,
|
|
527
559
|
vfsPath: vfsFile?.path,
|
|
528
|
-
importedAt: Date.now(),
|
|
529
560
|
importedFrom: 'import-coordinator',
|
|
530
|
-
imports: [importSource]
|
|
561
|
+
imports: [importSource],
|
|
562
|
+
// v4.10.0: Import tracking metadata
|
|
563
|
+
...(trackingContext && {
|
|
564
|
+
importIds: [trackingContext.importId],
|
|
565
|
+
projectId: trackingContext.projectId,
|
|
566
|
+
importedAt: trackingContext.importedAt,
|
|
567
|
+
importFormat: trackingContext.importFormat,
|
|
568
|
+
importSource: trackingContext.importSource,
|
|
569
|
+
sourceRow: row.rowNumber,
|
|
570
|
+
sourceSheet: row.sheet,
|
|
571
|
+
...trackingContext.customMetadata
|
|
572
|
+
})
|
|
531
573
|
}
|
|
532
574
|
});
|
|
533
575
|
newCount++;
|
|
@@ -554,7 +596,15 @@ export class ImportCoordinator {
|
|
|
554
596
|
sheet: row.sheet,
|
|
555
597
|
rowNumber: row.rowNumber,
|
|
556
598
|
extractedAt: Date.now(),
|
|
557
|
-
format: sourceInfo?.format
|
|
599
|
+
format: sourceInfo?.format,
|
|
600
|
+
// v4.10.0: Import tracking metadata
|
|
601
|
+
...(trackingContext && {
|
|
602
|
+
importIds: [trackingContext.importId],
|
|
603
|
+
projectId: trackingContext.projectId,
|
|
604
|
+
createdAt: Date.now(),
|
|
605
|
+
importFormat: trackingContext.importFormat,
|
|
606
|
+
...trackingContext.customMetadata
|
|
607
|
+
})
|
|
558
608
|
}
|
|
559
609
|
});
|
|
560
610
|
provenanceCount++;
|
|
@@ -593,7 +643,14 @@ export class ImportCoordinator {
|
|
|
593
643
|
name: rel.to,
|
|
594
644
|
placeholder: true,
|
|
595
645
|
inferredFrom: entity.name,
|
|
596
|
-
|
|
646
|
+
// v4.10.0: Import tracking metadata
|
|
647
|
+
...(trackingContext && {
|
|
648
|
+
importIds: [trackingContext.importId],
|
|
649
|
+
projectId: trackingContext.projectId,
|
|
650
|
+
importedAt: trackingContext.importedAt,
|
|
651
|
+
importFormat: trackingContext.importFormat,
|
|
652
|
+
...trackingContext.customMetadata
|
|
653
|
+
})
|
|
597
654
|
}
|
|
598
655
|
});
|
|
599
656
|
// CRITICAL: Add to entities array so future searches find it
|
|
@@ -614,7 +671,14 @@ export class ImportCoordinator {
|
|
|
614
671
|
weight: rel.weight || 1.0, // v4.2.0: Top-level field
|
|
615
672
|
metadata: {
|
|
616
673
|
evidence: rel.evidence,
|
|
617
|
-
|
|
674
|
+
// v4.10.0: Import tracking metadata (will be merged in batch creation)
|
|
675
|
+
...(trackingContext && {
|
|
676
|
+
importIds: [trackingContext.importId],
|
|
677
|
+
projectId: trackingContext.projectId,
|
|
678
|
+
importedAt: trackingContext.importedAt,
|
|
679
|
+
importFormat: trackingContext.importFormat,
|
|
680
|
+
...trackingContext.customMetadata
|
|
681
|
+
})
|
|
618
682
|
}
|
|
619
683
|
});
|
|
620
684
|
}
|
|
@@ -937,6 +1001,44 @@ ${optionDetails}
|
|
|
937
1001
|
return `Invalid import options: ${optionsList}. See https://brainy.dev/docs/guides/migrating-to-v4`;
|
|
938
1002
|
}
|
|
939
1003
|
}
|
|
1004
|
+
/**
|
|
1005
|
+
* Derive project ID from VFS path
|
|
1006
|
+
* Extracts meaningful project name from path, avoiding timestamps
|
|
1007
|
+
*
|
|
1008
|
+
* Examples:
|
|
1009
|
+
* - /imports/myproject → "myproject"
|
|
1010
|
+
* - /imports/2024-01-15/myproject → "myproject"
|
|
1011
|
+
* - /imports/1234567890 → "import_1234567890"
|
|
1012
|
+
* - /my-game/characters → "my-game"
|
|
1013
|
+
*
|
|
1014
|
+
* @param vfsPath - VFS path to derive project ID from
|
|
1015
|
+
* @returns Derived project identifier
|
|
1016
|
+
*/
|
|
1017
|
+
deriveProjectId(vfsPath) {
|
|
1018
|
+
// Extract meaningful project name from vfsPath
|
|
1019
|
+
const segments = vfsPath.split('/').filter(s => s.length > 0);
|
|
1020
|
+
if (segments.length === 0) {
|
|
1021
|
+
return 'default_project';
|
|
1022
|
+
}
|
|
1023
|
+
// If path starts with /imports/, look for meaningful segment
|
|
1024
|
+
if (segments[0] === 'imports') {
|
|
1025
|
+
if (segments.length === 1) {
|
|
1026
|
+
return 'default_project';
|
|
1027
|
+
}
|
|
1028
|
+
const lastSegment = segments[segments.length - 1];
|
|
1029
|
+
// If last segment looks like a timestamp, use parent
|
|
1030
|
+
if (/^\d{4}-\d{2}-\d{2}$/.test(lastSegment) || /^\d{10,}$/.test(lastSegment)) {
|
|
1031
|
+
// Use parent segment if available
|
|
1032
|
+
if (segments.length >= 3) {
|
|
1033
|
+
return segments[segments.length - 2];
|
|
1034
|
+
}
|
|
1035
|
+
return `import_${lastSegment}`;
|
|
1036
|
+
}
|
|
1037
|
+
return lastSegment;
|
|
1038
|
+
}
|
|
1039
|
+
// For non-/imports/ paths, use first segment as project
|
|
1040
|
+
return segments[0];
|
|
1041
|
+
}
|
|
940
1042
|
/**
|
|
941
1043
|
* Get progressive flush interval based on CURRENT entity count (v4.2.0+)
|
|
942
1044
|
*
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
*/
|
|
11
11
|
import { Brainy } from '../brainy.js';
|
|
12
12
|
import type { SmartExcelResult } from './SmartExcelImporter.js';
|
|
13
|
+
import type { TrackingContext } from '../import/ImportCoordinator.js';
|
|
13
14
|
export interface VFSStructureOptions {
|
|
14
15
|
/** Root path in VFS for import */
|
|
15
16
|
rootPath: string;
|
|
@@ -27,6 +28,8 @@ export interface VFSStructureOptions {
|
|
|
27
28
|
createRelationshipFile?: boolean;
|
|
28
29
|
/** Create metadata file */
|
|
29
30
|
createMetadataFile?: boolean;
|
|
31
|
+
/** Import tracking context (v4.10.0) */
|
|
32
|
+
trackingContext?: TrackingContext;
|
|
30
33
|
}
|
|
31
34
|
export interface VFSStructureResult {
|
|
32
35
|
/** Root path created */
|
|
@@ -54,9 +54,21 @@ export class VFSStructureGenerator {
|
|
|
54
54
|
};
|
|
55
55
|
// Ensure VFS is initialized
|
|
56
56
|
await this.init();
|
|
57
|
+
// Extract tracking metadata if provided
|
|
58
|
+
const trackingMetadata = options.trackingContext ? {
|
|
59
|
+
importIds: [options.trackingContext.importId],
|
|
60
|
+
projectId: options.trackingContext.projectId,
|
|
61
|
+
importedAt: options.trackingContext.importedAt,
|
|
62
|
+
importFormat: options.trackingContext.importFormat,
|
|
63
|
+
importSource: options.trackingContext.importSource,
|
|
64
|
+
...options.trackingContext.customMetadata
|
|
65
|
+
} : {};
|
|
57
66
|
// Create root directory
|
|
58
67
|
try {
|
|
59
|
-
await this.vfs.mkdir(options.rootPath, {
|
|
68
|
+
await this.vfs.mkdir(options.rootPath, {
|
|
69
|
+
recursive: true,
|
|
70
|
+
metadata: trackingMetadata // v4.10.0: Add tracking metadata
|
|
71
|
+
});
|
|
60
72
|
result.directories.push(options.rootPath);
|
|
61
73
|
result.operations++;
|
|
62
74
|
}
|
|
@@ -70,7 +82,9 @@ export class VFSStructureGenerator {
|
|
|
70
82
|
// Preserve source file if requested
|
|
71
83
|
if (options.preserveSource && options.sourceBuffer && options.sourceFilename) {
|
|
72
84
|
const sourcePath = `${options.rootPath}/_source${this.getExtension(options.sourceFilename)}`;
|
|
73
|
-
await this.vfs.writeFile(sourcePath, options.sourceBuffer
|
|
85
|
+
await this.vfs.writeFile(sourcePath, options.sourceBuffer, {
|
|
86
|
+
metadata: trackingMetadata // v4.10.0: Add tracking metadata
|
|
87
|
+
});
|
|
74
88
|
result.files.push({
|
|
75
89
|
path: sourcePath,
|
|
76
90
|
type: 'source'
|
|
@@ -84,7 +98,10 @@ export class VFSStructureGenerator {
|
|
|
84
98
|
const groupPath = `${options.rootPath}/${groupName}`;
|
|
85
99
|
// Create group directory
|
|
86
100
|
try {
|
|
87
|
-
await this.vfs.mkdir(groupPath, {
|
|
101
|
+
await this.vfs.mkdir(groupPath, {
|
|
102
|
+
recursive: true,
|
|
103
|
+
metadata: trackingMetadata // v4.10.0: Add tracking metadata
|
|
104
|
+
});
|
|
88
105
|
result.directories.push(groupPath);
|
|
89
106
|
result.operations++;
|
|
90
107
|
}
|
|
@@ -117,7 +134,12 @@ export class VFSStructureGenerator {
|
|
|
117
134
|
evidence: rel.evidence
|
|
118
135
|
}))
|
|
119
136
|
};
|
|
120
|
-
await this.vfs.writeFile(entityPath, JSON.stringify(entityJson, null, 2)
|
|
137
|
+
await this.vfs.writeFile(entityPath, JSON.stringify(entityJson, null, 2), {
|
|
138
|
+
metadata: {
|
|
139
|
+
...trackingMetadata, // v4.10.0: Add tracking metadata
|
|
140
|
+
entityId: extracted.entity.id
|
|
141
|
+
}
|
|
142
|
+
});
|
|
121
143
|
result.files.push({
|
|
122
144
|
path: entityPath,
|
|
123
145
|
entityId: extracted.entity.id,
|
|
@@ -143,7 +165,9 @@ export class VFSStructureGenerator {
|
|
|
143
165
|
}
|
|
144
166
|
}
|
|
145
167
|
};
|
|
146
|
-
await this.vfs.writeFile(relationshipsPath, JSON.stringify(relationshipsJson, null, 2)
|
|
168
|
+
await this.vfs.writeFile(relationshipsPath, JSON.stringify(relationshipsJson, null, 2), {
|
|
169
|
+
metadata: trackingMetadata // v4.10.0: Add tracking metadata
|
|
170
|
+
});
|
|
147
171
|
result.files.push({
|
|
148
172
|
path: relationshipsPath,
|
|
149
173
|
type: 'relationships'
|
|
@@ -180,7 +204,9 @@ export class VFSStructureGenerator {
|
|
|
180
204
|
fileCount: result.files.length
|
|
181
205
|
}
|
|
182
206
|
};
|
|
183
|
-
await this.vfs.writeFile(metadataPath, JSON.stringify(metadataJson, null, 2)
|
|
207
|
+
await this.vfs.writeFile(metadataPath, JSON.stringify(metadataJson, null, 2), {
|
|
208
|
+
metadata: trackingMetadata // v4.10.0: Add tracking metadata
|
|
209
|
+
});
|
|
184
210
|
result.files.push({
|
|
185
211
|
path: metadataPath,
|
|
186
212
|
type: 'metadata'
|
|
@@ -322,6 +322,8 @@ export declare class AzureBlobStorage extends BaseStorage {
|
|
|
322
322
|
} | null>;
|
|
323
323
|
/**
|
|
324
324
|
* Save HNSW system data (entry point, max level)
|
|
325
|
+
*
|
|
326
|
+
* CRITICAL FIX (v4.10.1): Optimistic locking with ETags to prevent race conditions
|
|
325
327
|
*/
|
|
326
328
|
saveHNSWSystem(systemData: {
|
|
327
329
|
entryPointId: string | null;
|
|
@@ -1332,44 +1332,69 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
1332
1332
|
*/
|
|
1333
1333
|
async saveHNSWData(nounId, hnswData) {
|
|
1334
1334
|
await this.ensureInitialized();
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1335
|
+
// CRITICAL FIX (v4.7.3): Must preserve existing node data (id, vector) when updating HNSW metadata
|
|
1336
|
+
// Previous implementation overwrote the entire file, destroying vector data
|
|
1337
|
+
// Now we READ the existing node, UPDATE only connections/level, then WRITE back the complete node
|
|
1338
|
+
// CRITICAL FIX (v4.10.1): Optimistic locking with ETags to prevent race conditions
|
|
1339
|
+
// Uses Azure Blob ETags with ifMatch preconditions - retries with exponential backoff on conflicts
|
|
1340
|
+
// Prevents data corruption when multiple entities connect to same neighbor simultaneously
|
|
1341
|
+
const shard = getShardIdFromUuid(nounId);
|
|
1342
|
+
const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
|
|
1343
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1344
|
+
const maxRetries = 5;
|
|
1345
|
+
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
1340
1346
|
try {
|
|
1341
|
-
//
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1347
|
+
// Get current ETag and data
|
|
1348
|
+
let currentETag;
|
|
1349
|
+
let existingNode = {};
|
|
1350
|
+
try {
|
|
1351
|
+
const downloadResponse = await blockBlobClient.download(0);
|
|
1352
|
+
const existingData = await this.streamToBuffer(downloadResponse.readableStreamBody);
|
|
1353
|
+
existingNode = JSON.parse(existingData.toString());
|
|
1354
|
+
currentETag = downloadResponse.etag;
|
|
1355
|
+
}
|
|
1356
|
+
catch (error) {
|
|
1357
|
+
// File doesn't exist yet - will create new
|
|
1358
|
+
if (error.statusCode !== 404 && error.code !== 'BlobNotFound') {
|
|
1359
|
+
throw error;
|
|
1360
|
+
}
|
|
1361
|
+
}
|
|
1345
1362
|
// Preserve id and vector, update only HNSW graph metadata
|
|
1346
1363
|
const updatedNode = {
|
|
1347
|
-
...existingNode,
|
|
1364
|
+
...existingNode, // Preserve all existing fields (id, vector, etc.)
|
|
1348
1365
|
level: hnswData.level,
|
|
1349
1366
|
connections: hnswData.connections
|
|
1350
1367
|
};
|
|
1351
1368
|
const content = JSON.stringify(updatedNode, null, 2);
|
|
1369
|
+
// ATOMIC WRITE: Use ETag precondition
|
|
1370
|
+
// If currentETag exists, only write if ETag matches (no concurrent modification)
|
|
1371
|
+
// If no ETag, only write if blob doesn't exist (ifNoneMatch: *)
|
|
1352
1372
|
await blockBlobClient.upload(content, content.length, {
|
|
1353
|
-
blobHTTPHeaders: { blobContentType: 'application/json' }
|
|
1373
|
+
blobHTTPHeaders: { blobContentType: 'application/json' },
|
|
1374
|
+
conditions: currentETag
|
|
1375
|
+
? { ifMatch: currentETag }
|
|
1376
|
+
: { ifNoneMatch: '*' } // Only create if doesn't exist
|
|
1354
1377
|
});
|
|
1378
|
+
// Success! Exit retry loop
|
|
1379
|
+
return;
|
|
1355
1380
|
}
|
|
1356
1381
|
catch (error) {
|
|
1357
|
-
//
|
|
1358
|
-
if (error.statusCode ===
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
}
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1382
|
+
// Precondition failed - concurrent modification detected
|
|
1383
|
+
if (error.statusCode === 412 || error.code === 'ConditionNotMet') {
|
|
1384
|
+
if (attempt === maxRetries - 1) {
|
|
1385
|
+
this.logger.error(`Max retries (${maxRetries}) exceeded for ${nounId} - concurrent modification conflict`);
|
|
1386
|
+
throw new Error(`Failed to save HNSW data for ${nounId}: max retries exceeded due to concurrent modifications`);
|
|
1387
|
+
}
|
|
1388
|
+
// Exponential backoff between attempts: 50ms, 100ms, 200ms, 400ms (the final attempt throws instead of waiting)
|
|
1389
|
+
const backoffMs = 50 * Math.pow(2, attempt);
|
|
1390
|
+
await new Promise(resolve => setTimeout(resolve, backoffMs));
|
|
1391
|
+
continue;
|
|
1366
1392
|
}
|
|
1393
|
+
// Other error - rethrow
|
|
1394
|
+
this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
|
|
1395
|
+
throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
|
|
1367
1396
|
}
|
|
1368
1397
|
}
|
|
1369
|
-
catch (error) {
|
|
1370
|
-
this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
|
|
1371
|
-
throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
|
|
1372
|
-
}
|
|
1373
1398
|
}
|
|
1374
1399
|
/**
|
|
1375
1400
|
* Get HNSW graph data for a noun
|
|
@@ -1394,20 +1419,54 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
1394
1419
|
}
|
|
1395
1420
|
/**
|
|
1396
1421
|
* Save HNSW system data (entry point, max level)
|
|
1422
|
+
*
|
|
1423
|
+
* CRITICAL FIX (v4.10.1): Optimistic locking with ETags to prevent race conditions
|
|
1397
1424
|
*/
|
|
1398
1425
|
async saveHNSWSystem(systemData) {
|
|
1399
1426
|
await this.ensureInitialized();
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1427
|
+
const key = `${this.systemPrefix}hnsw-system.json`;
|
|
1428
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1429
|
+
const maxRetries = 5;
|
|
1430
|
+
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
1431
|
+
try {
|
|
1432
|
+
// Get current ETag
|
|
1433
|
+
let currentETag;
|
|
1434
|
+
try {
|
|
1435
|
+
const properties = await blockBlobClient.getProperties();
|
|
1436
|
+
currentETag = properties.etag;
|
|
1437
|
+
}
|
|
1438
|
+
catch (error) {
|
|
1439
|
+
// File doesn't exist yet
|
|
1440
|
+
if (error.statusCode !== 404 && error.code !== 'BlobNotFound') {
|
|
1441
|
+
throw error;
|
|
1442
|
+
}
|
|
1443
|
+
}
|
|
1444
|
+
const content = JSON.stringify(systemData, null, 2);
|
|
1445
|
+
// ATOMIC WRITE: Use ETag precondition
|
|
1446
|
+
await blockBlobClient.upload(content, content.length, {
|
|
1447
|
+
blobHTTPHeaders: { blobContentType: 'application/json' },
|
|
1448
|
+
conditions: currentETag
|
|
1449
|
+
? { ifMatch: currentETag }
|
|
1450
|
+
: { ifNoneMatch: '*' }
|
|
1451
|
+
});
|
|
1452
|
+
// Success!
|
|
1453
|
+
return;
|
|
1454
|
+
}
|
|
1455
|
+
catch (error) {
|
|
1456
|
+
// Precondition failed - concurrent modification
|
|
1457
|
+
if (error.statusCode === 412 || error.code === 'ConditionNotMet') {
|
|
1458
|
+
if (attempt === maxRetries - 1) {
|
|
1459
|
+
this.logger.error(`Max retries (${maxRetries}) exceeded for HNSW system data`);
|
|
1460
|
+
throw new Error('Failed to save HNSW system data: max retries exceeded due to concurrent modifications');
|
|
1461
|
+
}
|
|
1462
|
+
const backoffMs = 50 * Math.pow(2, attempt);
|
|
1463
|
+
await new Promise(resolve => setTimeout(resolve, backoffMs));
|
|
1464
|
+
continue;
|
|
1465
|
+
}
|
|
1466
|
+
// Other error - rethrow
|
|
1467
|
+
this.logger.error('Failed to save HNSW system data:', error);
|
|
1468
|
+
throw new Error(`Failed to save HNSW system data: ${error}`);
|
|
1469
|
+
}
|
|
1411
1470
|
}
|
|
1412
1471
|
}
|
|
1413
1472
|
/**
|
|
@@ -391,6 +391,8 @@ export declare class FileSystemStorage extends BaseStorage {
|
|
|
391
391
|
} | null>;
|
|
392
392
|
/**
|
|
393
393
|
* Save HNSW system data (entry point, max level)
|
|
394
|
+
*
|
|
395
|
+
* CRITICAL FIX (v4.10.1): Atomic write to prevent race conditions during concurrent updates
|
|
394
396
|
*/
|
|
395
397
|
saveHNSWSystem(systemData: {
|
|
396
398
|
entryPointId: string | null;
|