@soulcraft/brainy 4.10.3 → 4.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,101 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ## [4.11.0](https://github.com/soulcraftlabs/brainy/compare/v4.10.4...v4.11.0) (2025-10-30)
6
+
7
+ ### 🚨 CRITICAL BUG FIX
8
+
9
+ **DataAPI.restore() Complete Data Loss Bug Fixed**
10
+
11
+ Previous versions (v4.10.4 and earlier) had a critical bug where `DataAPI.restore()` did NOT persist data to storage, causing complete data loss after instance restart or cache clear. **If you used backup/restore in v4.10.4 or earlier, your restored data was NOT saved.**
12
+
13
+ ### 🔧 What Was Fixed
14
+
15
+ * **fix(api)**: DataAPI.restore() now properly persists data to all storage adapters
16
+ - **Root Cause**: restore() called `storage.saveNoun()` directly, bypassing all indexes and proper persistence
17
+ - **Fix**: Now uses `brain.addMany()` and `brain.relateMany()` (proper persistence path)
18
+ - **Result**: Data now survives instance restart and is fully indexed/searchable
19
+
20
+ ### ✨ Improvements
21
+
22
+ * **feat(api)**: Enhanced restore() with progress reporting and error tracking
23
+ - **New Return Type**: Returns `{ entitiesRestored, relationshipsRestored, errors }` instead of `void`
24
+ - **Progress Callback**: Optional `onProgress(completed, total)` parameter for UI updates
25
+ - **Error Details**: Returns array of failed entities/relations with error messages
26
+ - **Verification**: Automatically verifies first entity is retrievable after restore
27
+
28
+ * **feat(api)**: Cross-storage restore support
29
+ - Backup from any storage adapter, restore to any other
30
+ - Example: Backup from GCS → Restore to Filesystem
31
+ - Automatically uses target storage's optimal batch configuration
32
+
33
+ * **perf(api)**: Storage-aware batching for restore operations
34
+ - Leverages v4.10.4's storage-aware batching (10-100x faster on cloud storage)
35
+ - Automatic backpressure management prevents circuit breaker activation
36
+ - Separate read/write circuit breakers (backup can run during restore throttling)
37
+
38
+ ### 📊 What's Now Guaranteed
39
+
40
+ | Feature | v4.10.4 | v4.11.0 |
41
+ |---------|---------|---------|
42
+ | Data Persists to Storage | ❌ No | ✅ Yes |
43
+ | Data Survives Restart | ❌ No | ✅ Yes |
44
+ | HNSW Index Updated | ❌ No | ✅ Yes |
45
+ | Metadata Index Updated | ❌ No | ✅ Yes |
46
+ | Searchable After Restore | ❌ No | ✅ Yes |
47
+ | Progress Reporting | ❌ No | ✅ Yes |
48
+ | Error Tracking | ❌ Silent | ✅ Detailed |
49
+ | Cross-Storage Support | ❌ No | ✅ Yes |
50
+
51
+ ### 🔄 Migration Guide
52
+
53
+ **No code changes required!** The fix is backward compatible:
54
+
55
+ ```typescript
56
+ // Old code (still works)
57
+ await brain.data().restore({ backup, overwrite: true })
58
+
59
+ // New code (with progress tracking)
60
+ const result = await brain.data().restore({
61
+ backup,
62
+ overwrite: true,
63
+ onProgress: (done, total) => {
64
+ console.log(`Restoring... ${done}/${total}`)
65
+ }
66
+ })
67
+
68
+ console.log(`✅ Restored ${result.entitiesRestored} entities`)
69
+ if (result.errors.length > 0) {
70
+ console.warn(`⚠️ ${result.errors.length} failures`)
71
+ }
72
+ ```
73
+
74
+ ### ⚠️ Breaking Changes (Minor API Change)
75
+
76
+ * **DataAPI.restore()** return type changed from `Promise<void>` to `Promise<{ entitiesRestored, relationshipsRestored, errors }>`
77
+ - Impact: Minimal - most code doesn't use the return value
78
+ - Fix: Remove explicit `Promise<void>` type annotations if present
79
+
80
+ ### 📝 Files Modified
81
+
82
+ * `src/api/DataAPI.ts` - Complete rewrite of restore() method (lines 161-338)
83
+
84
+ ### [4.10.4](https://github.com/soulcraftlabs/brainy/compare/v4.10.3...v4.10.4) (2025-10-30)
85
+
86
+ * fix: prevent circuit breaker activation and data loss during bulk imports
87
+ - Storage-aware batching system prevents rate limiting on cloud storage (GCS, S3, R2, Azure)
88
+ - Separate read/write circuit breakers prevent read lockouts during write throttling
89
+ - ImportCoordinator uses addMany()/relateMany() for 10-100x performance improvement
90
+ - Fixes silent data loss and 30+ second lockouts on 1000+ row imports
91
+
92
+ ### [4.10.3](https://github.com/soulcraftlabs/brainy/compare/v4.10.2...v4.10.3) (2025-10-29)
93
+
94
+ * fix: add atomic writes to ALL file operations to prevent concurrent write corruption
95
+
96
+ ### [4.10.2](https://github.com/soulcraftlabs/brainy/compare/v4.10.1...v4.10.2) (2025-10-29)
97
+
98
+ * fix: VFS not initialized during Excel import, causing 0 files accessible
99
+
5
100
  ### [4.10.1](https://github.com/soulcraftlabs/brainy/compare/v4.10.0...v4.10.1) (2025-10-29)
6
101
 
7
102
  - fix: add mutex locks to FileSystemStorage for HNSW concurrency (CRITICAL) (ff86e88)
@@ -81,13 +81,31 @@ export declare class DataAPI {
81
81
  }>;
82
82
  /**
83
83
  * Restore data from a backup
84
+ *
85
+ * v4.11.1: CRITICAL FIX - Now uses brain.addMany() and brain.relateMany()
86
+ * Previous implementation only wrote to storage cache without updating indexes,
87
+ * causing complete data loss on restart. This fix ensures:
88
+ * - All 5 indexes updated (HNSW, metadata, adjacency, sparse, type-aware)
89
+ * - Proper persistence to disk/cloud storage
90
+ * - Storage-aware batching for optimal performance
91
+ * - Atomic writes to prevent corruption
92
+ * - Data survives instance restart
84
93
  */
85
94
  restore(params: {
86
95
  backup: BackupData;
87
96
  merge?: boolean;
88
97
  overwrite?: boolean;
89
98
  validate?: boolean;
90
- }): Promise<void>;
99
+ onProgress?: (completed: number, total: number) => void;
100
+ }): Promise<{
101
+ entitiesRestored: number;
102
+ relationshipsRestored: number;
103
+ errors: Array<{
104
+ type: 'entity' | 'relation';
105
+ id: string;
106
+ error: string;
107
+ }>;
108
+ }>;
91
109
  /**
92
110
  * Clear data
93
111
  */
@@ -75,89 +75,150 @@ export class DataAPI {
75
75
  }
76
76
  /**
77
77
  * Restore data from a backup
78
+ *
79
+ * v4.11.1: CRITICAL FIX - Now uses brain.addMany() and brain.relateMany()
80
+ * Previous implementation only wrote to storage cache without updating indexes,
81
+ * causing complete data loss on restart. This fix ensures:
82
+ * - All 5 indexes updated (HNSW, metadata, adjacency, sparse, type-aware)
83
+ * - Proper persistence to disk/cloud storage
84
+ * - Storage-aware batching for optimal performance
85
+ * - Atomic writes to prevent corruption
86
+ * - Data survives instance restart
78
87
  */
79
88
  async restore(params) {
80
- const { backup, merge = false, overwrite = false, validate = true } = params;
89
+ const { backup, merge = false, overwrite = false, validate = true, onProgress } = params;
90
+ const result = {
91
+ entitiesRestored: 0,
92
+ relationshipsRestored: 0,
93
+ errors: []
94
+ };
81
95
  // Validate backup format
82
96
  if (validate) {
83
97
  if (!backup.version || !backup.entities || !backup.relations) {
84
- throw new Error('Invalid backup format');
98
+ throw new Error('Invalid backup format: missing version, entities, or relations');
85
99
  }
86
100
  }
101
+ // Validate brain instance is available (required for v4.11.1+ restore)
102
+ if (!this.brain) {
103
+ throw new Error('Restore requires brain instance. DataAPI must be initialized with brain reference. ' +
104
+ 'Use: await brain.data() instead of constructing DataAPI directly.');
105
+ }
87
106
  // Clear existing data if not merging
88
107
  if (!merge && overwrite) {
89
108
  await this.clear({ entities: true, relations: true });
90
109
  }
91
- // Restore entities
92
- for (const entity of backup.entities) {
110
+ // ============================================
111
+ // Phase 1: Restore entities using addMany()
112
+ // v4.11.1: Uses proper persistence path through brain.addMany()
113
+ // ============================================
114
+ // Prepare entity parameters for addMany()
115
+ const entityParams = backup.entities
116
+ .filter(entity => {
117
+ // Skip existing entities when merging without overwrite
118
+ if (merge && !overwrite) {
119
+ // Note: We'll rely on addMany's internal duplicate handling
120
+ // rather than checking each entity individually (performance)
121
+ return true;
122
+ }
123
+ return true;
124
+ })
125
+ .map(entity => {
126
+ // Extract data field from metadata (backup format compatibility)
127
+ // Backup stores the original data in metadata.data
128
+ const data = entity.metadata?.data || entity.id;
129
+ return {
130
+ id: entity.id,
131
+ data, // Required field for brainy.add()
132
+ type: entity.type,
133
+ metadata: entity.metadata || {},
134
+ vector: entity.vector, // Preserve original vectors from backup
135
+ service: entity.service,
136
+ // Preserve confidence and weight if available
137
+ confidence: entity.metadata?.confidence,
138
+ weight: entity.metadata?.weight
139
+ };
140
+ });
141
+ // Restore entities in batches using storage-aware batching (v4.11.0)
142
+ if (entityParams.length > 0) {
93
143
  try {
94
- // v4.0.0: Prepare noun and metadata separately
95
- const noun = {
96
- id: entity.id,
97
- vector: entity.vector || new Array(384).fill(0), // Default vector if missing
98
- connections: new Map(),
99
- level: 0
100
- };
101
- const metadata = {
102
- ...entity.metadata,
103
- noun: entity.type,
104
- service: entity.service,
105
- createdAt: Date.now()
106
- };
107
- // Check if entity exists when merging
108
- if (merge) {
109
- const existing = await this.storage.getNoun(entity.id);
110
- if (existing && !overwrite) {
111
- continue; // Skip existing entities unless overwriting
144
+ const addResult = await this.brain.addMany({
145
+ items: entityParams,
146
+ continueOnError: true,
147
+ onProgress: (done, total) => {
148
+ onProgress?.(done, backup.entities.length + backup.relations.length);
112
149
  }
113
- }
114
- await this.storage.saveNoun(noun);
115
- await this.storage.saveNounMetadata(entity.id, metadata);
150
+ });
151
+ result.entitiesRestored = addResult.successful.length;
152
+ // Track errors
153
+ addResult.failed.forEach((failure) => {
154
+ result.errors.push({
155
+ type: 'entity',
156
+ id: failure.item?.id || 'unknown',
157
+ error: failure.error || 'Unknown error'
158
+ });
159
+ });
116
160
  }
117
161
  catch (error) {
118
- console.error(`Failed to restore entity ${entity.id}:`, error);
162
+ throw new Error(`Failed to restore entities: ${error.message}`);
119
163
  }
120
164
  }
121
- // Restore relations
122
- for (const relation of backup.relations) {
165
+ // ============================================
166
+ // Phase 2: Restore relationships using relateMany()
167
+ // v4.11.1: Uses proper persistence path through brain.relateMany()
168
+ // ============================================
169
+ // Prepare relationship parameters for relateMany()
170
+ const relationParams = backup.relations
171
+ .filter(relation => {
172
+ // Skip existing relations when merging without overwrite
173
+ if (merge && !overwrite) {
174
+ // Note: We'll rely on relateMany's internal duplicate handling
175
+ return true;
176
+ }
177
+ return true;
178
+ })
179
+ .map(relation => ({
180
+ from: relation.from,
181
+ to: relation.to,
182
+ type: relation.type,
183
+ metadata: relation.metadata || {},
184
+ weight: relation.weight || 1.0
185
+ // Note: relation.id is ignored - brain.relate() generates new IDs
186
+ // This is intentional to avoid ID conflicts
187
+ }));
188
+ // Restore relationships in batches using storage-aware batching (v4.11.0)
189
+ if (relationParams.length > 0) {
123
190
  try {
124
- // Get source and target entities to compute relation vector
125
- const sourceNoun = await this.storage.getNoun(relation.from);
126
- const targetNoun = await this.storage.getNoun(relation.to);
127
- if (!sourceNoun || !targetNoun) {
128
- console.warn(`Skipping relation ${relation.id}: missing entities`);
129
- continue;
130
- }
131
- // Compute relation vector as average of source and target
132
- const relationVector = sourceNoun.vector.map((v, i) => (v + targetNoun.vector[i]) / 2);
133
- // v4.0.0: Prepare verb and metadata separately
134
- const verb = {
135
- id: relation.id,
136
- vector: relationVector,
137
- connections: new Map(),
138
- verb: relation.type,
139
- sourceId: relation.from,
140
- targetId: relation.to
141
- };
142
- const verbMetadata = {
143
- weight: relation.weight,
144
- ...relation.metadata,
145
- createdAt: Date.now()
146
- };
147
- // Check if relation exists when merging
148
- if (merge) {
149
- const existing = await this.storage.getVerb(relation.id);
150
- if (existing && !overwrite) {
151
- continue;
152
- }
153
- }
154
- await this.storage.saveVerb(verb);
155
- await this.storage.saveVerbMetadata(relation.id, verbMetadata);
191
+ const relateResult = await this.brain.relateMany({
192
+ items: relationParams,
193
+ continueOnError: true
194
+ });
195
+ result.relationshipsRestored = relateResult.successful.length;
196
+ // Track errors
197
+ relateResult.failed.forEach((failure) => {
198
+ result.errors.push({
199
+ type: 'relation',
200
+ id: failure.item?.from + '->' + failure.item?.to || 'unknown',
201
+ error: failure.error || 'Unknown error'
202
+ });
203
+ });
156
204
  }
157
205
  catch (error) {
158
- console.error(`Failed to restore relation ${relation.id}:`, error);
206
+ throw new Error(`Failed to restore relationships: ${error.message}`);
207
+ }
208
+ }
209
+ // ============================================
210
+ // Phase 3: Verify restoration succeeded
211
+ // ============================================
212
+ // Sample verification: Check that first entity is actually retrievable
213
+ if (backup.entities.length > 0 && result.entitiesRestored > 0) {
214
+ const firstEntityId = backup.entities[0].id;
215
+ const verified = await this.brain.get(firstEntityId);
216
+ if (!verified) {
217
+ console.warn(`⚠️ Restore completed but verification failed - entity ${firstEntityId} not retrievable. ` +
218
+ `This may indicate a persistence issue with the storage adapter.`);
159
219
  }
160
220
  }
221
+ return result;
161
222
  }
162
223
  /**
163
224
  * Clear data
package/dist/brainy.js CHANGED
@@ -1517,6 +1517,16 @@ export class Brainy {
1517
1517
  */
1518
1518
  async addMany(params) {
1519
1519
  await this.ensureInitialized();
1520
+ // Get optimal batch configuration from storage adapter (v4.11.0)
1521
+ // This automatically adapts to storage characteristics:
1522
+ // - GCS: 50 batch size, 100ms delay, sequential
1523
+ // - S3/R2: 100 batch size, 50ms delay, parallel
1524
+ // - Memory: 1000 batch size, 0ms delay, parallel
1525
+ const storageConfig = this.storage.getBatchConfig();
1526
+ // Use storage preferences (allow explicit user override)
1527
+ const batchSize = params.chunkSize ?? storageConfig.maxBatchSize;
1528
+ const parallel = params.parallel ?? storageConfig.supportsParallelWrites;
1529
+ const delayMs = storageConfig.batchDelayMs;
1520
1530
  const result = {
1521
1531
  successful: [],
1522
1532
  failed: [],
@@ -1524,10 +1534,10 @@ export class Brainy {
1524
1534
  duration: 0
1525
1535
  };
1526
1536
  const startTime = Date.now();
1527
- const chunkSize = params.chunkSize || 100;
1528
- // Process in chunks
1529
- for (let i = 0; i < params.items.length; i += chunkSize) {
1530
- const chunk = params.items.slice(i, i + chunkSize);
1537
+ let lastBatchTime = Date.now();
1538
+ // Process in batches
1539
+ for (let i = 0; i < params.items.length; i += batchSize) {
1540
+ const chunk = params.items.slice(i, i + batchSize);
1531
1541
  const promises = chunk.map(async (item) => {
1532
1542
  try {
1533
1543
  const id = await this.add(item);
@@ -1543,18 +1553,29 @@ export class Brainy {
1543
1553
  }
1544
1554
  }
1545
1555
  });
1546
- if (params.parallel !== false) {
1556
+ // Parallel vs Sequential based on storage preference
1557
+ if (parallel) {
1547
1558
  await Promise.allSettled(promises);
1548
1559
  }
1549
1560
  else {
1561
+ // Sequential processing for rate-limited storage
1550
1562
  for (const promise of promises) {
1551
1563
  await promise;
1552
1564
  }
1553
1565
  }
1554
- // Report progress
1566
+ // Progress callback
1555
1567
  if (params.onProgress) {
1556
1568
  params.onProgress(result.successful.length + result.failed.length, result.total);
1557
1569
  }
1570
+ // Adaptive delay between batches
1571
+ if (i + batchSize < params.items.length && delayMs > 0) {
1572
+ const batchDuration = Date.now() - lastBatchTime;
1573
+ // If batch was too fast, add delay to respect rate limits
1574
+ if (batchDuration < delayMs) {
1575
+ await new Promise(resolve => setTimeout(resolve, delayMs - batchDuration));
1576
+ }
1577
+ lastBatchTime = Date.now();
1578
+ }
1558
1579
  }
1559
1580
  result.duration = Date.now() - startTime;
1560
1581
  return result;
@@ -1655,6 +1676,13 @@ export class Brainy {
1655
1676
  */
1656
1677
  async relateMany(params) {
1657
1678
  await this.ensureInitialized();
1679
+ // Get optimal batch configuration from storage adapter (v4.11.0)
1680
+ // Automatically adapts to storage characteristics
1681
+ const storageConfig = this.storage.getBatchConfig();
1682
+ // Use storage preferences (allow explicit user override)
1683
+ const batchSize = params.chunkSize ?? storageConfig.maxBatchSize;
1684
+ const parallel = params.parallel ?? storageConfig.supportsParallelWrites;
1685
+ const delayMs = storageConfig.batchDelayMs;
1658
1686
  const result = {
1659
1687
  successful: [],
1660
1688
  failed: [],
@@ -1662,11 +1690,11 @@ export class Brainy {
1662
1690
  duration: 0
1663
1691
  };
1664
1692
  const startTime = Date.now();
1665
- const chunkSize = params.chunkSize || 100;
1666
- for (let i = 0; i < params.items.length; i += chunkSize) {
1667
- const chunk = params.items.slice(i, i + chunkSize);
1668
- if (params.parallel) {
1669
- // Process chunk in parallel
1693
+ let lastBatchTime = Date.now();
1694
+ for (let i = 0; i < params.items.length; i += batchSize) {
1695
+ const chunk = params.items.slice(i, i + batchSize);
1696
+ if (parallel) {
1697
+ // Parallel processing
1670
1698
  const promises = chunk.map(async (item) => {
1671
1699
  try {
1672
1700
  const relationId = await this.relate(item);
@@ -1682,10 +1710,10 @@ export class Brainy {
1682
1710
  }
1683
1711
  }
1684
1712
  });
1685
- await Promise.all(promises);
1713
+ await Promise.allSettled(promises);
1686
1714
  }
1687
1715
  else {
1688
- // Process chunk sequentially
1716
+ // Sequential processing
1689
1717
  for (const item of chunk) {
1690
1718
  try {
1691
1719
  const relationId = await this.relate(item);
@@ -1702,10 +1730,18 @@ export class Brainy {
1702
1730
  }
1703
1731
  }
1704
1732
  }
1705
- // Report progress
1733
+ // Progress callback
1706
1734
  if (params.onProgress) {
1707
1735
  params.onProgress(result.successful.length + result.failed.length, result.total);
1708
1736
  }
1737
+ // Adaptive delay
1738
+ if (i + batchSize < params.items.length && delayMs > 0) {
1739
+ const batchDuration = Date.now() - lastBatchTime;
1740
+ if (batchDuration < delayMs) {
1741
+ await new Promise(resolve => setTimeout(resolve, delayMs - batchDuration));
1742
+ }
1743
+ lastBatchTime = Date.now();
1744
+ }
1709
1745
  }
1710
1746
  result.duration = Date.now() - startTime;
1711
1747
  return result.successful;