@soulcraft/brainy 5.7.5 → 5.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/README.md +44 -0
- package/dist/brainy.d.ts +90 -3
- package/dist/brainy.js +195 -46
- package/dist/index.d.ts +6 -0
- package/dist/index.js +4 -0
- package/package.json +13 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
### [5.7.7](https://github.com/soulcraftlabs/brainy/compare/v5.7.6...v5.7.7) (2025-11-13)
|
|
6
|
+
|
|
7
|
+
- docs: update index architecture documentation for v5.7.7 lazy loading (67039fc)
|
|
8
|
+
|
|
9
|
+
|
|
5
10
|
### [5.7.4](https://github.com/soulcraftlabs/brainy/compare/v5.7.3...v5.7.4) (2025-11-12)
|
|
6
11
|
|
|
7
12
|
- fix: resolve v5.7.3 race condition by persisting write-through cache (v5.7.4) (6e19ec8)
|
package/README.md
CHANGED
|
@@ -135,6 +135,50 @@ const results = await brain.find({
|
|
|
135
135
|
|
|
136
136
|
---
|
|
137
137
|
|
|
138
|
+
## Entity Extraction (NEW in v5.7.6)
|
|
139
|
+
|
|
140
|
+
**Extract entities from text with AI-powered classification:**
|
|
141
|
+
|
|
142
|
+
```javascript
|
|
143
|
+
import { Brainy, NounType } from '@soulcraft/brainy'
|
|
144
|
+
|
|
145
|
+
const brain = new Brainy()
|
|
146
|
+
await brain.init()
|
|
147
|
+
|
|
148
|
+
// Extract all entities
|
|
149
|
+
const entities = await brain.extractEntities('John Smith founded Acme Corp in New York')
|
|
150
|
+
// Returns:
|
|
151
|
+
// [
|
|
152
|
+
// { text: 'John Smith', type: NounType.Person, confidence: 0.95 },
|
|
153
|
+
// { text: 'Acme Corp', type: NounType.Organization, confidence: 0.92 },
|
|
154
|
+
// { text: 'New York', type: NounType.Location, confidence: 0.88 }
|
|
155
|
+
// ]
|
|
156
|
+
|
|
157
|
+
// Extract with filters
|
|
158
|
+
const people = await brain.extractEntities(resume, {
|
|
159
|
+
types: [NounType.Person],
|
|
160
|
+
confidence: 0.8
|
|
161
|
+
})
|
|
162
|
+
|
|
163
|
+
// Advanced: Direct access to extractors
|
|
164
|
+
import { SmartExtractor } from '@soulcraft/brainy'
|
|
165
|
+
|
|
166
|
+
const extractor = new SmartExtractor(brain, { minConfidence: 0.7 })
|
|
167
|
+
const result = await extractor.extract('CEO', {
|
|
168
|
+
formatContext: { format: 'excel', columnHeader: 'Title' }
|
|
169
|
+
})
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
**Features:**
|
|
173
|
+
- 🎯 **4-Signal Ensemble** - ExactMatch (40%) + Embedding (35%) + Pattern (20%) + Context (5%)
|
|
174
|
+
- π **Format Intelligence** - Adapts to Excel, CSV, PDF, YAML, DOCX, JSON, Markdown
|
|
175
|
+
- ⚡ **Fast** - ~15-20ms per extraction with LRU caching
|
|
176
|
+
- π **42 Types** - Person, Organization, Location, Document, and 38 more
|
|
177
|
+
|
|
178
|
+
**→ [Neural Extraction Guide](docs/neural-extraction.md)** | **[Import Preview Mode](docs/neural-extraction.md#import-preview-mode)**
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
138
182
|
## From Prototype to Planet Scale
|
|
139
183
|
|
|
140
184
|
**The same API. Zero rewrites. Any scale.**
|
package/dist/brainy.d.ts
CHANGED
|
@@ -44,6 +44,9 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
44
44
|
private _vfs?;
|
|
45
45
|
private initialized;
|
|
46
46
|
private dimensions?;
|
|
47
|
+
private lazyRebuildInProgress;
|
|
48
|
+
private lazyRebuildCompleted;
|
|
49
|
+
private lazyRebuildPromise;
|
|
47
50
|
constructor(config?: BrainyConfig);
|
|
48
51
|
/**
|
|
49
52
|
* Initialize Brainy - MUST be called before use
|
|
@@ -1075,6 +1078,35 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
1075
1078
|
includeVectors?: boolean;
|
|
1076
1079
|
neuralMatching?: boolean;
|
|
1077
1080
|
}): Promise<ExtractedEntity[]>;
|
|
1081
|
+
/**
|
|
1082
|
+
* Extract entities from text (alias for extract())
|
|
1083
|
+
* v5.7.6: Added for API clarity and Workshop team request
|
|
1084
|
+
*
|
|
1085
|
+
* Uses NeuralEntityExtractor with SmartExtractor ensemble (4-signal architecture):
|
|
1086
|
+
* - ExactMatch (40%) - Dictionary lookups
|
|
1087
|
+
* - Embedding (35%) - Semantic similarity
|
|
1088
|
+
* - Pattern (20%) - Regex patterns
|
|
1089
|
+
* - Context (5%) - Contextual hints
|
|
1090
|
+
*
|
|
1091
|
+
* @param text - Text to extract entities from
|
|
1092
|
+
* @param options - Extraction options
|
|
1093
|
+
* @returns Array of extracted entities with types and confidence scores
|
|
1094
|
+
*
|
|
1095
|
+
* @example
|
|
1096
|
+
* ```typescript
|
|
1097
|
+
* const entities = await brain.extractEntities('John Smith founded Acme Corp', {
|
|
1098
|
+
* confidence: 0.7,
|
|
1099
|
+
* types: [NounType.Person, NounType.Organization],
|
|
1100
|
+
* neuralMatching: true
|
|
1101
|
+
* })
|
|
1102
|
+
* ```
|
|
1103
|
+
*/
|
|
1104
|
+
extractEntities(text: string, options?: {
|
|
1105
|
+
types?: NounType[];
|
|
1106
|
+
confidence?: number;
|
|
1107
|
+
includeVectors?: boolean;
|
|
1108
|
+
neuralMatching?: boolean;
|
|
1109
|
+
}): Promise<ExtractedEntity[]>;
|
|
1078
1110
|
/**
|
|
1079
1111
|
* Extract concepts from text
|
|
1080
1112
|
*
|
|
@@ -1365,6 +1397,41 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
1365
1397
|
* })
|
|
1366
1398
|
*/
|
|
1367
1399
|
flush(): Promise<void>;
|
|
1400
|
+
/**
|
|
1401
|
+
* Get index loading status (v5.7.7 - Diagnostic for lazy loading)
|
|
1402
|
+
*
|
|
1403
|
+
* Returns detailed information about index population and lazy loading state.
|
|
1404
|
+
* Useful for debugging empty query results or performance troubleshooting.
|
|
1405
|
+
*
|
|
1406
|
+
* @example
|
|
1407
|
+
* ```typescript
|
|
1408
|
+
* const status = await brain.getIndexStatus()
|
|
1409
|
+
* console.log(`HNSW Index: ${status.hnswIndex.size} entities`)
|
|
1410
|
+
* console.log(`Metadata Index: ${status.metadataIndex.entries} entries`)
|
|
1411
|
+
* console.log(`Graph Index: ${status.graphIndex.relationships} relationships`)
|
|
1412
|
+
* console.log(`Lazy rebuild completed: ${status.lazyRebuildCompleted}`)
|
|
1413
|
+
* ```
|
|
1414
|
+
*/
|
|
1415
|
+
getIndexStatus(): Promise<{
|
|
1416
|
+
initialized: boolean;
|
|
1417
|
+
lazyRebuildCompleted: boolean;
|
|
1418
|
+
disableAutoRebuild: boolean;
|
|
1419
|
+
hnswIndex: {
|
|
1420
|
+
size: number;
|
|
1421
|
+
populated: boolean;
|
|
1422
|
+
};
|
|
1423
|
+
metadataIndex: {
|
|
1424
|
+
entries: number;
|
|
1425
|
+
populated: boolean;
|
|
1426
|
+
};
|
|
1427
|
+
graphIndex: {
|
|
1428
|
+
relationships: number;
|
|
1429
|
+
populated: boolean;
|
|
1430
|
+
};
|
|
1431
|
+
storage: {
|
|
1432
|
+
totalEntities: number;
|
|
1433
|
+
};
|
|
1434
|
+
}>;
|
|
1368
1435
|
/**
|
|
1369
1436
|
* Efficient Pagination API - Production-scale pagination using index-first approach
|
|
1370
1437
|
* Automatically optimizes based on query type and applies pagination at the index level
|
|
@@ -1654,22 +1721,42 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
1654
1721
|
*/
|
|
1655
1722
|
private normalizeConfig;
|
|
1656
1723
|
/**
|
|
1657
|
-
*
|
|
1724
|
+
* Ensure indexes are loaded (v5.7.7 - Production-scale lazy loading)
|
|
1725
|
+
*
|
|
1726
|
+
* Called by query methods (find, search, get, etc.) when disableAutoRebuild is true.
|
|
1727
|
+
* Handles concurrent queries safely - multiple calls wait for same rebuild.
|
|
1728
|
+
*
|
|
1729
|
+
* Performance:
|
|
1730
|
+
* - First query: Triggers rebuild (~50-200ms for 1K-10K entities)
|
|
1731
|
+
* - Concurrent queries: Wait for same rebuild (no duplicate work)
|
|
1732
|
+
* - Subsequent queries: Instant (0ms check, indexes already loaded)
|
|
1733
|
+
*
|
|
1734
|
+
* Production scale:
|
|
1735
|
+
* - 1K entities: ~50ms
|
|
1736
|
+
* - 10K entities: ~200ms
|
|
1737
|
+
* - 100K entities: ~2s (streaming pagination)
|
|
1738
|
+
* - 1M+ entities: Uses chunked lazy loading (per-type on demand)
|
|
1658
1739
|
*/
|
|
1740
|
+
private ensureIndexesLoaded;
|
|
1659
1741
|
/**
|
|
1660
|
-
* Rebuild indexes from persisted data if needed (v3.35.0
|
|
1742
|
+
* Rebuild indexes from persisted data if needed (v3.35.0+, v5.7.7 LAZY LOADING)
|
|
1661
1743
|
*
|
|
1662
1744
|
* FIXES FOR CRITICAL BUGS:
|
|
1663
1745
|
* - Bug #1: GraphAdjacencyIndex rebuild never called ✅ FIXED
|
|
1664
1746
|
* - Bug #2: Early return blocks recovery when count=0 ✅ FIXED
|
|
1665
1747
|
* - Bug #4: HNSW index has no rebuild mechanism ✅ FIXED
|
|
1748
|
+
* - Bug #5: disableAutoRebuild leaves indexes empty forever ✅ FIXED (v5.7.7)
|
|
1666
1749
|
*
|
|
1667
1750
|
* Production-grade rebuild with:
|
|
1668
|
-
* - Handles
|
|
1751
|
+
* - Handles BILLIONS of entities via streaming pagination
|
|
1669
1752
|
* - Smart threshold-based decisions (auto-rebuild < 1000 items)
|
|
1753
|
+
* - Lazy loading on first query (when disableAutoRebuild: true)
|
|
1670
1754
|
* - Progress reporting for large datasets
|
|
1671
1755
|
* - Parallel index rebuilds for performance
|
|
1672
1756
|
* - Robust error recovery (continues on partial failures)
|
|
1757
|
+
* - Concurrency-safe (multiple queries wait for same rebuild)
|
|
1758
|
+
*
|
|
1759
|
+
* @param force - Force rebuild even if disableAutoRebuild is true (for lazy loading)
|
|
1673
1760
|
*/
|
|
1674
1761
|
private rebuildIndexesIfNeeded;
|
|
1675
1762
|
/**
|
package/dist/brainy.js
CHANGED
|
@@ -35,6 +35,11 @@ export class Brainy {
|
|
|
35
35
|
constructor(config) {
|
|
36
36
|
// State
|
|
37
37
|
this.initialized = false;
|
|
38
|
+
// Lazy rebuild state (v5.7.7 - Production-scale lazy loading)
|
|
39
|
+
// Prevents race conditions when multiple queries trigger rebuild simultaneously
|
|
40
|
+
this.lazyRebuildInProgress = false;
|
|
41
|
+
this.lazyRebuildCompleted = false;
|
|
42
|
+
this.lazyRebuildPromise = null;
|
|
38
43
|
// Normalize configuration with defaults
|
|
39
44
|
this.config = this.normalizeConfig(config);
|
|
40
45
|
// Setup core components
|
|
@@ -1115,6 +1120,9 @@ export class Brainy {
|
|
|
1115
1120
|
*/
|
|
1116
1121
|
async find(query) {
|
|
1117
1122
|
await this.ensureInitialized();
|
|
1123
|
+
// v5.7.7: Ensure indexes are loaded (lazy loading when disableAutoRebuild: true)
|
|
1124
|
+
// This is a production-safe, concurrency-controlled lazy load
|
|
1125
|
+
await this.ensureIndexesLoaded();
|
|
1118
1126
|
// Parse natural language queries
|
|
1119
1127
|
const params = typeof query === 'string' ? await this.parseNaturalQuery(query) : query;
|
|
1120
1128
|
// Phase 3: Automatic type inference for 40% latency reduction
|
|
@@ -2012,8 +2020,11 @@ export class Brainy {
|
|
|
2012
2020
|
this.metadataIndex = new MetadataIndexManager(this.storage);
|
|
2013
2021
|
await this.metadataIndex.init();
|
|
2014
2022
|
this.graphIndex = new GraphAdjacencyIndex(this.storage);
|
|
2015
|
-
//
|
|
2016
|
-
|
|
2023
|
+
// v5.7.7: Reset lazy loading state when switching branches
|
|
2024
|
+
// Indexes contain data from previous branch, must rebuild for new branch
|
|
2025
|
+
this.lazyRebuildCompleted = false;
|
|
2026
|
+
// Rebuild indexes from new branch data (force=true to override disableAutoRebuild)
|
|
2027
|
+
await this.rebuildIndexesIfNeeded(true);
|
|
2017
2028
|
// Re-initialize VFS for new branch
|
|
2018
2029
|
if (this._vfs) {
|
|
2019
2030
|
this._vfs = new VirtualFileSystem(this);
|
|
@@ -2742,6 +2753,32 @@ export class Brainy {
|
|
|
2742
2753
|
}
|
|
2743
2754
|
return await this._extractor.extract(text, options);
|
|
2744
2755
|
}
|
|
2756
|
+
/**
|
|
2757
|
+
* Extract entities from text (alias for extract())
|
|
2758
|
+
* v5.7.6: Added for API clarity and Workshop team request
|
|
2759
|
+
*
|
|
2760
|
+
* Uses NeuralEntityExtractor with SmartExtractor ensemble (4-signal architecture):
|
|
2761
|
+
* - ExactMatch (40%) - Dictionary lookups
|
|
2762
|
+
* - Embedding (35%) - Semantic similarity
|
|
2763
|
+
* - Pattern (20%) - Regex patterns
|
|
2764
|
+
* - Context (5%) - Contextual hints
|
|
2765
|
+
*
|
|
2766
|
+
* @param text - Text to extract entities from
|
|
2767
|
+
* @param options - Extraction options
|
|
2768
|
+
* @returns Array of extracted entities with types and confidence scores
|
|
2769
|
+
*
|
|
2770
|
+
* @example
|
|
2771
|
+
* ```typescript
|
|
2772
|
+
* const entities = await brain.extractEntities('John Smith founded Acme Corp', {
|
|
2773
|
+
* confidence: 0.7,
|
|
2774
|
+
* types: [NounType.Person, NounType.Organization],
|
|
2775
|
+
* neuralMatching: true
|
|
2776
|
+
* })
|
|
2777
|
+
* ```
|
|
2778
|
+
*/
|
|
2779
|
+
async extractEntities(text, options) {
|
|
2780
|
+
return this.extract(text, options);
|
|
2781
|
+
}
|
|
2745
2782
|
/**
|
|
2746
2783
|
* Extract concepts from text
|
|
2747
2784
|
*
|
|
@@ -3092,6 +3129,55 @@ export class Brainy {
|
|
|
3092
3129
|
const elapsed = Date.now() - startTime;
|
|
3093
3130
|
console.log(`✅ All indexes flushed to disk in ${elapsed}ms`);
|
|
3094
3131
|
}
|
|
3132
|
+
/**
|
|
3133
|
+
* Get index loading status (v5.7.7 - Diagnostic for lazy loading)
|
|
3134
|
+
*
|
|
3135
|
+
* Returns detailed information about index population and lazy loading state.
|
|
3136
|
+
* Useful for debugging empty query results or performance troubleshooting.
|
|
3137
|
+
*
|
|
3138
|
+
* @example
|
|
3139
|
+
* ```typescript
|
|
3140
|
+
* const status = await brain.getIndexStatus()
|
|
3141
|
+
* console.log(`HNSW Index: ${status.hnswIndex.size} entities`)
|
|
3142
|
+
* console.log(`Metadata Index: ${status.metadataIndex.entries} entries`)
|
|
3143
|
+
* console.log(`Graph Index: ${status.graphIndex.relationships} relationships`)
|
|
3144
|
+
* console.log(`Lazy rebuild completed: ${status.lazyRebuildCompleted}`)
|
|
3145
|
+
* ```
|
|
3146
|
+
*/
|
|
3147
|
+
async getIndexStatus() {
|
|
3148
|
+
const metadataStats = await this.metadataIndex.getStats();
|
|
3149
|
+
const hnswSize = this.index.size();
|
|
3150
|
+
const graphSize = await this.graphIndex.size();
|
|
3151
|
+
// Check storage entity count
|
|
3152
|
+
let storageEntityCount = 0;
|
|
3153
|
+
try {
|
|
3154
|
+
const entities = await this.storage.getNouns({ pagination: { limit: 1 } });
|
|
3155
|
+
storageEntityCount = entities.totalCount || 0;
|
|
3156
|
+
}
|
|
3157
|
+
catch (e) {
|
|
3158
|
+
// Ignore errors
|
|
3159
|
+
}
|
|
3160
|
+
return {
|
|
3161
|
+
initialized: this.initialized,
|
|
3162
|
+
lazyRebuildCompleted: this.lazyRebuildCompleted,
|
|
3163
|
+
disableAutoRebuild: this.config.disableAutoRebuild || false,
|
|
3164
|
+
hnswIndex: {
|
|
3165
|
+
size: hnswSize,
|
|
3166
|
+
populated: hnswSize > 0
|
|
3167
|
+
},
|
|
3168
|
+
metadataIndex: {
|
|
3169
|
+
entries: metadataStats.totalEntries,
|
|
3170
|
+
populated: metadataStats.totalEntries > 0
|
|
3171
|
+
},
|
|
3172
|
+
graphIndex: {
|
|
3173
|
+
relationships: graphSize,
|
|
3174
|
+
populated: graphSize > 0
|
|
3175
|
+
},
|
|
3176
|
+
storage: {
|
|
3177
|
+
totalEntities: storageEntityCount
|
|
3178
|
+
}
|
|
3179
|
+
};
|
|
3180
|
+
}
|
|
3095
3181
|
/**
|
|
3096
3182
|
* Efficient Pagination API - Production-scale pagination using index-first approach
|
|
3097
3183
|
* Automatically optimizes based on query type and applies pagination at the index level
|
|
@@ -3884,35 +3970,97 @@ export class Brainy {
|
|
|
3884
3970
|
};
|
|
3885
3971
|
}
|
|
3886
3972
|
/**
|
|
3887
|
-
*
|
|
3973
|
+
* Ensure indexes are loaded (v5.7.7 - Production-scale lazy loading)
|
|
3974
|
+
*
|
|
3975
|
+
* Called by query methods (find, search, get, etc.) when disableAutoRebuild is true.
|
|
3976
|
+
* Handles concurrent queries safely - multiple calls wait for same rebuild.
|
|
3977
|
+
*
|
|
3978
|
+
* Performance:
|
|
3979
|
+
* - First query: Triggers rebuild (~50-200ms for 1K-10K entities)
|
|
3980
|
+
* - Concurrent queries: Wait for same rebuild (no duplicate work)
|
|
3981
|
+
* - Subsequent queries: Instant (0ms check, indexes already loaded)
|
|
3982
|
+
*
|
|
3983
|
+
* Production scale:
|
|
3984
|
+
* - 1K entities: ~50ms
|
|
3985
|
+
* - 10K entities: ~200ms
|
|
3986
|
+
* - 100K entities: ~2s (streaming pagination)
|
|
3987
|
+
* - 1M+ entities: Uses chunked lazy loading (per-type on demand)
|
|
3888
3988
|
*/
|
|
3989
|
+
async ensureIndexesLoaded() {
|
|
3990
|
+
// Fast path: If rebuild already completed, return immediately (0ms)
|
|
3991
|
+
if (this.lazyRebuildCompleted) {
|
|
3992
|
+
return;
|
|
3993
|
+
}
|
|
3994
|
+
// If indexes already populated, mark as complete and skip
|
|
3995
|
+
if (this.index.size() > 0) {
|
|
3996
|
+
this.lazyRebuildCompleted = true;
|
|
3997
|
+
return;
|
|
3998
|
+
}
|
|
3999
|
+
// Concurrency control: If rebuild is in progress, wait for it
|
|
4000
|
+
if (this.lazyRebuildInProgress && this.lazyRebuildPromise) {
|
|
4001
|
+
await this.lazyRebuildPromise;
|
|
4002
|
+
return;
|
|
4003
|
+
}
|
|
4004
|
+
// Check if lazy rebuild is needed
|
|
4005
|
+
// Only needed if: disableAutoRebuild=true AND indexes are empty AND storage has data
|
|
4006
|
+
if (!this.config.disableAutoRebuild) {
|
|
4007
|
+
// Auto-rebuild is enabled, indexes should already be loaded
|
|
4008
|
+
return;
|
|
4009
|
+
}
|
|
4010
|
+
// Check if storage has data (fast check with limit=1)
|
|
4011
|
+
const entities = await this.storage.getNouns({ pagination: { limit: 1 } });
|
|
4012
|
+
const hasData = (entities.totalCount && entities.totalCount > 0) || entities.items.length > 0;
|
|
4013
|
+
if (!hasData) {
|
|
4014
|
+
// Storage is empty, no rebuild needed
|
|
4015
|
+
this.lazyRebuildCompleted = true;
|
|
4016
|
+
return;
|
|
4017
|
+
}
|
|
4018
|
+
// Start lazy rebuild (with mutex to prevent concurrent rebuilds)
|
|
4019
|
+
this.lazyRebuildInProgress = true;
|
|
4020
|
+
this.lazyRebuildPromise = this.rebuildIndexesIfNeeded(true)
|
|
4021
|
+
.then(() => {
|
|
4022
|
+
this.lazyRebuildCompleted = true;
|
|
4023
|
+
})
|
|
4024
|
+
.finally(() => {
|
|
4025
|
+
this.lazyRebuildInProgress = false;
|
|
4026
|
+
this.lazyRebuildPromise = null;
|
|
4027
|
+
});
|
|
4028
|
+
await this.lazyRebuildPromise;
|
|
4029
|
+
}
|
|
3889
4030
|
/**
|
|
3890
|
-
* Rebuild indexes from persisted data if needed (v3.35.0
|
|
4031
|
+
* Rebuild indexes from persisted data if needed (v3.35.0+, v5.7.7 LAZY LOADING)
|
|
3891
4032
|
*
|
|
3892
4033
|
* FIXES FOR CRITICAL BUGS:
|
|
3893
4034
|
* - Bug #1: GraphAdjacencyIndex rebuild never called ✅ FIXED
|
|
3894
4035
|
* - Bug #2: Early return blocks recovery when count=0 ✅ FIXED
|
|
3895
4036
|
* - Bug #4: HNSW index has no rebuild mechanism ✅ FIXED
|
|
4037
|
+
* - Bug #5: disableAutoRebuild leaves indexes empty forever ✅ FIXED (v5.7.7)
|
|
3896
4038
|
*
|
|
3897
4039
|
* Production-grade rebuild with:
|
|
3898
|
-
* - Handles
|
|
4040
|
+
* - Handles BILLIONS of entities via streaming pagination
|
|
3899
4041
|
* - Smart threshold-based decisions (auto-rebuild < 1000 items)
|
|
4042
|
+
* - Lazy loading on first query (when disableAutoRebuild: true)
|
|
3900
4043
|
* - Progress reporting for large datasets
|
|
3901
4044
|
* - Parallel index rebuilds for performance
|
|
3902
4045
|
* - Robust error recovery (continues on partial failures)
|
|
4046
|
+
* - Concurrency-safe (multiple queries wait for same rebuild)
|
|
4047
|
+
*
|
|
4048
|
+
* @param force - Force rebuild even if disableAutoRebuild is true (for lazy loading)
|
|
3903
4049
|
*/
|
|
3904
|
-
async rebuildIndexesIfNeeded() {
|
|
4050
|
+
async rebuildIndexesIfNeeded(force = false) {
|
|
3905
4051
|
try {
|
|
3906
|
-
// Check if auto-rebuild is explicitly disabled
|
|
3907
|
-
|
|
4052
|
+
// v5.7.7: Check if auto-rebuild is explicitly disabled (ONLY during init, not for lazy loading)
|
|
4053
|
+
// force=true means this is a lazy rebuild triggered by first query
|
|
4054
|
+
if (this.config.disableAutoRebuild === true && !force) {
|
|
3908
4055
|
if (!this.config.silent) {
|
|
3909
4056
|
console.log('⚡ Auto-rebuild explicitly disabled via config');
|
|
4057
|
+
console.log('💡 Indexes will build automatically on first query (lazy loading)');
|
|
3910
4058
|
}
|
|
3911
4059
|
return;
|
|
3912
4060
|
}
|
|
3913
4061
|
// OPTIMIZATION: Instant check - if index already has data, skip immediately
|
|
3914
4062
|
// This gives 0s startup for warm restarts (vs 50-100ms of async checks)
|
|
3915
|
-
if (this.index.size() > 0) {
|
|
4063
|
+
if (this.index.size() > 0 && !force) {
|
|
3916
4064
|
if (!this.config.silent) {
|
|
3917
4065
|
console.log(`✅ Index already populated (${this.index.size().toLocaleString()} entities) - 0s startup!`);
|
|
3918
4066
|
}
|
|
@@ -3924,11 +4072,14 @@ export class Brainy {
|
|
|
3924
4072
|
const totalCount = entities.totalCount || 0;
|
|
3925
4073
|
// If storage is truly empty, no rebuild needed
|
|
3926
4074
|
if (totalCount === 0 && entities.items.length === 0) {
|
|
4075
|
+
if (force && !this.config.silent) {
|
|
4076
|
+
console.log('✅ Storage empty - no rebuild needed');
|
|
4077
|
+
}
|
|
3927
4078
|
return;
|
|
3928
4079
|
}
|
|
3929
|
-
// Intelligent decision: Auto-rebuild
|
|
3930
|
-
//
|
|
3931
|
-
const AUTO_REBUILD_THRESHOLD =
|
|
4080
|
+
// Intelligent decision: Auto-rebuild based on dataset size
|
|
4081
|
+
// Production scale: Handles billions via streaming pagination
|
|
4082
|
+
const AUTO_REBUILD_THRESHOLD = 10000; // Auto-rebuild if < 10K items (v5.7.7: increased from 1K)
|
|
3932
4083
|
// Check if indexes need rebuilding
|
|
3933
4084
|
const metadataStats = await this.metadataIndex.getStats();
|
|
3934
4085
|
const hnswIndexSize = this.index.size();
|
|
@@ -3936,48 +4087,46 @@ export class Brainy {
|
|
|
3936
4087
|
const needsRebuild = metadataStats.totalEntries === 0 ||
|
|
3937
4088
|
hnswIndexSize === 0 ||
|
|
3938
4089
|
graphIndexSize === 0;
|
|
3939
|
-
if (!needsRebuild) {
|
|
4090
|
+
if (!needsRebuild && !force) {
|
|
3940
4091
|
// All indexes already populated, no rebuild needed
|
|
3941
4092
|
return;
|
|
3942
4093
|
}
|
|
3943
|
-
//
|
|
3944
|
-
|
|
3945
|
-
|
|
4094
|
+
// v5.7.7: Determine rebuild strategy
|
|
4095
|
+
const isLazyRebuild = force && this.config.disableAutoRebuild === true;
|
|
4096
|
+
const isSmallDataset = totalCount < AUTO_REBUILD_THRESHOLD;
|
|
4097
|
+
const shouldRebuild = isLazyRebuild || isSmallDataset || this.config.disableAutoRebuild === false;
|
|
4098
|
+
if (!shouldRebuild) {
|
|
4099
|
+
// Large dataset with auto-rebuild disabled: Wait for lazy loading
|
|
3946
4100
|
if (!this.config.silent) {
|
|
3947
|
-
console.log(
|
|
4101
|
+
console.log(`⚡ Large dataset (${totalCount.toLocaleString()} items) - using lazy loading for optimal startup`);
|
|
4102
|
+
console.log('💡 Indexes will build automatically on first query');
|
|
3948
4103
|
}
|
|
3949
4104
|
return;
|
|
3950
4105
|
}
|
|
3951
|
-
//
|
|
3952
|
-
|
|
3953
|
-
|
|
3954
|
-
|
|
3955
|
-
|
|
3956
|
-
|
|
3957
|
-
|
|
3958
|
-
|
|
3959
|
-
// Indexes load their data from storage (no recomputation)
|
|
3960
|
-
const rebuildStartTime = Date.now();
|
|
3961
|
-
await Promise.all([
|
|
3962
|
-
metadataStats.totalEntries === 0 ? this.metadataIndex.rebuild() : Promise.resolve(),
|
|
3963
|
-
hnswIndexSize === 0 ? this.index.rebuild() : Promise.resolve(),
|
|
3964
|
-
graphIndexSize === 0 ? this.graphIndex.rebuild() : Promise.resolve()
|
|
3965
|
-
]);
|
|
3966
|
-
const rebuildDuration = Date.now() - rebuildStartTime;
|
|
3967
|
-
if (!this.config.silent) {
|
|
3968
|
-
console.log(`✅ All indexes rebuilt in ${rebuildDuration}ms:\n` +
|
|
3969
|
-
` - Metadata: ${await this.metadataIndex.getStats().then(s => s.totalEntries)} entries\n` +
|
|
3970
|
-
` - HNSW Vector: ${this.index.size()} nodes\n` +
|
|
3971
|
-
` - Graph Adjacency: ${await this.graphIndex.size()} relationships\n` +
|
|
3972
|
-
` 💡 Indexes loaded from persisted storage (no recomputation)`);
|
|
3973
|
-
}
|
|
4106
|
+
// REBUILD: Either small dataset, forced rebuild, or explicit enable
|
|
4107
|
+
const rebuildReason = isLazyRebuild
|
|
4108
|
+
? 'π Lazy loading triggered by first query'
|
|
4109
|
+
: isSmallDataset
|
|
4110
|
+
? `π Small dataset (${totalCount.toLocaleString()} items)`
|
|
4111
|
+
: 'π Auto-rebuild explicitly enabled';
|
|
4112
|
+
if (!this.config.silent) {
|
|
4113
|
+
console.log(`${rebuildReason} - rebuilding all indexes from persisted data...`);
|
|
3974
4114
|
}
|
|
3975
|
-
|
|
3976
|
-
|
|
3977
|
-
|
|
3978
|
-
|
|
3979
|
-
|
|
3980
|
-
|
|
4115
|
+
// Rebuild all 3 indexes in parallel for performance
|
|
4116
|
+
// Indexes load their data from storage (no recomputation)
|
|
4117
|
+
const rebuildStartTime = Date.now();
|
|
4118
|
+
await Promise.all([
|
|
4119
|
+
metadataStats.totalEntries === 0 ? this.metadataIndex.rebuild() : Promise.resolve(),
|
|
4120
|
+
hnswIndexSize === 0 ? this.index.rebuild() : Promise.resolve(),
|
|
4121
|
+
graphIndexSize === 0 ? this.graphIndex.rebuild() : Promise.resolve()
|
|
4122
|
+
]);
|
|
4123
|
+
const rebuildDuration = Date.now() - rebuildStartTime;
|
|
4124
|
+
if (!this.config.silent) {
|
|
4125
|
+
console.log(`✅ All indexes rebuilt in ${rebuildDuration}ms:\n` +
|
|
4126
|
+
` - Metadata: ${await this.metadataIndex.getStats().then(s => s.totalEntries)} entries\n` +
|
|
4127
|
+
` - HNSW Vector: ${this.index.size()} nodes\n` +
|
|
4128
|
+
` - Graph Adjacency: ${await this.graphIndex.size()} relationships\n` +
|
|
4129
|
+
` 💡 Indexes loaded from persisted storage (no recomputation)`);
|
|
3981
4130
|
}
|
|
3982
4131
|
}
|
|
3983
4132
|
catch (error) {
|
package/dist/index.d.ts
CHANGED
|
@@ -15,6 +15,12 @@ export { PresetName, ModelPrecision, StorageOption, FeatureSet, DistributedRole,
|
|
|
15
15
|
export { Cortex, cortex } from './cortex.js';
|
|
16
16
|
export { NeuralImport } from './cortex/neuralImport.js';
|
|
17
17
|
export type { NeuralAnalysisResult, DetectedEntity, DetectedRelationship, NeuralInsight, NeuralImportOptions } from './cortex/neuralImport.js';
|
|
18
|
+
export { NeuralEntityExtractor } from './neural/entityExtractor.js';
|
|
19
|
+
export { SmartExtractor } from './neural/SmartExtractor.js';
|
|
20
|
+
export { SmartRelationshipExtractor } from './neural/SmartRelationshipExtractor.js';
|
|
21
|
+
export type { ExtractedEntity } from './neural/entityExtractor.js';
|
|
22
|
+
export type { ExtractionResult, SmartExtractorOptions, FormatContext } from './neural/SmartExtractor.js';
|
|
23
|
+
export type { RelationshipExtractionResult, SmartRelationshipExtractorOptions } from './neural/SmartRelationshipExtractor.js';
|
|
18
24
|
import { euclideanDistance, cosineDistance, manhattanDistance, dotProductDistance } from './utils/index.js';
|
|
19
25
|
export { euclideanDistance, cosineDistance, manhattanDistance, dotProductDistance };
|
|
20
26
|
export { getBrainyVersion } from './utils/version.js';
|
package/dist/index.js
CHANGED
|
@@ -31,6 +31,10 @@ getPreset, isValidPreset, getPresetsByCategory, getAllPresetNames, getPresetDesc
|
|
|
31
31
|
export { Cortex, cortex } from './cortex.js';
|
|
32
32
|
// Export Neural Import (AI data understanding)
|
|
33
33
|
export { NeuralImport } from './cortex/neuralImport.js';
|
|
34
|
+
// Export Neural Entity Extraction (v5.7.6 - Workshop request)
|
|
35
|
+
export { NeuralEntityExtractor } from './neural/entityExtractor.js';
|
|
36
|
+
export { SmartExtractor } from './neural/SmartExtractor.js';
|
|
37
|
+
export { SmartRelationshipExtractor } from './neural/SmartRelationshipExtractor.js';
|
|
34
38
|
// Import Manager removed - use brain.import() instead (available on all Brainy instances)
|
|
35
39
|
// Augmentation types are already exported later in the file
|
|
36
40
|
// Export distance functions for convenience
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "5.7.5",
|
|
3
|
+
"version": "5.7.7",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -39,6 +39,18 @@
|
|
|
39
39
|
"./universal": {
|
|
40
40
|
"import": "./dist/universal/index.js",
|
|
41
41
|
"types": "./dist/universal/index.d.ts"
|
|
42
|
+
},
|
|
43
|
+
"./neural/entityExtractor": {
|
|
44
|
+
"import": "./dist/neural/entityExtractor.js",
|
|
45
|
+
"types": "./dist/neural/entityExtractor.d.ts"
|
|
46
|
+
},
|
|
47
|
+
"./neural/SmartExtractor": {
|
|
48
|
+
"import": "./dist/neural/SmartExtractor.js",
|
|
49
|
+
"types": "./dist/neural/SmartExtractor.d.ts"
|
|
50
|
+
},
|
|
51
|
+
"./neural/SmartRelationshipExtractor": {
|
|
52
|
+
"import": "./dist/neural/SmartRelationshipExtractor.js",
|
|
53
|
+
"types": "./dist/neural/SmartRelationshipExtractor.d.ts"
|
|
42
54
|
}
|
|
43
55
|
},
|
|
44
56
|
"browser": {
|