@soulcraft/brainy 5.4.0 → 5.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +45 -0
- package/README.md +4 -3
- package/dist/augmentations/display/fieldPatterns.js +3 -3
- package/dist/augmentations/display/intelligentComputation.d.ts +1 -1
- package/dist/augmentations/display/intelligentComputation.js +1 -3
- package/dist/augmentations/typeMatching/brainyTypes.d.ts +1 -1
- package/dist/augmentations/typeMatching/brainyTypes.js +7 -9
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.d.ts +1 -1
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.js +1 -1
- package/dist/augmentations/universalDisplayAugmentation.d.ts +1 -1
- package/dist/augmentations/universalDisplayAugmentation.js +1 -1
- package/dist/brainy.js +2 -2
- package/dist/cli/commands/types.js +2 -2
- package/dist/cortex/neuralImport.js +0 -1
- package/dist/hnsw/typeAwareHNSWIndex.d.ts +3 -3
- package/dist/hnsw/typeAwareHNSWIndex.js +5 -5
- package/dist/importers/SmartExcelImporter.js +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/neural/embeddedKeywordEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedKeywordEmbeddings.js +56 -56
- package/dist/neural/embeddedTypeEmbeddings.d.ts +3 -3
- package/dist/neural/embeddedTypeEmbeddings.js +14 -14
- package/dist/neural/entityExtractor.js +2 -2
- package/dist/neural/relationshipConfidence.js +1 -1
- package/dist/neural/signals/VerbContextSignal.js +6 -6
- package/dist/neural/signals/VerbExactMatchSignal.js +9 -9
- package/dist/neural/signals/VerbPatternSignal.js +5 -5
- package/dist/query/typeAwareQueryPlanner.d.ts +7 -7
- package/dist/query/typeAwareQueryPlanner.js +9 -10
- package/dist/storage/baseStorage.d.ts +48 -1
- package/dist/storage/baseStorage.js +237 -19
- package/dist/types/graphTypes.d.ts +588 -230
- package/dist/types/graphTypes.js +683 -248
- package/dist/types/typeMigration.d.ts +95 -0
- package/dist/types/typeMigration.js +141 -0
- package/dist/utils/intelligentTypeMapper.js +2 -2
- package/dist/utils/metadataIndex.js +6 -6
- package/package.json +2 -2
- package/dist/importManager.d.ts +0 -78
- package/dist/importManager.js +0 -267
- package/dist/storage/adapters/typeAwareStorageAdapter.d.ts +0 -300
- package/dist/storage/adapters/typeAwareStorageAdapter.js +0 -1012
|
@@ -234,11 +234,11 @@ export class NeuralEntityExtractor {
|
|
|
234
234
|
}
|
|
235
235
|
// Hashtag
|
|
236
236
|
if (text.startsWith('#')) {
|
|
237
|
-
return { type: NounType.
|
|
237
|
+
return { type: NounType.Concept, confidence: 0.8 };
|
|
238
238
|
}
|
|
239
239
|
// Mention
|
|
240
240
|
if (text.startsWith('@')) {
|
|
241
|
-
return { type: NounType.
|
|
241
|
+
return { type: NounType.Person, confidence: 0.8 };
|
|
242
242
|
}
|
|
243
243
|
// Capitalized words (likely proper nouns)
|
|
244
244
|
if (/^[A-Z]/.test(text)) {
|
|
@@ -106,7 +106,7 @@ export class RelationshipConfidenceScorer {
|
|
|
106
106
|
[VerbType.Contains]: ['contains', 'includes', 'has', 'holds'],
|
|
107
107
|
[VerbType.Requires]: ['requires', 'needs', 'depends on', 'relies on'],
|
|
108
108
|
[VerbType.Uses]: ['uses', 'utilizes', 'employs', 'applies'],
|
|
109
|
-
[VerbType.
|
|
109
|
+
[VerbType.ReportsTo]: ['manages', 'oversees', 'supervises', 'controls'],
|
|
110
110
|
[VerbType.Causes]: ['influences', 'affects', 'impacts', 'shapes', 'causes'],
|
|
111
111
|
[VerbType.DependsOn]: ['depends on', 'relies on', 'based on'],
|
|
112
112
|
[VerbType.Modifies]: ['modifies', 'changes', 'alters', 'updates'],
|
|
@@ -105,7 +105,7 @@ export class VerbContextSignal {
|
|
|
105
105
|
{
|
|
106
106
|
subjectType: NounType.Document,
|
|
107
107
|
objectType: NounType.Person,
|
|
108
|
-
verbType: VerbType.
|
|
108
|
+
verbType: VerbType.Creates,
|
|
109
109
|
confidence: 0.80,
|
|
110
110
|
description: 'Document created by Person'
|
|
111
111
|
},
|
|
@@ -173,7 +173,7 @@ export class VerbContextSignal {
|
|
|
173
173
|
{
|
|
174
174
|
subjectType: NounType.Product,
|
|
175
175
|
objectType: NounType.Organization,
|
|
176
|
-
verbType: VerbType.
|
|
176
|
+
verbType: VerbType.Creates,
|
|
177
177
|
confidence: 0.75,
|
|
178
178
|
description: 'Product created by Organization'
|
|
179
179
|
},
|
|
@@ -188,7 +188,7 @@ export class VerbContextSignal {
|
|
|
188
188
|
{
|
|
189
189
|
subjectType: NounType.Product,
|
|
190
190
|
objectType: NounType.Person,
|
|
191
|
-
verbType: VerbType.
|
|
191
|
+
verbType: VerbType.Creates,
|
|
192
192
|
confidence: 0.75,
|
|
193
193
|
description: 'Product created by Person'
|
|
194
194
|
},
|
|
@@ -196,7 +196,7 @@ export class VerbContextSignal {
|
|
|
196
196
|
{
|
|
197
197
|
subjectType: NounType.Event,
|
|
198
198
|
objectType: NounType.Person,
|
|
199
|
-
verbType: VerbType.
|
|
199
|
+
verbType: VerbType.Creates,
|
|
200
200
|
confidence: 0.70,
|
|
201
201
|
description: 'Event created by Person'
|
|
202
202
|
},
|
|
@@ -220,7 +220,7 @@ export class VerbContextSignal {
|
|
|
220
220
|
{
|
|
221
221
|
subjectType: NounType.Project,
|
|
222
222
|
objectType: NounType.Organization,
|
|
223
|
-
verbType: VerbType.
|
|
223
|
+
verbType: VerbType.Owns,
|
|
224
224
|
confidence: 0.75,
|
|
225
225
|
description: 'Project belongs to Organization'
|
|
226
226
|
},
|
|
@@ -228,7 +228,7 @@ export class VerbContextSignal {
|
|
|
228
228
|
{
|
|
229
229
|
subjectType: NounType.Project,
|
|
230
230
|
objectType: NounType.Person,
|
|
231
|
-
verbType: VerbType.
|
|
231
|
+
verbType: VerbType.Creates,
|
|
232
232
|
confidence: 0.70,
|
|
233
233
|
description: 'Project created by Person'
|
|
234
234
|
},
|
|
@@ -190,14 +190,14 @@ export class VerbExactMatchSignal {
|
|
|
190
190
|
// Common relationship phrases with their VerbTypes
|
|
191
191
|
const phrases = [
|
|
192
192
|
// Creation relationships
|
|
193
|
-
{ pattern: /created?\s+by/i, type: VerbType.
|
|
194
|
-
{ pattern: /authored?\s+by/i, type: VerbType.
|
|
195
|
-
{ pattern: /written\s+by/i, type: VerbType.
|
|
196
|
-
{ pattern: /developed\s+by/i, type: VerbType.
|
|
193
|
+
{ pattern: /created?\s+by/i, type: VerbType.Creates, confidence: 0.95 },
|
|
194
|
+
{ pattern: /authored?\s+by/i, type: VerbType.Creates, confidence: 0.95 },
|
|
195
|
+
{ pattern: /written\s+by/i, type: VerbType.Creates, confidence: 0.95 },
|
|
196
|
+
{ pattern: /developed\s+by/i, type: VerbType.Creates, confidence: 0.90 },
|
|
197
197
|
{ pattern: /built\s+by/i, type: VerbType.Creates, confidence: 0.85 },
|
|
198
198
|
// Ownership relationships
|
|
199
199
|
{ pattern: /owned\s+by/i, type: VerbType.Owns, confidence: 0.95 },
|
|
200
|
-
{ pattern: /belongs\s+to/i, type: VerbType.
|
|
200
|
+
{ pattern: /belongs\s+to/i, type: VerbType.Owns, confidence: 0.95 },
|
|
201
201
|
{ pattern: /attributed\s+to/i, type: VerbType.AttributedTo, confidence: 0.95 },
|
|
202
202
|
// Part/Whole relationships
|
|
203
203
|
{ pattern: /part\s+of/i, type: VerbType.PartOf, confidence: 0.95 },
|
|
@@ -213,17 +213,17 @@ export class VerbExactMatchSignal {
|
|
|
213
213
|
{ pattern: /employed\s+by/i, type: VerbType.WorksWith, confidence: 0.90 },
|
|
214
214
|
// Reporting relationships
|
|
215
215
|
{ pattern: /reports?\s+to/i, type: VerbType.ReportsTo, confidence: 0.95 },
|
|
216
|
-
{ pattern: /manages/i, type: VerbType.
|
|
217
|
-
{ pattern: /supervises/i, type: VerbType.
|
|
216
|
+
{ pattern: /manages/i, type: VerbType.ReportsTo, confidence: 0.85 },
|
|
217
|
+
{ pattern: /supervises/i, type: VerbType.ReportsTo, confidence: 0.95 },
|
|
218
218
|
// Reference relationships
|
|
219
219
|
{ pattern: /references/i, type: VerbType.References, confidence: 0.90 },
|
|
220
220
|
{ pattern: /cites/i, type: VerbType.References, confidence: 0.90 },
|
|
221
221
|
{ pattern: /mentions/i, type: VerbType.References, confidence: 0.85 },
|
|
222
222
|
// Temporal relationships
|
|
223
223
|
{ pattern: /precedes/i, type: VerbType.Precedes, confidence: 0.90 },
|
|
224
|
-
{ pattern: /follows/i, type: VerbType.
|
|
224
|
+
{ pattern: /follows/i, type: VerbType.Precedes, confidence: 0.90 },
|
|
225
225
|
{ pattern: /before/i, type: VerbType.Precedes, confidence: 0.75 },
|
|
226
|
-
{ pattern: /after/i, type: VerbType.
|
|
226
|
+
{ pattern: /after/i, type: VerbType.Precedes, confidence: 0.75 },
|
|
227
227
|
// Causal relationships
|
|
228
228
|
{ pattern: /causes/i, type: VerbType.Causes, confidence: 0.90 },
|
|
229
229
|
{ pattern: /requires/i, type: VerbType.Requires, confidence: 0.90 },
|
|
@@ -53,7 +53,7 @@ export class VerbPatternSignal {
|
|
|
53
53
|
// ========== Creation & Authorship ==========
|
|
54
54
|
{
|
|
55
55
|
regex: /\b(?:created?|made|built|developed|designed|wrote|authored|composed)\s+(?:by|from)\b/i,
|
|
56
|
-
type: VerbType.
|
|
56
|
+
type: VerbType.Creates,
|
|
57
57
|
confidence: 0.90,
|
|
58
58
|
description: 'Creation with agent (passive)'
|
|
59
59
|
},
|
|
@@ -84,7 +84,7 @@ export class VerbPatternSignal {
|
|
|
84
84
|
},
|
|
85
85
|
{
|
|
86
86
|
regex: /\bbelongs?\s+to\b/i,
|
|
87
|
-
type: VerbType.
|
|
87
|
+
type: VerbType.Owns,
|
|
88
88
|
confidence: 0.95,
|
|
89
89
|
description: 'Belonging relationship'
|
|
90
90
|
},
|
|
@@ -141,7 +141,7 @@ export class VerbPatternSignal {
|
|
|
141
141
|
},
|
|
142
142
|
{
|
|
143
143
|
regex: /\b(?:manages?|supervises?|oversees?)\b/i,
|
|
144
|
-
type: VerbType.
|
|
144
|
+
type: VerbType.ReportsTo,
|
|
145
145
|
confidence: 0.85,
|
|
146
146
|
description: 'Management relationship'
|
|
147
147
|
},
|
|
@@ -198,7 +198,7 @@ export class VerbPatternSignal {
|
|
|
198
198
|
},
|
|
199
199
|
{
|
|
200
200
|
regex: /\b(?:succeeds?|follows?|comes?\s+after|happens?\s+after)\b/i,
|
|
201
|
-
type: VerbType.
|
|
201
|
+
type: VerbType.Precedes,
|
|
202
202
|
confidence: 0.85,
|
|
203
203
|
description: 'Temporal succession'
|
|
204
204
|
},
|
|
@@ -210,7 +210,7 @@ export class VerbPatternSignal {
|
|
|
210
210
|
},
|
|
211
211
|
{
|
|
212
212
|
regex: /\bafter\b/i,
|
|
213
|
-
type: VerbType.
|
|
213
|
+
type: VerbType.Precedes,
|
|
214
214
|
confidence: 0.70,
|
|
215
215
|
description: 'After (temporal)'
|
|
216
216
|
},
|
|
@@ -6,14 +6,14 @@
|
|
|
6
6
|
* TypeAwareHNSWIndex graphs.
|
|
7
7
|
*
|
|
8
8
|
* Performance Impact:
|
|
9
|
-
* - Single-type queries:
|
|
10
|
-
* - Multi-type queries:
|
|
9
|
+
* - Single-type queries: 42x speedup (search 1/42 graphs)
|
|
10
|
+
* - Multi-type queries: 8-21x speedup (search 2-5/42 graphs)
|
|
11
11
|
* - Overall: 40% latency reduction @ 1B scale
|
|
12
12
|
*
|
|
13
13
|
* Examples:
|
|
14
|
-
* - "Find engineers" → single-type → [Person] →
|
|
15
|
-
* - "People at Tesla" → multi-type → [Person, Organization] →
|
|
16
|
-
* - "Everything about AI" → all-types → [all
|
|
14
|
+
* - "Find engineers" → single-type → [Person] → 42x speedup
|
|
15
|
+
* - "People at Tesla" → multi-type → [Person, Organization] → 21x speedup
|
|
16
|
+
* - "Everything about AI" → all-types → [all 42 types] → no speedup
|
|
17
17
|
*/
|
|
18
18
|
import { NounType } from '../types/graphTypes.js';
|
|
19
19
|
import { type TypeInference } from './semanticTypeInference.js';
|
|
@@ -38,11 +38,11 @@ export interface TypeAwareQueryPlan {
|
|
|
38
38
|
*/
|
|
39
39
|
routing: QueryRoutingStrategy;
|
|
40
40
|
/**
|
|
41
|
-
* Target types to search (1-
|
|
41
|
+
* Target types to search (1-42 types)
|
|
42
42
|
*/
|
|
43
43
|
targetTypes: NounType[];
|
|
44
44
|
/**
|
|
45
|
-
* Estimated speedup factor (1.0 = no speedup,
|
|
45
|
+
* Estimated speedup factor (1.0 = no speedup, 42.0 = 42x faster)
|
|
46
46
|
*/
|
|
47
47
|
estimatedSpeedup: number;
|
|
48
48
|
/**
|
|
@@ -6,14 +6,14 @@
|
|
|
6
6
|
* TypeAwareHNSWIndex graphs.
|
|
7
7
|
*
|
|
8
8
|
* Performance Impact:
|
|
9
|
-
* - Single-type queries:
|
|
10
|
-
* - Multi-type queries:
|
|
9
|
+
* - Single-type queries: 42x speedup (search 1/42 graphs)
|
|
10
|
+
* - Multi-type queries: 8-21x speedup (search 2-5/42 graphs)
|
|
11
11
|
* - Overall: 40% latency reduction @ 1B scale
|
|
12
12
|
*
|
|
13
13
|
* Examples:
|
|
14
|
-
* - "Find engineers" → single-type → [Person] →
|
|
15
|
-
* - "People at Tesla" → multi-type → [Person, Organization] →
|
|
16
|
-
* - "Everything about AI" → all-types → [all
|
|
14
|
+
* - "Find engineers" → single-type → [Person] → 42x speedup
|
|
15
|
+
* - "People at Tesla" → multi-type → [Person, Organization] → 21x speedup
|
|
16
|
+
* - "Everything about AI" → all-types → [all 42 types] → no speedup
|
|
17
17
|
*/
|
|
18
18
|
import { NounType, NOUN_TYPE_COUNT } from '../types/graphTypes.js';
|
|
19
19
|
import { inferNouns } from './semanticTypeInference.js';
|
|
@@ -145,18 +145,17 @@ export class TypeAwareQueryPlanner {
|
|
|
145
145
|
NounType.Media,
|
|
146
146
|
NounType.File,
|
|
147
147
|
NounType.Message,
|
|
148
|
-
NounType.Content,
|
|
149
148
|
NounType.Collection,
|
|
150
149
|
NounType.Dataset,
|
|
151
150
|
NounType.Product,
|
|
152
151
|
NounType.Service,
|
|
153
|
-
NounType.
|
|
152
|
+
NounType.Person,
|
|
154
153
|
NounType.Task,
|
|
155
154
|
NounType.Project,
|
|
156
155
|
NounType.Process,
|
|
157
156
|
NounType.State,
|
|
158
157
|
NounType.Role,
|
|
159
|
-
NounType.
|
|
158
|
+
NounType.Concept,
|
|
160
159
|
NounType.Language,
|
|
161
160
|
NounType.Currency,
|
|
162
161
|
NounType.Measurement,
|
|
@@ -208,13 +207,13 @@ export class TypeAwareQueryPlanner {
|
|
|
208
207
|
const allPct = ((this.stats.allTypesQueries / total) * 100).toFixed(1);
|
|
209
208
|
const avgConf = (this.stats.avgConfidence * 100).toFixed(1);
|
|
210
209
|
// Calculate weighted average speedup
|
|
211
|
-
const avgSpeedup = ((this.stats.singleTypeQueries *
|
|
210
|
+
const avgSpeedup = ((this.stats.singleTypeQueries * 42.0 +
|
|
212
211
|
this.stats.multiTypeQueries * 10.0 +
|
|
213
212
|
this.stats.allTypesQueries * 1.0) /
|
|
214
213
|
total).toFixed(1);
|
|
215
214
|
return `
|
|
216
215
|
Query Statistics (${total} total):
|
|
217
|
-
- Single-type: ${this.stats.singleTypeQueries} (${singlePct}%) -
|
|
216
|
+
- Single-type: ${this.stats.singleTypeQueries} (${singlePct}%) - 42x speedup
|
|
218
217
|
- Multi-type: ${this.stats.multiTypeQueries} (${multiPct}%) - ~10x speedup
|
|
219
218
|
- All-types: ${this.stats.allTypesQueries} (${allPct}%) - 1x speedup
|
|
220
219
|
- Avg confidence: ${avgConf}%
|
|
@@ -61,6 +61,7 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
|
|
|
61
61
|
protected verbCountsByType: Uint32Array<ArrayBuffer>;
|
|
62
62
|
protected nounTypeCache: Map<string, NounType>;
|
|
63
63
|
protected verbTypeCache: Map<string, VerbType>;
|
|
64
|
+
private typeCountsRebuilt;
|
|
64
65
|
/**
|
|
65
66
|
* Analyze a storage key to determine its routing and path
|
|
66
67
|
* @param id - The key to analyze (UUID or system key)
|
|
@@ -224,7 +225,15 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
|
|
|
224
225
|
* Get nouns with pagination (v5.4.0: Type-first implementation)
|
|
225
226
|
*
|
|
226
227
|
* CRITICAL: This method is required for brain.find() to work!
|
|
227
|
-
* Iterates through
|
|
228
|
+
* Iterates through noun types with billion-scale optimizations.
|
|
229
|
+
*
|
|
230
|
+
* ARCHITECTURE: Reads storage directly (not indexes) to avoid circular dependencies.
|
|
231
|
+
* Storage → Indexes (one direction only). GraphAdjacencyIndex built FROM storage.
|
|
232
|
+
*
|
|
233
|
+
* OPTIMIZATIONS (v5.5.0):
|
|
234
|
+
* - Skip empty types using nounCountsByType[] tracking (O(1) check)
|
|
235
|
+
* - Early termination when offset + limit entities collected
|
|
236
|
+
* - Memory efficient: Never loads full dataset
|
|
228
237
|
*/
|
|
229
238
|
getNounsWithPagination(options: {
|
|
230
239
|
limit: number;
|
|
@@ -241,6 +250,38 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
|
|
|
241
250
|
hasMore: boolean;
|
|
242
251
|
nextCursor?: string;
|
|
243
252
|
}>;
|
|
253
|
+
/**
|
|
254
|
+
* Get verbs with pagination (v5.5.0: Type-first implementation with billion-scale optimizations)
|
|
255
|
+
*
|
|
256
|
+
* CRITICAL: This method is required for brain.getRelations() to work!
|
|
257
|
+
* Iterates through verb types with the same optimizations as nouns.
|
|
258
|
+
*
|
|
259
|
+
* ARCHITECTURE: Reads storage directly (not indexes) to avoid circular dependencies.
|
|
260
|
+
* Storage → Indexes (one direction only). GraphAdjacencyIndex built FROM storage.
|
|
261
|
+
*
|
|
262
|
+
* OPTIMIZATIONS (v5.5.0):
|
|
263
|
+
* - Skip empty types using verbCountsByType[] tracking (O(1) check)
|
|
264
|
+
* - Early termination when offset + limit verbs collected
|
|
265
|
+
* - Memory efficient: Never loads full dataset
|
|
266
|
+
* - Inline filtering for sourceId, targetId, verbType
|
|
267
|
+
*/
|
|
268
|
+
getVerbsWithPagination(options: {
|
|
269
|
+
limit: number;
|
|
270
|
+
offset: number;
|
|
271
|
+
cursor?: string;
|
|
272
|
+
filter?: {
|
|
273
|
+
verbType?: string | string[];
|
|
274
|
+
sourceId?: string | string[];
|
|
275
|
+
targetId?: string | string[];
|
|
276
|
+
service?: string | string[];
|
|
277
|
+
metadata?: Record<string, any>;
|
|
278
|
+
};
|
|
279
|
+
}): Promise<{
|
|
280
|
+
items: HNSWVerbWithMetadata[];
|
|
281
|
+
totalCount: number;
|
|
282
|
+
hasMore: boolean;
|
|
283
|
+
nextCursor?: string;
|
|
284
|
+
}>;
|
|
244
285
|
/**
|
|
245
286
|
* Get verbs with pagination and filtering
|
|
246
287
|
* @param options Pagination and filtering options
|
|
@@ -393,6 +434,12 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
|
|
|
393
434
|
* Periodically called when counts are updated
|
|
394
435
|
*/
|
|
395
436
|
protected saveTypeStatistics(): Promise<void>;
|
|
437
|
+
/**
|
|
438
|
+
* Rebuild type counts from actual storage (v5.5.0)
|
|
439
|
+
* Called when statistics are missing or inconsistent
|
|
440
|
+
* Ensures verbCountsByType is always accurate for reliable pagination
|
|
441
|
+
*/
|
|
442
|
+
protected rebuildTypeCounts(): Promise<void>;
|
|
396
443
|
/**
|
|
397
444
|
* Get noun type from cache or metadata
|
|
398
445
|
* Relies on nounTypeCache populated during metadata saves
|
|
@@ -77,12 +77,14 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
77
77
|
this.cowEnabled = false;
|
|
78
78
|
// Type-first indexing support (v5.4.0)
|
|
79
79
|
// Built into all storage adapters for billion-scale efficiency
|
|
80
|
-
this.nounCountsByType = new Uint32Array(NOUN_TYPE_COUNT); //
|
|
81
|
-
this.verbCountsByType = new Uint32Array(VERB_TYPE_COUNT); //
|
|
82
|
-
// Total:
|
|
80
|
+
this.nounCountsByType = new Uint32Array(NOUN_TYPE_COUNT); // 168 bytes (Stage 3: 42 types)
|
|
81
|
+
this.verbCountsByType = new Uint32Array(VERB_TYPE_COUNT); // 508 bytes (Stage 3: 127 types)
|
|
82
|
+
// Total: 676 bytes (99.2% reduction vs Map-based tracking)
|
|
83
83
|
// Type cache for O(1) lookups after first access
|
|
84
84
|
this.nounTypeCache = new Map();
|
|
85
85
|
this.verbTypeCache = new Map();
|
|
86
|
+
// v5.5.0: Track if type counts have been rebuilt (prevent repeated rebuilds)
|
|
87
|
+
this.typeCountsRebuilt = false;
|
|
86
88
|
}
|
|
87
89
|
/**
|
|
88
90
|
* Analyze a storage key to determine its routing and path
|
|
@@ -825,14 +827,30 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
825
827
|
* Get nouns with pagination (v5.4.0: Type-first implementation)
|
|
826
828
|
*
|
|
827
829
|
* CRITICAL: This method is required for brain.find() to work!
|
|
828
|
-
* Iterates through
|
|
830
|
+
* Iterates through noun types with billion-scale optimizations.
|
|
831
|
+
*
|
|
832
|
+
* ARCHITECTURE: Reads storage directly (not indexes) to avoid circular dependencies.
|
|
833
|
+
* Storage → Indexes (one direction only). GraphAdjacencyIndex built FROM storage.
|
|
834
|
+
*
|
|
835
|
+
* OPTIMIZATIONS (v5.5.0):
|
|
836
|
+
* - Skip empty types using nounCountsByType[] tracking (O(1) check)
|
|
837
|
+
* - Early termination when offset + limit entities collected
|
|
838
|
+
* - Memory efficient: Never loads full dataset
|
|
829
839
|
*/
|
|
830
840
|
async getNounsWithPagination(options) {
|
|
831
841
|
await this.ensureInitialized();
|
|
832
|
-
const { limit, offset, filter } = options;
|
|
833
|
-
const
|
|
834
|
-
|
|
835
|
-
|
|
842
|
+
const { limit, offset = 0, filter } = options;
|
|
843
|
+
const collectedNouns = [];
|
|
844
|
+
const targetCount = offset + limit; // Early termination target
|
|
845
|
+
// v5.5.0 BUG FIX: Only use optimization if counts are reliable
|
|
846
|
+
const totalNounCountFromArray = this.nounCountsByType.reduce((sum, c) => sum + c, 0);
|
|
847
|
+
const useOptimization = totalNounCountFromArray > 0;
|
|
848
|
+
// v5.5.0: Iterate through noun types with billion-scale optimizations
|
|
849
|
+
for (let i = 0; i < NOUN_TYPE_COUNT && collectedNouns.length < targetCount; i++) {
|
|
850
|
+
// OPTIMIZATION 1: Skip empty types (only if counts are reliable)
|
|
851
|
+
if (useOptimization && this.nounCountsByType[i] === 0) {
|
|
852
|
+
continue;
|
|
853
|
+
}
|
|
836
854
|
const type = TypeUtils.getNounFromIndex(i);
|
|
837
855
|
// If filtering by type, skip other types
|
|
838
856
|
if (filter?.nounType) {
|
|
@@ -846,6 +864,10 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
846
864
|
// List all noun files for this type
|
|
847
865
|
const nounFiles = await this.listObjectsInBranch(typeDir);
|
|
848
866
|
for (const nounPath of nounFiles) {
|
|
867
|
+
// OPTIMIZATION 2: Early termination (stop when we have enough)
|
|
868
|
+
if (collectedNouns.length >= targetCount) {
|
|
869
|
+
break;
|
|
870
|
+
}
|
|
849
871
|
// Skip if not a .json file
|
|
850
872
|
if (!nounPath.endsWith('.json'))
|
|
851
873
|
continue;
|
|
@@ -864,7 +886,7 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
864
886
|
}
|
|
865
887
|
}
|
|
866
888
|
// Combine noun + metadata (v5.4.0: Extract standard fields to top-level)
|
|
867
|
-
|
|
889
|
+
collectedNouns.push({
|
|
868
890
|
...noun,
|
|
869
891
|
type: metadata.noun || type, // Required: Extract type from metadata
|
|
870
892
|
confidence: metadata.confidence,
|
|
@@ -892,19 +914,104 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
892
914
|
// Skip types that have no data
|
|
893
915
|
}
|
|
894
916
|
}
|
|
895
|
-
// Apply pagination
|
|
896
|
-
const
|
|
897
|
-
const
|
|
898
|
-
const hasMore = offset + limit < totalCount;
|
|
917
|
+
// Apply pagination (v5.5.0: Efficient slicing after early termination)
|
|
918
|
+
const paginatedNouns = collectedNouns.slice(offset, offset + limit);
|
|
919
|
+
const hasMore = collectedNouns.length >= targetCount;
|
|
899
920
|
return {
|
|
900
921
|
items: paginatedNouns,
|
|
901
|
-
totalCount,
|
|
922
|
+
totalCount: collectedNouns.length, // Accurate count of collected results
|
|
902
923
|
hasMore,
|
|
903
924
|
nextCursor: hasMore && paginatedNouns.length > 0
|
|
904
925
|
? paginatedNouns[paginatedNouns.length - 1].id
|
|
905
926
|
: undefined
|
|
906
927
|
};
|
|
907
928
|
}
|
|
929
|
+
/**
|
|
930
|
+
* Get verbs with pagination (v5.5.0: Type-first implementation with billion-scale optimizations)
|
|
931
|
+
*
|
|
932
|
+
* CRITICAL: This method is required for brain.getRelations() to work!
|
|
933
|
+
* Iterates through verb types with the same optimizations as nouns.
|
|
934
|
+
*
|
|
935
|
+
* ARCHITECTURE: Reads storage directly (not indexes) to avoid circular dependencies.
|
|
936
|
+
* Storage → Indexes (one direction only). GraphAdjacencyIndex built FROM storage.
|
|
937
|
+
*
|
|
938
|
+
* OPTIMIZATIONS (v5.5.0):
|
|
939
|
+
* - Skip empty types using verbCountsByType[] tracking (O(1) check)
|
|
940
|
+
* - Early termination when offset + limit verbs collected
|
|
941
|
+
* - Memory efficient: Never loads full dataset
|
|
942
|
+
* - Inline filtering for sourceId, targetId, verbType
|
|
943
|
+
*/
|
|
944
|
+
async getVerbsWithPagination(options) {
|
|
945
|
+
await this.ensureInitialized();
|
|
946
|
+
const { limit, offset = 0, filter } = options;
|
|
947
|
+
const collectedVerbs = [];
|
|
948
|
+
const targetCount = offset + limit; // Early termination target
|
|
949
|
+
// v5.5.0 BUG FIX: Only use optimization if counts are reliable
|
|
950
|
+
const totalVerbCountFromArray = this.verbCountsByType.reduce((sum, c) => sum + c, 0);
|
|
951
|
+
const useOptimization = totalVerbCountFromArray > 0;
|
|
952
|
+
// v5.5.0: Iterate through verb types with billion-scale optimizations
|
|
953
|
+
for (let i = 0; i < VERB_TYPE_COUNT && collectedVerbs.length < targetCount; i++) {
|
|
954
|
+
// OPTIMIZATION 1: Skip empty types (only if counts are reliable)
|
|
955
|
+
if (useOptimization && this.verbCountsByType[i] === 0) {
|
|
956
|
+
continue;
|
|
957
|
+
}
|
|
958
|
+
const type = TypeUtils.getVerbFromIndex(i);
|
|
959
|
+
// If filtering by verbType, skip other types
|
|
960
|
+
if (filter?.verbType) {
|
|
961
|
+
const filterTypes = Array.isArray(filter.verbType) ? filter.verbType : [filter.verbType];
|
|
962
|
+
if (!filterTypes.includes(type)) {
|
|
963
|
+
continue;
|
|
964
|
+
}
|
|
965
|
+
}
|
|
966
|
+
try {
|
|
967
|
+
const verbsOfType = await this.getVerbsByType_internal(type);
|
|
968
|
+
// Apply filtering inline (memory efficient)
|
|
969
|
+
for (const verb of verbsOfType) {
|
|
970
|
+
// OPTIMIZATION 2: Early termination (stop when we have enough)
|
|
971
|
+
if (collectedVerbs.length >= targetCount) {
|
|
972
|
+
break;
|
|
973
|
+
}
|
|
974
|
+
// Apply filters if specified
|
|
975
|
+
if (filter) {
|
|
976
|
+
// Filter by sourceId
|
|
977
|
+
if (filter.sourceId) {
|
|
978
|
+
const sourceIds = Array.isArray(filter.sourceId)
|
|
979
|
+
? filter.sourceId
|
|
980
|
+
: [filter.sourceId];
|
|
981
|
+
if (!sourceIds.includes(verb.sourceId)) {
|
|
982
|
+
continue;
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
// Filter by targetId
|
|
986
|
+
if (filter.targetId) {
|
|
987
|
+
const targetIds = Array.isArray(filter.targetId)
|
|
988
|
+
? filter.targetId
|
|
989
|
+
: [filter.targetId];
|
|
990
|
+
if (!targetIds.includes(verb.targetId)) {
|
|
991
|
+
continue;
|
|
992
|
+
}
|
|
993
|
+
}
|
|
994
|
+
}
|
|
995
|
+
// Verb passed all filters - add to collection
|
|
996
|
+
collectedVerbs.push(verb);
|
|
997
|
+
}
|
|
998
|
+
}
|
|
999
|
+
catch (error) {
|
|
1000
|
+
// Skip types that have no data (directory may not exist)
|
|
1001
|
+
}
|
|
1002
|
+
}
|
|
1003
|
+
// Apply pagination (v5.5.0: Efficient slicing after early termination)
|
|
1004
|
+
const paginatedVerbs = collectedVerbs.slice(offset, offset + limit);
|
|
1005
|
+
const hasMore = collectedVerbs.length >= targetCount;
|
|
1006
|
+
return {
|
|
1007
|
+
items: paginatedVerbs,
|
|
1008
|
+
totalCount: collectedVerbs.length, // Accurate count of collected results
|
|
1009
|
+
hasMore,
|
|
1010
|
+
nextCursor: hasMore && paginatedVerbs.length > 0
|
|
1011
|
+
? paginatedVerbs[paginatedVerbs.length - 1].id
|
|
1012
|
+
: undefined
|
|
1013
|
+
};
|
|
1014
|
+
}
|
|
908
1015
|
/**
|
|
909
1016
|
* Get verbs with pagination and filtering
|
|
910
1017
|
* @param options Pagination and filtering options
|
|
@@ -1080,12 +1187,84 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1080
1187
|
nextCursor: result.nextCursor
|
|
1081
1188
|
};
|
|
1082
1189
|
}
|
|
1083
|
-
//
|
|
1084
|
-
|
|
1190
|
+
// UNIVERSAL FALLBACK: Iterate through verb types with early termination (billion-scale safe)
|
|
1191
|
+
// This approach works for ALL storage adapters without requiring adapter-specific pagination
|
|
1192
|
+
console.warn('Using universal type-iteration strategy for getVerbs(). ' +
|
|
1193
|
+
'This works for all adapters but may be slower than native pagination. ' +
|
|
1194
|
+
'For optimal performance at scale, storage adapters can implement getVerbsWithPagination().');
|
|
1195
|
+
const collectedVerbs = [];
|
|
1196
|
+
let totalScanned = 0;
|
|
1197
|
+
const targetCount = offset + limit; // We need this many verbs total (including offset)
|
|
1198
|
+
// v5.5.0 BUG FIX: Check if optimization should be used
|
|
1199
|
+
// Only use type-skipping optimization if counts are non-zero (reliable)
|
|
1200
|
+
const totalVerbCountFromArray = this.verbCountsByType.reduce((sum, c) => sum + c, 0);
|
|
1201
|
+
const useOptimization = totalVerbCountFromArray > 0;
|
|
1202
|
+
// Iterate through all 127 verb types (Stage 3 CANONICAL) with early termination
|
|
1203
|
+
// OPTIMIZATION: Skip types with zero count (only if counts are reliable)
|
|
1204
|
+
for (let i = 0; i < VERB_TYPE_COUNT && collectedVerbs.length < targetCount; i++) {
|
|
1205
|
+
// Skip empty types for performance (but only if optimization is enabled)
|
|
1206
|
+
if (useOptimization && this.verbCountsByType[i] === 0) {
|
|
1207
|
+
continue;
|
|
1208
|
+
}
|
|
1209
|
+
const type = TypeUtils.getVerbFromIndex(i);
|
|
1210
|
+
try {
|
|
1211
|
+
const verbsOfType = await this.getVerbsByType_internal(type);
|
|
1212
|
+
// Apply filtering inline (memory efficient)
|
|
1213
|
+
for (const verb of verbsOfType) {
|
|
1214
|
+
// Apply filters if specified
|
|
1215
|
+
if (options?.filter) {
|
|
1216
|
+
// Filter by sourceId
|
|
1217
|
+
if (options.filter.sourceId) {
|
|
1218
|
+
const sourceIds = Array.isArray(options.filter.sourceId)
|
|
1219
|
+
? options.filter.sourceId
|
|
1220
|
+
: [options.filter.sourceId];
|
|
1221
|
+
if (!sourceIds.includes(verb.sourceId)) {
|
|
1222
|
+
continue;
|
|
1223
|
+
}
|
|
1224
|
+
}
|
|
1225
|
+
// Filter by targetId
|
|
1226
|
+
if (options.filter.targetId) {
|
|
1227
|
+
const targetIds = Array.isArray(options.filter.targetId)
|
|
1228
|
+
? options.filter.targetId
|
|
1229
|
+
: [options.filter.targetId];
|
|
1230
|
+
if (!targetIds.includes(verb.targetId)) {
|
|
1231
|
+
continue;
|
|
1232
|
+
}
|
|
1233
|
+
}
|
|
1234
|
+
// Filter by verbType
|
|
1235
|
+
if (options.filter.verbType) {
|
|
1236
|
+
const verbTypes = Array.isArray(options.filter.verbType)
|
|
1237
|
+
? options.filter.verbType
|
|
1238
|
+
: [options.filter.verbType];
|
|
1239
|
+
if (!verbTypes.includes(verb.verb)) {
|
|
1240
|
+
continue;
|
|
1241
|
+
}
|
|
1242
|
+
}
|
|
1243
|
+
}
|
|
1244
|
+
// Verb passed filters - add to collection
|
|
1245
|
+
collectedVerbs.push(verb);
|
|
1246
|
+
// Early termination: stop when we have enough for offset + limit
|
|
1247
|
+
if (collectedVerbs.length >= targetCount) {
|
|
1248
|
+
break;
|
|
1249
|
+
}
|
|
1250
|
+
}
|
|
1251
|
+
totalScanned += verbsOfType.length;
|
|
1252
|
+
}
|
|
1253
|
+
catch (error) {
|
|
1254
|
+
// Ignore errors for types with no verbs (directory may not exist)
|
|
1255
|
+
// This is expected for types that haven't been used yet
|
|
1256
|
+
}
|
|
1257
|
+
}
|
|
1258
|
+
// Apply pagination (slice for offset)
|
|
1259
|
+
const paginatedVerbs = collectedVerbs.slice(offset, offset + limit);
|
|
1260
|
+
const hasMore = collectedVerbs.length >= targetCount;
|
|
1085
1261
|
return {
|
|
1086
|
-
items:
|
|
1087
|
-
totalCount:
|
|
1088
|
-
hasMore
|
|
1262
|
+
items: paginatedVerbs,
|
|
1263
|
+
totalCount: collectedVerbs.length, // Accurate count of filtered results
|
|
1264
|
+
hasMore,
|
|
1265
|
+
nextCursor: hasMore && paginatedVerbs.length > 0
|
|
1266
|
+
? paginatedVerbs[paginatedVerbs.length - 1].id
|
|
1267
|
+
: undefined
|
|
1089
1268
|
};
|
|
1090
1269
|
}
|
|
1091
1270
|
catch (error) {
|
|
@@ -1402,6 +1581,45 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1402
1581
|
};
|
|
1403
1582
|
await this.writeObjectToPath(`${SYSTEM_DIR}/type-statistics.json`, stats);
|
|
1404
1583
|
}
|
|
1584
|
+
/**
|
|
1585
|
+
* Rebuild type counts from actual storage (v5.5.0)
|
|
1586
|
+
* Called when statistics are missing or inconsistent
|
|
1587
|
+
* Ensures verbCountsByType is always accurate for reliable pagination
|
|
1588
|
+
*/
|
|
1589
|
+
async rebuildTypeCounts() {
|
|
1590
|
+
console.log('[BaseStorage] Rebuilding type counts from storage...');
|
|
1591
|
+
// Rebuild verb counts by checking each type directory
|
|
1592
|
+
for (let i = 0; i < VERB_TYPE_COUNT; i++) {
|
|
1593
|
+
const type = TypeUtils.getVerbFromIndex(i);
|
|
1594
|
+
const prefix = `entities/verbs/${type}/vectors/`;
|
|
1595
|
+
try {
|
|
1596
|
+
const paths = await this.listObjectsInBranch(prefix);
|
|
1597
|
+
this.verbCountsByType[i] = paths.length;
|
|
1598
|
+
}
|
|
1599
|
+
catch (error) {
|
|
1600
|
+
// Type directory doesn't exist - count is 0
|
|
1601
|
+
this.verbCountsByType[i] = 0;
|
|
1602
|
+
}
|
|
1603
|
+
}
|
|
1604
|
+
// Rebuild noun counts similarly
|
|
1605
|
+
for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
|
|
1606
|
+
const type = TypeUtils.getNounFromIndex(i);
|
|
1607
|
+
const prefix = `entities/nouns/${type}/vectors/`;
|
|
1608
|
+
try {
|
|
1609
|
+
const paths = await this.listObjectsInBranch(prefix);
|
|
1610
|
+
this.nounCountsByType[i] = paths.length;
|
|
1611
|
+
}
|
|
1612
|
+
catch (error) {
|
|
1613
|
+
// Type directory doesn't exist - count is 0
|
|
1614
|
+
this.nounCountsByType[i] = 0;
|
|
1615
|
+
}
|
|
1616
|
+
}
|
|
1617
|
+
// Save rebuilt counts to storage
|
|
1618
|
+
await this.saveTypeStatistics();
|
|
1619
|
+
const totalVerbs = this.verbCountsByType.reduce((sum, count) => sum + count, 0);
|
|
1620
|
+
const totalNouns = this.nounCountsByType.reduce((sum, count) => sum + count, 0);
|
|
1621
|
+
console.log(`[BaseStorage] Rebuilt counts: ${totalNouns} nouns, ${totalVerbs} verbs`);
|
|
1622
|
+
}
|
|
1405
1623
|
/**
|
|
1406
1624
|
* Get noun type from cache or metadata
|
|
1407
1625
|
* Relies on nounTypeCache populated during metadata saves
|