@soulcraft/brainy 5.5.0 → 5.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/dist/augmentations/display/fieldPatterns.js +3 -3
- package/dist/augmentations/display/intelligentComputation.d.ts +1 -1
- package/dist/augmentations/display/intelligentComputation.js +1 -1
- package/dist/augmentations/typeMatching/brainyTypes.d.ts +1 -1
- package/dist/augmentations/typeMatching/brainyTypes.js +1 -1
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.d.ts +1 -1
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.js +1 -1
- package/dist/augmentations/universalDisplayAugmentation.d.ts +1 -1
- package/dist/augmentations/universalDisplayAugmentation.js +1 -1
- package/dist/brainy.js +1 -1
- package/dist/cli/commands/types.js +2 -2
- package/dist/hnsw/typeAwareHNSWIndex.d.ts +3 -3
- package/dist/hnsw/typeAwareHNSWIndex.js +5 -5
- package/dist/importers/SmartExcelImporter.js +1 -1
- package/dist/neural/embeddedKeywordEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedKeywordEmbeddings.js +1 -1
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/query/typeAwareQueryPlanner.d.ts +7 -7
- package/dist/query/typeAwareQueryPlanner.js +7 -7
- package/dist/storage/adapters/azureBlobStorage.js +9 -0
- package/dist/storage/adapters/fileSystemStorage.js +17 -0
- package/dist/storage/adapters/gcsStorage.js +11 -0
- package/dist/storage/adapters/opfsStorage.js +22 -0
- package/dist/storage/adapters/r2Storage.js +11 -2
- package/dist/storage/adapters/s3CompatibleStorage.js +13 -0
- package/dist/storage/baseStorage.d.ts +48 -1
- package/dist/storage/baseStorage.js +242 -19
- package/package.json +1 -1
- package/dist/importManager.d.ts +0 -78
- package/dist/importManager.js +0 -267
- package/dist/storage/adapters/typeAwareStorageAdapter.d.ts +0 -300
- package/dist/storage/adapters/typeAwareStorageAdapter.js +0 -1012
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,38 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
## [5.6.1](https://github.com/soulcraftlabs/brainy/compare/v5.6.0...v5.6.1) (2025-11-11)
|
|
6
|
+
|
|
7
|
+
### 🐛 Bug Fixes
|
|
8
|
+
|
|
9
|
+
* **storage**: Fix `clear()` not deleting COW version control data ([#workshop-bug-report](https://github.com/soulcraftlabs/brainy/issues))
|
|
10
|
+
- Fixed all storage adapters to properly delete `_cow/` directory on clear()
|
|
11
|
+
- Fixed in-memory entity counters not being reset after clear()
|
|
12
|
+
- Prevents COW reinitialization after clear() by setting `cowEnabled = false`
|
|
13
|
+
- **Impact**: Resolves storage persistence bug (103MB → 0 bytes after clear)
|
|
14
|
+
- **Affected adapters**: FileSystemStorage, OPFSStorage, S3CompatibleStorage (GCSStorage, R2Storage, AzureBlobStorage already correct)
|
|
15
|
+
|
|
16
|
+
### ๐ Technical Details
|
|
17
|
+
|
|
18
|
+
* **Root causes identified**:
|
|
19
|
+
1. `_cow/` directory contents deleted but directory not removed
|
|
20
|
+
2. In-memory counters (`totalNounCount`, `totalVerbCount`) not reset
|
|
21
|
+
3. COW could auto-reinitialize on next operation
|
|
22
|
+
* **Fixes applied**:
|
|
23
|
+
- FileSystemStorage: Use `fs.rm()` to delete entire `_cow/` directory
|
|
24
|
+
- OPFSStorage: Use `removeEntry('_cow', {recursive: true})`
|
|
25
|
+
- Cloud adapters: Already use `deleteObjectsWithPrefix('_cow/')`
|
|
26
|
+
- All adapters: Reset `totalNounCount = 0` and `totalVerbCount = 0`
|
|
27
|
+
- BaseStorage: Added guard in `initializeCOW()` to prevent reinitialization when `cowEnabled === false`
|
|
28
|
+
|
|
29
|
+
## [5.6.0](https://github.com/soulcraftlabs/brainy/compare/v5.5.0...v5.6.0) (2025-11-11)
|
|
30
|
+
|
|
31
|
+
### 🐛 Bug Fixes
|
|
32
|
+
|
|
33
|
+
* **relations**: Fix `getRelations()` returning empty array for fresh instances
|
|
34
|
+
- Resolved initialization race condition in relationship loading
|
|
35
|
+
- Fresh Brain instances now correctly load persisted relationships
|
|
36
|
+
|
|
5
37
|
## [5.5.0](https://github.com/soulcraftlabs/brainy/compare/v5.4.0...v5.5.0) (2025-11-06)
|
|
6
38
|
|
|
7
39
|
### 🎯 Stage 3 CANONICAL Taxonomy - Complete Coverage
|
|
@@ -20,7 +20,7 @@ export const UNIVERSAL_FIELD_PATTERNS = [
|
|
|
20
20
|
fields: ['firstName', 'lastName', 'fullName', 'realName'],
|
|
21
21
|
displayField: 'title',
|
|
22
22
|
confidence: 0.9,
|
|
23
|
-
applicableTypes: [NounType.Person
|
|
23
|
+
applicableTypes: [NounType.Person],
|
|
24
24
|
transform: (value, context) => {
|
|
25
25
|
const { metadata } = context;
|
|
26
26
|
if (metadata.firstName && metadata.lastName) {
|
|
@@ -68,7 +68,7 @@ export const UNIVERSAL_FIELD_PATTERNS = [
|
|
|
68
68
|
fields: ['bio', 'biography', 'profile', 'about'],
|
|
69
69
|
displayField: 'description',
|
|
70
70
|
confidence: 0.85,
|
|
71
|
-
applicableTypes: [NounType.Person
|
|
71
|
+
applicableTypes: [NounType.Person]
|
|
72
72
|
},
|
|
73
73
|
{
|
|
74
74
|
fields: ['content', 'text', 'body', 'message'],
|
|
@@ -100,7 +100,7 @@ export const UNIVERSAL_FIELD_PATTERNS = [
|
|
|
100
100
|
fields: ['role', 'position', 'jobTitle', 'occupation'],
|
|
101
101
|
displayField: 'type',
|
|
102
102
|
confidence: 0.8,
|
|
103
|
-
applicableTypes: [NounType.Person
|
|
103
|
+
applicableTypes: [NounType.Person],
|
|
104
104
|
transform: (value) => String(value || 'Person')
|
|
105
105
|
},
|
|
106
106
|
{
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* - BrainyTypes for semantic type detection
|
|
6
6
|
* - Neural Import patterns for field analysis
|
|
7
7
|
* - JSON processing utilities for field extraction
|
|
8
|
-
* - Existing NounType/VerbType taxonomy (
|
|
8
|
+
* - Existing NounType/VerbType taxonomy (42+127 types)
|
|
9
9
|
*/
|
|
10
10
|
import type { ComputedDisplayFields, DisplayConfig } from './types.js';
|
|
11
11
|
import type { GraphVerb } from '../../coreTypes.js';
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* - BrainyTypes for semantic type detection
|
|
6
6
|
* - Neural Import patterns for field analysis
|
|
7
7
|
* - JSON processing utilities for field extraction
|
|
8
|
-
* - Existing NounType/VerbType taxonomy (
|
|
8
|
+
* - Existing NounType/VerbType taxonomy (42+127 types)
|
|
9
9
|
*/
|
|
10
10
|
import { getBrainyTypes } from '../typeMatching/brainyTypes.js';
|
|
11
11
|
import { getFieldPatterns, getPriorityFields } from './fieldPatterns.js';
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* BrainyTypes - Intelligent type detection using semantic embeddings
|
|
3
3
|
*
|
|
4
4
|
* This module uses our existing TransformerEmbedding and similarity functions
|
|
5
|
-
* to intelligently match data to our
|
|
5
|
+
* to intelligently match data to our 42 noun types and 127 verb types.
|
|
6
6
|
*
|
|
7
7
|
* Features:
|
|
8
8
|
* - Semantic similarity matching using embeddings
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* BrainyTypes - Intelligent type detection using semantic embeddings
|
|
3
3
|
*
|
|
4
4
|
* This module uses our existing TransformerEmbedding and similarity functions
|
|
5
|
-
* to intelligently match data to our
|
|
5
|
+
* to intelligently match data to our 42 noun types and 127 verb types.
|
|
6
6
|
*
|
|
7
7
|
* Features:
|
|
8
8
|
* - Semantic similarity matching using embeddings
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* IntelligentTypeMatcher - Wrapper around BrainyTypes for testing
|
|
3
3
|
*
|
|
4
4
|
* Provides intelligent type detection using semantic embeddings
|
|
5
|
-
* for matching data to our
|
|
5
|
+
* for matching data to our 42 noun types and 127 verb types.
|
|
6
6
|
*/
|
|
7
7
|
import { NounType, VerbType } from '../../types/graphTypes.js';
|
|
8
8
|
export interface TypeMatchOptions {
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* IntelligentTypeMatcher - Wrapper around BrainyTypes for testing
|
|
3
3
|
*
|
|
4
4
|
* Provides intelligent type detection using semantic embeddings
|
|
5
|
-
* for matching data to our
|
|
5
|
+
* for matching data to our 42 noun types and 127 verb types.
|
|
6
6
|
*/
|
|
7
7
|
import { VerbType } from '../../types/graphTypes.js';
|
|
8
8
|
import { getBrainyTypes } from './brainyTypes.js';
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*
|
|
6
6
|
* Features:
|
|
7
7
|
* - ✅ Leverages existing BrainyTypes for semantic type detection
|
|
8
|
-
* - ✅ Complete icon coverage for all
|
|
8
|
+
* - ✅ Complete icon coverage for all 42 NounTypes + 127 VerbTypes
|
|
9
9
|
* - ✅ Zero performance impact with lazy computation and intelligent caching
|
|
10
10
|
* - ✅ Perfect isolation - can be disabled, replaced, or configured
|
|
11
11
|
* - ✅ Clean developer experience with zero conflicts
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*
|
|
6
6
|
* Features:
|
|
7
7
|
* - ✅ Leverages existing BrainyTypes for semantic type detection
|
|
8
|
-
* - ✅ Complete icon coverage for all
|
|
8
|
+
* - ✅ Complete icon coverage for all 42 NounTypes + 127 VerbTypes
|
|
9
9
|
* - ✅ Zero performance impact with lazy computation and intelligent caching
|
|
10
10
|
* - ✅ Perfect isolation - can be disabled, replaced, or configured
|
|
11
11
|
* - ✅ Clean developer experience with zero conflicts
|
package/dist/brainy.js
CHANGED
|
@@ -3282,7 +3282,7 @@ export class Brainy {
|
|
|
3282
3282
|
return Object.fromEntries(this.metadataIndex.getAllEntityCounts());
|
|
3283
3283
|
},
|
|
3284
3284
|
// Phase 1b: O(1) count by type enum (Uint32Array-based, more efficient)
|
|
3285
|
-
// Uses fixed-size type tracking:
|
|
3285
|
+
// Uses fixed-size type tracking: 676 bytes vs ~35KB with Maps (98.1% reduction)
|
|
3286
3286
|
byTypeEnum: (type) => {
|
|
3287
3287
|
return this.metadataIndex.getEntityCountByTypeEnum(type);
|
|
3288
3288
|
},
|
|
@@ -26,7 +26,7 @@ export async function types(options) {
|
|
|
26
26
|
}
|
|
27
27
|
// Display nouns
|
|
28
28
|
if (showNouns) {
|
|
29
|
-
console.log(chalk.bold.cyan('\n๐ Noun Types (
|
|
29
|
+
console.log(chalk.bold.cyan('\n๐ Noun Types (42):\n'));
|
|
30
30
|
const nounChunks = [];
|
|
31
31
|
for (let i = 0; i < BrainyTypes.nouns.length; i += 3) {
|
|
32
32
|
nounChunks.push(BrainyTypes.nouns.slice(i, i + 3));
|
|
@@ -37,7 +37,7 @@ export async function types(options) {
|
|
|
37
37
|
}
|
|
38
38
|
// Display verbs
|
|
39
39
|
if (showVerbs) {
|
|
40
|
-
console.log(chalk.bold.cyan('\n๐ Verb Types (
|
|
40
|
+
console.log(chalk.bold.cyan('\n๐ Verb Types (127):\n'));
|
|
41
41
|
const verbChunks = [];
|
|
42
42
|
for (let i = 0; i < BrainyTypes.verbs.length; i += 3) {
|
|
43
43
|
verbChunks.push(BrainyTypes.verbs.slice(i, i + 3));
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* - Storage: Already type-first from Phase 1a
|
|
8
8
|
*
|
|
9
9
|
* Architecture:
|
|
10
|
-
* - One HNSWIndex per NounType (
|
|
10
|
+
* - One HNSWIndex per NounType (42 total)
|
|
11
11
|
* - Lazy initialization (indexes created on first use)
|
|
12
12
|
* - Type routing for optimal performance
|
|
13
13
|
* - Falls back to multi-type search when type unknown
|
|
@@ -101,7 +101,7 @@ export declare class TypeAwareHNSWIndex {
|
|
|
101
101
|
* **All-types search** (fallback):
|
|
102
102
|
* ```typescript
|
|
103
103
|
* await index.search(queryVector, 10)
|
|
104
|
-
* // Searches all
|
|
104
|
+
* // Searches all 42 graphs (slower but comprehensive)
|
|
105
105
|
* ```
|
|
106
106
|
*
|
|
107
107
|
* @param queryVector Query vector
|
|
@@ -192,7 +192,7 @@ export declare class TypeAwareHNSWIndex {
|
|
|
192
192
|
* Rebuild HNSW indexes from storage (type-aware)
|
|
193
193
|
*
|
|
194
194
|
* CRITICAL: This implementation uses type-filtered pagination to avoid
|
|
195
|
-
* loading ALL entities for each type (which would be
|
|
195
|
+
* loading ALL entities for each type (which would be 42 billion reads @ 1B scale).
|
|
196
196
|
*
|
|
197
197
|
* Can rebuild all types or specific types.
|
|
198
198
|
* Much faster than rebuilding a monolithic index.
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* - Storage: Already type-first from Phase 1a
|
|
8
8
|
*
|
|
9
9
|
* Architecture:
|
|
10
|
-
* - One HNSWIndex per NounType (
|
|
10
|
+
* - One HNSWIndex per NounType (42 total)
|
|
11
11
|
* - Lazy initialization (indexes created on first use)
|
|
12
12
|
* - Type routing for optimal performance
|
|
13
13
|
* - Falls back to multi-type search when type unknown
|
|
@@ -84,7 +84,7 @@ export class TypeAwareHNSWIndex {
|
|
|
84
84
|
// Validate type is a valid NounType
|
|
85
85
|
const typeIndex = TypeUtils.getNounIndex(type);
|
|
86
86
|
if (typeIndex === undefined || typeIndex === null || typeIndex < 0) {
|
|
87
|
-
throw new Error(`Invalid NounType: ${type}. Must be one of the
|
|
87
|
+
throw new Error(`Invalid NounType: ${type}. Must be one of the 42 defined types.`);
|
|
88
88
|
}
|
|
89
89
|
if (!this.indexes.has(type)) {
|
|
90
90
|
prodLog.info(`Creating HNSW index for type: ${type}`);
|
|
@@ -137,7 +137,7 @@ export class TypeAwareHNSWIndex {
|
|
|
137
137
|
* **All-types search** (fallback):
|
|
138
138
|
* ```typescript
|
|
139
139
|
* await index.search(queryVector, 10)
|
|
140
|
-
* // Searches all
|
|
140
|
+
* // Searches all 42 graphs (slower but comprehensive)
|
|
141
141
|
* ```
|
|
142
142
|
*
|
|
143
143
|
* @param queryVector Query vector
|
|
@@ -302,7 +302,7 @@ export class TypeAwareHNSWIndex {
|
|
|
302
302
|
* Rebuild HNSW indexes from storage (type-aware)
|
|
303
303
|
*
|
|
304
304
|
* CRITICAL: This implementation uses type-filtered pagination to avoid
|
|
305
|
-
* loading ALL entities for each type (which would be
|
|
305
|
+
* loading ALL entities for each type (which would be 42 billion reads @ 1B scale).
|
|
306
306
|
*
|
|
307
307
|
* Can rebuild all types or specific types.
|
|
308
308
|
* Much faster than rebuilding a monolithic index.
|
|
@@ -341,7 +341,7 @@ export class TypeAwareHNSWIndex {
|
|
|
341
341
|
`(${(vectorMemory / 1024 / 1024).toFixed(1)}MB > ${(availableCache / 1024 / 1024).toFixed(1)}MB cache) - loading on-demand`);
|
|
342
342
|
}
|
|
343
343
|
// Load ALL nouns ONCE and route to correct type indexes
|
|
344
|
-
// This is O(N) instead of O(
|
|
344
|
+
// This is O(N) instead of O(42*N) from the previous parallel approach
|
|
345
345
|
let cursor = undefined;
|
|
346
346
|
let hasMore = true;
|
|
347
347
|
let totalLoaded = 0;
|
|
@@ -41,7 +41,7 @@ export class SmartExcelImporter {
|
|
|
41
41
|
enableRelationshipInference: true,
|
|
42
42
|
// CONCEPT EXTRACTION PRODUCTION-READY (v3.33.0+):
|
|
43
43
|
// Type embeddings are now pre-computed at build time - zero runtime cost!
|
|
44
|
-
// All
|
|
44
|
+
// All 42 noun types + 127 verb types instantly available
|
|
45
45
|
//
|
|
46
46
|
// Performance profile:
|
|
47
47
|
// - Type embeddings: INSTANT (pre-computed at build time, ~100KB in-memory)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Pre-computed Keyword Embeddings for Unified Semantic Type Inference
|
|
3
3
|
*
|
|
4
4
|
* Generated by: scripts/buildKeywordEmbeddings.ts
|
|
5
|
-
* Generated on: 2025-11-
|
|
5
|
+
* Generated on: 2025-11-06T17:59:17.355Z
|
|
6
6
|
* Total keywords: 1050 (716 nouns + 334 verbs)
|
|
7
7
|
* Canonical: 919, Synonyms: 131
|
|
8
8
|
* Embedding dimension: 384
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Pre-computed Keyword Embeddings for Unified Semantic Type Inference
|
|
3
3
|
*
|
|
4
4
|
* Generated by: scripts/buildKeywordEmbeddings.ts
|
|
5
|
-
* Generated on: 2025-11-
|
|
5
|
+
* Generated on: 2025-11-06T17:59:17.355Z
|
|
6
6
|
* Total keywords: 1050 (716 nouns + 334 verbs)
|
|
7
7
|
* Canonical: 919, Synonyms: 131
|
|
8
8
|
* Embedding dimension: 384
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* 🧠 BRAINY EMBEDDED TYPE EMBEDDINGS
|
|
3
3
|
*
|
|
4
4
|
* AUTO-GENERATED - DO NOT EDIT
|
|
5
|
-
* Generated: 2025-11-
|
|
5
|
+
* Generated: 2025-11-06T17:38:22.619Z
|
|
6
6
|
* Noun Types: 42
|
|
7
7
|
* Verb Types: 127
|
|
8
8
|
*
|
|
@@ -15,7 +15,7 @@ export const TYPE_METADATA = {
|
|
|
15
15
|
verbTypes: 127,
|
|
16
16
|
totalTypes: 169,
|
|
17
17
|
embeddingDimensions: 384,
|
|
18
|
-
generatedAt: "2025-11-
|
|
18
|
+
generatedAt: "2025-11-06T17:38:22.619Z",
|
|
19
19
|
sizeBytes: {
|
|
20
20
|
embeddings: 259584,
|
|
21
21
|
base64: 346112
|
|
@@ -6,14 +6,14 @@
|
|
|
6
6
|
* TypeAwareHNSWIndex graphs.
|
|
7
7
|
*
|
|
8
8
|
* Performance Impact:
|
|
9
|
-
* - Single-type queries:
|
|
10
|
-
* - Multi-type queries:
|
|
9
|
+
* - Single-type queries: 42x speedup (search 1/42 graphs)
|
|
10
|
+
* - Multi-type queries: 8-21x speedup (search 2-5/42 graphs)
|
|
11
11
|
* - Overall: 40% latency reduction @ 1B scale
|
|
12
12
|
*
|
|
13
13
|
* Examples:
|
|
14
|
-
* - "Find engineers" → single-type → [Person] →
|
|
15
|
-
* - "People at Tesla" → multi-type → [Person, Organization] →
|
|
16
|
-
* - "Everything about AI" → all-types → [all
|
|
14
|
+
* - "Find engineers" → single-type → [Person] → 42x speedup
|
|
15
|
+
* - "People at Tesla" → multi-type → [Person, Organization] → 21x speedup
|
|
16
|
+
* - "Everything about AI" → all-types → [all 42 types] → no speedup
|
|
17
17
|
*/
|
|
18
18
|
import { NounType } from '../types/graphTypes.js';
|
|
19
19
|
import { type TypeInference } from './semanticTypeInference.js';
|
|
@@ -38,11 +38,11 @@ export interface TypeAwareQueryPlan {
|
|
|
38
38
|
*/
|
|
39
39
|
routing: QueryRoutingStrategy;
|
|
40
40
|
/**
|
|
41
|
-
* Target types to search (1-
|
|
41
|
+
* Target types to search (1-42 types)
|
|
42
42
|
*/
|
|
43
43
|
targetTypes: NounType[];
|
|
44
44
|
/**
|
|
45
|
-
* Estimated speedup factor (1.0 = no speedup,
|
|
45
|
+
* Estimated speedup factor (1.0 = no speedup, 42.0 = 42x faster)
|
|
46
46
|
*/
|
|
47
47
|
estimatedSpeedup: number;
|
|
48
48
|
/**
|
|
@@ -6,14 +6,14 @@
|
|
|
6
6
|
* TypeAwareHNSWIndex graphs.
|
|
7
7
|
*
|
|
8
8
|
* Performance Impact:
|
|
9
|
-
* - Single-type queries:
|
|
10
|
-
* - Multi-type queries:
|
|
9
|
+
* - Single-type queries: 42x speedup (search 1/42 graphs)
|
|
10
|
+
* - Multi-type queries: 8-21x speedup (search 2-5/42 graphs)
|
|
11
11
|
* - Overall: 40% latency reduction @ 1B scale
|
|
12
12
|
*
|
|
13
13
|
* Examples:
|
|
14
|
-
* - "Find engineers" → single-type → [Person] →
|
|
15
|
-
* - "People at Tesla" → multi-type → [Person, Organization] →
|
|
16
|
-
* - "Everything about AI" → all-types → [all
|
|
14
|
+
* - "Find engineers" → single-type → [Person] → 42x speedup
|
|
15
|
+
* - "People at Tesla" → multi-type → [Person, Organization] → 21x speedup
|
|
16
|
+
* - "Everything about AI" → all-types → [all 42 types] → no speedup
|
|
17
17
|
*/
|
|
18
18
|
import { NounType, NOUN_TYPE_COUNT } from '../types/graphTypes.js';
|
|
19
19
|
import { inferNouns } from './semanticTypeInference.js';
|
|
@@ -207,13 +207,13 @@ export class TypeAwareQueryPlanner {
|
|
|
207
207
|
const allPct = ((this.stats.allTypesQueries / total) * 100).toFixed(1);
|
|
208
208
|
const avgConf = (this.stats.avgConfidence * 100).toFixed(1);
|
|
209
209
|
// Calculate weighted average speedup
|
|
210
|
-
const avgSpeedup = ((this.stats.singleTypeQueries *
|
|
210
|
+
const avgSpeedup = ((this.stats.singleTypeQueries * 42.0 +
|
|
211
211
|
this.stats.multiTypeQueries * 10.0 +
|
|
212
212
|
this.stats.allTypesQueries * 1.0) /
|
|
213
213
|
total).toFixed(1);
|
|
214
214
|
return `
|
|
215
215
|
Query Statistics (${total} total):
|
|
216
|
-
- Single-type: ${this.stats.singleTypeQueries} (${singlePct}%) -
|
|
216
|
+
- Single-type: ${this.stats.singleTypeQueries} (${singlePct}%) - 42x speedup
|
|
217
217
|
- Multi-type: ${this.stats.multiTypeQueries} (${multiPct}%) - ~10x speedup
|
|
218
218
|
- All-types: ${this.stats.allTypesQueries} (${allPct}%) - 1x speedup
|
|
219
219
|
- Avg confidence: ${avgConf}%
|
|
@@ -851,12 +851,21 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
851
851
|
try {
|
|
852
852
|
this.logger.info('🧹 Clearing all data from Azure container...');
|
|
853
853
|
// Delete all blobs in container
|
|
854
|
+
// v5.6.1: listBlobsFlat() returns ALL blobs including _cow/ prefix
|
|
855
|
+
// This correctly deletes COW version control data (commits, trees, blobs, refs)
|
|
854
856
|
for await (const blob of this.containerClient.listBlobsFlat()) {
|
|
855
857
|
if (blob.name) {
|
|
856
858
|
const blockBlobClient = this.containerClient.getBlockBlobClient(blob.name);
|
|
857
859
|
await blockBlobClient.delete();
|
|
858
860
|
}
|
|
859
861
|
}
|
|
862
|
+
// CRITICAL: Reset COW state to prevent automatic reinitialization
|
|
863
|
+
// When COW data is cleared, we must also clear the COW managers
|
|
864
|
+
// Otherwise initializeCOW() will auto-recreate initial commit on next operation
|
|
865
|
+
this.refManager = undefined;
|
|
866
|
+
this.blobStorage = undefined;
|
|
867
|
+
this.commitLog = undefined;
|
|
868
|
+
this.cowEnabled = false;
|
|
860
869
|
// Clear caches
|
|
861
870
|
this.nounCacheManager.clear();
|
|
862
871
|
this.verbCacheManager.clear();
|
|
@@ -874,9 +874,26 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
874
874
|
if (await this.directoryExists(this.indexDir)) {
|
|
875
875
|
await removeDirectoryContents(this.indexDir);
|
|
876
876
|
}
|
|
877
|
+
// v5.6.1: Remove COW (copy-on-write) version control data
|
|
878
|
+
// This directory stores all git-like versioning data (commits, trees, blobs, refs)
|
|
879
|
+
// Must be deleted to fully clear all data including version history
|
|
880
|
+
const cowDir = path.join(this.rootDir, '_cow');
|
|
881
|
+
if (await this.directoryExists(cowDir)) {
|
|
882
|
+
// Delete the entire _cow/ directory (not just contents)
|
|
883
|
+
await fs.promises.rm(cowDir, { recursive: true, force: true });
|
|
884
|
+
// CRITICAL: Reset COW state to prevent automatic reinitialization
|
|
885
|
+
// When COW data is cleared, we must also clear the COW managers
|
|
886
|
+
// Otherwise initializeCOW() will auto-recreate initial commit on next operation
|
|
887
|
+
this.refManager = undefined;
|
|
888
|
+
this.blobStorage = undefined;
|
|
889
|
+
this.commitLog = undefined;
|
|
890
|
+
this.cowEnabled = false;
|
|
891
|
+
}
|
|
877
892
|
// Clear the statistics cache
|
|
878
893
|
this.statisticsCache = null;
|
|
879
894
|
this.statisticsModified = false;
|
|
895
|
+
this.totalNounCount = 0;
|
|
896
|
+
this.totalVerbCount = 0;
|
|
880
897
|
}
|
|
881
898
|
/**
|
|
882
899
|
* Enhanced clear operation with safety mechanisms and performance optimizations
|
|
@@ -778,6 +778,17 @@ export class GcsStorage extends BaseStorage {
|
|
|
778
778
|
await deleteObjectsWithPrefix(this.metadataPrefix);
|
|
779
779
|
await deleteObjectsWithPrefix(this.verbMetadataPrefix);
|
|
780
780
|
await deleteObjectsWithPrefix(this.systemPrefix);
|
|
781
|
+
// v5.6.1: Clear COW (copy-on-write) version control data
|
|
782
|
+
// This includes all git-like versioning data (commits, trees, blobs, refs)
|
|
783
|
+
// Must be deleted to fully clear all data including version history
|
|
784
|
+
await deleteObjectsWithPrefix('_cow/');
|
|
785
|
+
// CRITICAL: Reset COW state to prevent automatic reinitialization
|
|
786
|
+
// When COW data is cleared, we must also clear the COW managers
|
|
787
|
+
// Otherwise initializeCOW() will auto-recreate initial commit on next operation
|
|
788
|
+
this.refManager = undefined;
|
|
789
|
+
this.blobStorage = undefined;
|
|
790
|
+
this.commitLog = undefined;
|
|
791
|
+
this.cowEnabled = false;
|
|
781
792
|
// Clear caches
|
|
782
793
|
this.nounCacheManager.clear();
|
|
783
794
|
this.verbCacheManager.clear();
|
|
@@ -387,9 +387,31 @@ export class OPFSStorage extends BaseStorage {
|
|
|
387
387
|
await removeDirectoryContents(this.verbMetadataDir);
|
|
388
388
|
// Remove all files in the index directory
|
|
389
389
|
await removeDirectoryContents(this.indexDir);
|
|
390
|
+
// v5.6.1: Remove COW (copy-on-write) version control data
|
|
391
|
+
// This directory stores all git-like versioning data (commits, trees, blobs, refs)
|
|
392
|
+
// Must be deleted to fully clear all data including version history
|
|
393
|
+
try {
|
|
394
|
+
// Delete the entire _cow/ directory (not just contents)
|
|
395
|
+
await this.rootDir.removeEntry('_cow', { recursive: true });
|
|
396
|
+
// CRITICAL: Reset COW state to prevent automatic reinitialization
|
|
397
|
+
// When COW data is cleared, we must also clear the COW managers
|
|
398
|
+
// Otherwise initializeCOW() will auto-recreate initial commit on next operation
|
|
399
|
+
this.refManager = undefined;
|
|
400
|
+
this.blobStorage = undefined;
|
|
401
|
+
this.commitLog = undefined;
|
|
402
|
+
this.cowEnabled = false;
|
|
403
|
+
}
|
|
404
|
+
catch (error) {
|
|
405
|
+
// Ignore if _cow directory doesn't exist (not all instances use COW)
|
|
406
|
+
if (error.name !== 'NotFoundError') {
|
|
407
|
+
throw error;
|
|
408
|
+
}
|
|
409
|
+
}
|
|
390
410
|
// Clear the statistics cache
|
|
391
411
|
this.statisticsCache = null;
|
|
392
412
|
this.statisticsModified = false;
|
|
413
|
+
this.totalNounCount = 0;
|
|
414
|
+
this.totalVerbCount = 0;
|
|
393
415
|
}
|
|
394
416
|
catch (error) {
|
|
395
417
|
console.error('Error clearing storage:', error);
|
|
@@ -771,13 +771,22 @@ export class R2Storage extends BaseStorage {
|
|
|
771
771
|
async clear() {
|
|
772
772
|
await this.ensureInitialized();
|
|
773
773
|
prodLog.info('🧹 R2: Clearing all data from bucket...');
|
|
774
|
-
// Clear all prefixes
|
|
775
|
-
|
|
774
|
+
// Clear all prefixes (v5.6.1: includes _cow/ for version control data)
|
|
775
|
+
// _cow/ stores all git-like versioning data (commits, trees, blobs, refs)
|
|
776
|
+
// Must be deleted to fully clear all data including version history
|
|
777
|
+
for (const prefix of [this.nounPrefix, this.verbPrefix, this.metadataPrefix, this.verbMetadataPrefix, this.systemPrefix, '_cow/']) {
|
|
776
778
|
const objects = await this.listObjectsUnderPath(prefix);
|
|
777
779
|
for (const key of objects) {
|
|
778
780
|
await this.deleteObjectFromPath(key);
|
|
779
781
|
}
|
|
780
782
|
}
|
|
783
|
+
// CRITICAL: Reset COW state to prevent automatic reinitialization
|
|
784
|
+
// When COW data is cleared, we must also clear the COW managers
|
|
785
|
+
// Otherwise initializeCOW() will auto-recreate initial commit on next operation
|
|
786
|
+
this.refManager = undefined;
|
|
787
|
+
this.blobStorage = undefined;
|
|
788
|
+
this.commitLog = undefined;
|
|
789
|
+
this.cowEnabled = false;
|
|
781
790
|
this.nounCacheManager.clear();
|
|
782
791
|
this.verbCacheManager.clear();
|
|
783
792
|
this.totalNounCount = 0;
|
|
@@ -1621,9 +1621,22 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1621
1621
|
await deleteObjectsWithPrefix(this.verbMetadataPrefix);
|
|
1622
1622
|
// Delete all objects in the index directory
|
|
1623
1623
|
await deleteObjectsWithPrefix(this.indexPrefix);
|
|
1624
|
+
// v5.6.1: Delete COW (copy-on-write) version control data
|
|
1625
|
+
// This includes all git-like versioning data (commits, trees, blobs, refs)
|
|
1626
|
+
// Must be deleted to fully clear all data including version history
|
|
1627
|
+
await deleteObjectsWithPrefix('_cow/');
|
|
1628
|
+
// CRITICAL: Reset COW state to prevent automatic reinitialization
|
|
1629
|
+
// When COW data is cleared, we must also clear the COW managers
|
|
1630
|
+
// Otherwise initializeCOW() will auto-recreate initial commit on next operation
|
|
1631
|
+
this.refManager = undefined;
|
|
1632
|
+
this.blobStorage = undefined;
|
|
1633
|
+
this.commitLog = undefined;
|
|
1634
|
+
this.cowEnabled = false;
|
|
1624
1635
|
// Clear the statistics cache
|
|
1625
1636
|
this.statisticsCache = null;
|
|
1626
1637
|
this.statisticsModified = false;
|
|
1638
|
+
this.totalNounCount = 0;
|
|
1639
|
+
this.totalVerbCount = 0;
|
|
1627
1640
|
}
|
|
1628
1641
|
catch (error) {
|
|
1629
1642
|
prodLog.error('Failed to clear storage:', error);
|
|
@@ -61,6 +61,7 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
|
|
|
61
61
|
protected verbCountsByType: Uint32Array<ArrayBuffer>;
|
|
62
62
|
protected nounTypeCache: Map<string, NounType>;
|
|
63
63
|
protected verbTypeCache: Map<string, VerbType>;
|
|
64
|
+
private typeCountsRebuilt;
|
|
64
65
|
/**
|
|
65
66
|
* Analyze a storage key to determine its routing and path
|
|
66
67
|
* @param id - The key to analyze (UUID or system key)
|
|
@@ -224,7 +225,15 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
|
|
|
224
225
|
* Get nouns with pagination (v5.4.0: Type-first implementation)
|
|
225
226
|
*
|
|
226
227
|
* CRITICAL: This method is required for brain.find() to work!
|
|
227
|
-
* Iterates through
|
|
228
|
+
* Iterates through noun types with billion-scale optimizations.
|
|
229
|
+
*
|
|
230
|
+
* ARCHITECTURE: Reads storage directly (not indexes) to avoid circular dependencies.
|
|
231
|
+
* Storage → Indexes (one direction only). GraphAdjacencyIndex built FROM storage.
|
|
232
|
+
*
|
|
233
|
+
* OPTIMIZATIONS (v5.5.0):
|
|
234
|
+
* - Skip empty types using nounCountsByType[] tracking (O(1) check)
|
|
235
|
+
* - Early termination when offset + limit entities collected
|
|
236
|
+
* - Memory efficient: Never loads full dataset
|
|
228
237
|
*/
|
|
229
238
|
getNounsWithPagination(options: {
|
|
230
239
|
limit: number;
|
|
@@ -241,6 +250,38 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
|
|
|
241
250
|
hasMore: boolean;
|
|
242
251
|
nextCursor?: string;
|
|
243
252
|
}>;
|
|
253
|
+
/**
|
|
254
|
+
* Get verbs with pagination (v5.5.0: Type-first implementation with billion-scale optimizations)
|
|
255
|
+
*
|
|
256
|
+
* CRITICAL: This method is required for brain.getRelations() to work!
|
|
257
|
+
* Iterates through verb types with the same optimizations as nouns.
|
|
258
|
+
*
|
|
259
|
+
* ARCHITECTURE: Reads storage directly (not indexes) to avoid circular dependencies.
|
|
260
|
+
* Storage → Indexes (one direction only). GraphAdjacencyIndex built FROM storage.
|
|
261
|
+
*
|
|
262
|
+
* OPTIMIZATIONS (v5.5.0):
|
|
263
|
+
* - Skip empty types using verbCountsByType[] tracking (O(1) check)
|
|
264
|
+
* - Early termination when offset + limit verbs collected
|
|
265
|
+
* - Memory efficient: Never loads full dataset
|
|
266
|
+
* - Inline filtering for sourceId, targetId, verbType
|
|
267
|
+
*/
|
|
268
|
+
getVerbsWithPagination(options: {
|
|
269
|
+
limit: number;
|
|
270
|
+
offset: number;
|
|
271
|
+
cursor?: string;
|
|
272
|
+
filter?: {
|
|
273
|
+
verbType?: string | string[];
|
|
274
|
+
sourceId?: string | string[];
|
|
275
|
+
targetId?: string | string[];
|
|
276
|
+
service?: string | string[];
|
|
277
|
+
metadata?: Record<string, any>;
|
|
278
|
+
};
|
|
279
|
+
}): Promise<{
|
|
280
|
+
items: HNSWVerbWithMetadata[];
|
|
281
|
+
totalCount: number;
|
|
282
|
+
hasMore: boolean;
|
|
283
|
+
nextCursor?: string;
|
|
284
|
+
}>;
|
|
244
285
|
/**
|
|
245
286
|
* Get verbs with pagination and filtering
|
|
246
287
|
* @param options Pagination and filtering options
|
|
@@ -393,6 +434,12 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
|
|
|
393
434
|
* Periodically called when counts are updated
|
|
394
435
|
*/
|
|
395
436
|
protected saveTypeStatistics(): Promise<void>;
|
|
437
|
+
/**
|
|
438
|
+
* Rebuild type counts from actual storage (v5.5.0)
|
|
439
|
+
* Called when statistics are missing or inconsistent
|
|
440
|
+
* Ensures verbCountsByType is always accurate for reliable pagination
|
|
441
|
+
*/
|
|
442
|
+
protected rebuildTypeCounts(): Promise<void>;
|
|
396
443
|
/**
|
|
397
444
|
* Get noun type from cache or metadata
|
|
398
445
|
* Relies on nounTypeCache populated during metadata saves
|