@soulcraft/brainy 4.7.4 → 4.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/DataAPI.js +3 -3
- package/dist/brainy.d.ts +7 -2
- package/dist/brainy.js +69 -39
- package/dist/coreTypes.d.ts +64 -14
- package/dist/coreTypes.js +3 -1
- package/dist/graph/graphAdjacencyIndex.js +38 -2
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/storage/adapters/azureBlobStorage.js +68 -9
- package/dist/storage/adapters/fileSystemStorage.js +41 -16
- package/dist/storage/adapters/gcsStorage.js +24 -4
- package/dist/storage/adapters/memoryStorage.js +30 -5
- package/dist/storage/adapters/opfsStorage.js +24 -4
- package/dist/storage/adapters/r2Storage.js +13 -2
- package/dist/storage/adapters/s3CompatibleStorage.js +24 -3
- package/dist/storage/adapters/typeAwareStorageAdapter.js +27 -95
- package/dist/storage/baseStorage.js +43 -6
- package/dist/types/brainy.types.d.ts +4 -0
- package/dist/types/graphTypes.d.ts +1 -0
- package/dist/utils/entityIdMapper.js +3 -2
- package/dist/utils/metadataIndex.d.ts +23 -2
- package/dist/utils/metadataIndex.js +43 -12
- package/package.json +1 -1
|
@@ -33,8 +33,9 @@ export class EntityIdMapper {
|
|
|
33
33
|
async init() {
|
|
34
34
|
try {
|
|
35
35
|
const metadata = await this.storage.getMetadata(this.storageKey);
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
// v4.8.0: metadata IS the data (no nested 'data' property)
|
|
37
|
+
if (metadata && metadata.nextId !== undefined) {
|
|
38
|
+
const data = metadata;
|
|
38
39
|
this.nextId = data.nextId;
|
|
39
40
|
// Rebuild maps from serialized data
|
|
40
41
|
this.uuidToInt = new Map(Object.entries(data.uuidToInt).map(([k, v]) => [k, Number(v)]));
|
|
@@ -228,7 +228,13 @@ export declare class MetadataIndexManager {
|
|
|
228
228
|
*/
|
|
229
229
|
private shouldIndexField;
|
|
230
230
|
/**
|
|
231
|
-
* Extract indexable field-value pairs from metadata
|
|
231
|
+
* Extract indexable field-value pairs from entity or metadata
|
|
232
|
+
*
|
|
233
|
+
* v4.8.0: Now handles BOTH entity structure (with top-level fields) AND plain metadata
|
|
234
|
+
* - Extracts from top-level fields (confidence, weight, timestamps, type, service, etc.)
|
|
235
|
+
* - Also extracts from nested metadata field (custom user fields)
|
|
236
|
+
* - Skips HNSW-specific fields (vector, connections, level, id)
|
|
237
|
+
* - Maps 'type' → 'noun' for backward compatibility with existing indexes
|
|
232
238
|
*
|
|
233
239
|
* BUG FIX (v3.50.1): Exclude vector embeddings and large arrays from indexing
|
|
234
240
|
* BUG FIX (v3.50.2): Also exclude purely numeric field names (array indices)
|
|
@@ -238,14 +244,29 @@ export declare class MetadataIndexManager {
|
|
|
238
244
|
private extractIndexableFields;
|
|
239
245
|
/**
|
|
240
246
|
* Add item to metadata indexes
|
|
247
|
+
*
|
|
248
|
+
* v4.8.0: Now accepts either entity structure or plain metadata
|
|
249
|
+
* - Entity structure: { id, type, confidence, weight, createdAt, metadata: {...} }
|
|
250
|
+
* - Plain metadata: { noun, confidence, weight, createdAt, ... }
|
|
251
|
+
*
|
|
252
|
+
* @param id - Entity ID
|
|
253
|
+
* @param entityOrMetadata - Either full entity structure (v4.8.0+) or plain metadata (backward compat)
|
|
254
|
+
* @param skipFlush - Skip automatic flush (used during batch operations)
|
|
241
255
|
*/
|
|
242
|
-
addToIndex(id: string,
|
|
256
|
+
addToIndex(id: string, entityOrMetadata: any, skipFlush?: boolean): Promise<void>;
|
|
243
257
|
/**
|
|
244
258
|
* Update field index with value count
|
|
245
259
|
*/
|
|
246
260
|
private updateFieldIndex;
|
|
247
261
|
/**
|
|
248
262
|
* Remove item from metadata indexes
|
|
263
|
+
*
|
|
264
|
+
* v4.8.0: Now accepts either entity structure or plain metadata (same as addToIndex)
|
|
265
|
+
* - Entity structure: { id, type, confidence, weight, createdAt, metadata: {...} }
|
|
266
|
+
* - Plain metadata: { noun, confidence, weight, createdAt, ... }
|
|
267
|
+
*
|
|
268
|
+
* @param id - Entity ID to remove
|
|
269
|
+
* @param metadata - Optional entity or metadata structure (if not provided, requires scanning all fields - slow!)
|
|
249
270
|
*/
|
|
250
271
|
removeFromIndex(id: string, metadata?: any): Promise<void>;
|
|
251
272
|
/**
|
|
@@ -856,22 +856,28 @@ export class MetadataIndexManager {
|
|
|
856
856
|
return true;
|
|
857
857
|
}
|
|
858
858
|
/**
|
|
859
|
-
* Extract indexable field-value pairs from metadata
|
|
859
|
+
* Extract indexable field-value pairs from entity or metadata
|
|
860
|
+
*
|
|
861
|
+
* v4.8.0: Now handles BOTH entity structure (with top-level fields) AND plain metadata
|
|
862
|
+
* - Extracts from top-level fields (confidence, weight, timestamps, type, service, etc.)
|
|
863
|
+
* - Also extracts from nested metadata field (custom user fields)
|
|
864
|
+
* - Skips HNSW-specific fields (vector, connections, level, id)
|
|
865
|
+
* - Maps 'type' → 'noun' for backward compatibility with existing indexes
|
|
860
866
|
*
|
|
861
867
|
* BUG FIX (v3.50.1): Exclude vector embeddings and large arrays from indexing
|
|
862
868
|
* BUG FIX (v3.50.2): Also exclude purely numeric field names (array indices)
|
|
863
869
|
* - Vector fields (384+ dimensions) were creating 825K chunk files for 1,144 entities
|
|
864
870
|
* - Arrays converted to objects with numeric keys were still being indexed
|
|
865
871
|
*/
|
|
866
|
-
extractIndexableFields(
|
|
872
|
+
extractIndexableFields(data) {
|
|
867
873
|
const fields = [];
|
|
868
|
-
// Fields that should NEVER be indexed (vectors, embeddings, large arrays)
|
|
869
|
-
const NEVER_INDEX = new Set(['vector', 'embedding', 'embeddings', 'connections']);
|
|
874
|
+
// Fields that should NEVER be indexed (vectors, embeddings, large arrays, HNSW internals)
|
|
875
|
+
const NEVER_INDEX = new Set(['vector', 'embedding', 'embeddings', 'connections', 'level', 'id']);
|
|
870
876
|
const extract = (obj, prefix = '') => {
|
|
871
877
|
for (const [key, value] of Object.entries(obj)) {
|
|
872
878
|
const fullKey = prefix ? `${prefix}.${key}` : key;
|
|
873
|
-
// Skip fields in never-index list (CRITICAL: prevents vector indexing bug)
|
|
874
|
-
if (NEVER_INDEX.has(key))
|
|
879
|
+
// Skip fields in never-index list (CRITICAL: prevents vector indexing bug + HNSW fields)
|
|
880
|
+
if (!prefix && NEVER_INDEX.has(key))
|
|
875
881
|
continue;
|
|
876
882
|
// Skip purely numeric field names (array indices converted to object keys)
|
|
877
883
|
// Legitimate field names should never be purely numeric
|
|
@@ -881,6 +887,14 @@ export class MetadataIndexManager {
|
|
|
881
887
|
// Skip fields based on user configuration
|
|
882
888
|
if (!this.shouldIndexField(fullKey))
|
|
883
889
|
continue;
|
|
890
|
+
// Special handling for metadata field at top level
|
|
891
|
+
// v4.8.0: Flatten metadata fields to top-level (no prefix) for cleaner queries
|
|
892
|
+
// Standard fields are already at top-level, custom fields go in metadata
|
|
893
|
+
// By flattening here, queries can use { category: 'B' } instead of { 'metadata.category': 'B' }
|
|
894
|
+
if (key === 'metadata' && !prefix && typeof value === 'object' && !Array.isArray(value)) {
|
|
895
|
+
extract(value, ''); // Flatten to top-level, no prefix
|
|
896
|
+
continue;
|
|
897
|
+
}
|
|
884
898
|
// Skip large arrays (> 10 elements) - likely vectors or bulk data
|
|
885
899
|
if (Array.isArray(value) && value.length > 10)
|
|
886
900
|
continue;
|
|
@@ -900,20 +914,30 @@ export class MetadataIndexManager {
|
|
|
900
914
|
}
|
|
901
915
|
else {
|
|
902
916
|
// Primitive value: index it
|
|
903
|
-
|
|
917
|
+
// v4.8.0: Map 'type' → 'noun' for backward compatibility
|
|
918
|
+
const indexField = (!prefix && key === 'type') ? 'noun' : fullKey;
|
|
919
|
+
fields.push({ field: indexField, value });
|
|
904
920
|
}
|
|
905
921
|
}
|
|
906
922
|
};
|
|
907
|
-
if (
|
|
908
|
-
extract(
|
|
923
|
+
if (data && typeof data === 'object') {
|
|
924
|
+
extract(data);
|
|
909
925
|
}
|
|
910
926
|
return fields;
|
|
911
927
|
}
|
|
912
928
|
/**
|
|
913
929
|
* Add item to metadata indexes
|
|
930
|
+
*
|
|
931
|
+
* v4.8.0: Now accepts either entity structure or plain metadata
|
|
932
|
+
* - Entity structure: { id, type, confidence, weight, createdAt, metadata: {...} }
|
|
933
|
+
* - Plain metadata: { noun, confidence, weight, createdAt, ... }
|
|
934
|
+
*
|
|
935
|
+
* @param id - Entity ID
|
|
936
|
+
* @param entityOrMetadata - Either full entity structure (v4.8.0+) or plain metadata (backward compat)
|
|
937
|
+
* @param skipFlush - Skip automatic flush (used during batch operations)
|
|
914
938
|
*/
|
|
915
|
-
async addToIndex(id,
|
|
916
|
-
const fields = this.extractIndexableFields(
|
|
939
|
+
async addToIndex(id, entityOrMetadata, skipFlush = false) {
|
|
940
|
+
const fields = this.extractIndexableFields(entityOrMetadata);
|
|
917
941
|
// Sort fields to process 'noun' field first for type-field affinity tracking
|
|
918
942
|
fields.sort((a, b) => {
|
|
919
943
|
if (a.field === 'noun')
|
|
@@ -930,7 +954,7 @@ export class MetadataIndexManager {
|
|
|
930
954
|
await this.addToChunkedIndex(field, value, id);
|
|
931
955
|
// Update statistics and tracking
|
|
932
956
|
this.updateCardinalityStats(field, value, 'add');
|
|
933
|
-
this.updateTypeFieldAffinity(id, field, value, 'add',
|
|
957
|
+
this.updateTypeFieldAffinity(id, field, value, 'add', entityOrMetadata);
|
|
934
958
|
await this.updateFieldIndex(field, value, 1);
|
|
935
959
|
// Yield to event loop every 5 fields to prevent blocking
|
|
936
960
|
if (i % 5 === 4) {
|
|
@@ -988,6 +1012,13 @@ export class MetadataIndexManager {
|
|
|
988
1012
|
}
|
|
989
1013
|
/**
|
|
990
1014
|
* Remove item from metadata indexes
|
|
1015
|
+
*
|
|
1016
|
+
* v4.8.0: Now accepts either entity structure or plain metadata (same as addToIndex)
|
|
1017
|
+
* - Entity structure: { id, type, confidence, weight, createdAt, metadata: {...} }
|
|
1018
|
+
* - Plain metadata: { noun, confidence, weight, createdAt, ... }
|
|
1019
|
+
*
|
|
1020
|
+
* @param id - Entity ID to remove
|
|
1021
|
+
* @param metadata - Optional entity or metadata structure (if not provided, requires scanning all fields - slow!)
|
|
991
1022
|
*/
|
|
992
1023
|
async removeFromIndex(id, metadata) {
|
|
993
1024
|
if (metadata) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "4.7.4",
|
|
3
|
+
"version": "4.8.1",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|