@soulcraft/brainy 5.5.0 → 5.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/augmentations/display/fieldPatterns.js +3 -3
- package/dist/augmentations/display/intelligentComputation.d.ts +1 -1
- package/dist/augmentations/display/intelligentComputation.js +1 -1
- package/dist/augmentations/typeMatching/brainyTypes.d.ts +1 -1
- package/dist/augmentations/typeMatching/brainyTypes.js +1 -1
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.d.ts +1 -1
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.js +1 -1
- package/dist/augmentations/universalDisplayAugmentation.d.ts +1 -1
- package/dist/augmentations/universalDisplayAugmentation.js +1 -1
- package/dist/brainy.js +1 -1
- package/dist/cli/commands/types.js +2 -2
- package/dist/hnsw/typeAwareHNSWIndex.d.ts +3 -3
- package/dist/hnsw/typeAwareHNSWIndex.js +5 -5
- package/dist/importers/SmartExcelImporter.js +1 -1
- package/dist/neural/embeddedKeywordEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedKeywordEmbeddings.js +1 -1
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/query/typeAwareQueryPlanner.d.ts +7 -7
- package/dist/query/typeAwareQueryPlanner.js +7 -7
- package/dist/storage/baseStorage.d.ts +48 -1
- package/dist/storage/baseStorage.js +237 -19
- package/package.json +1 -1
- package/dist/importManager.d.ts +0 -78
- package/dist/importManager.js +0 -267
- package/dist/storage/adapters/typeAwareStorageAdapter.d.ts +0 -300
- package/dist/storage/adapters/typeAwareStorageAdapter.js +0 -1012
|
@@ -77,12 +77,14 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
77
77
|
this.cowEnabled = false;
|
|
78
78
|
// Type-first indexing support (v5.4.0)
|
|
79
79
|
// Built into all storage adapters for billion-scale efficiency
|
|
80
|
-
this.nounCountsByType = new Uint32Array(NOUN_TYPE_COUNT); //
|
|
81
|
-
this.verbCountsByType = new Uint32Array(VERB_TYPE_COUNT); //
|
|
82
|
-
// Total:
|
|
80
|
+
this.nounCountsByType = new Uint32Array(NOUN_TYPE_COUNT); // 168 bytes (Stage 3: 42 types)
|
|
81
|
+
this.verbCountsByType = new Uint32Array(VERB_TYPE_COUNT); // 508 bytes (Stage 3: 127 types)
|
|
82
|
+
// Total: 676 bytes (99.2% reduction vs Map-based tracking)
|
|
83
83
|
// Type cache for O(1) lookups after first access
|
|
84
84
|
this.nounTypeCache = new Map();
|
|
85
85
|
this.verbTypeCache = new Map();
|
|
86
|
+
// v5.5.0: Track if type counts have been rebuilt (prevent repeated rebuilds)
|
|
87
|
+
this.typeCountsRebuilt = false;
|
|
86
88
|
}
|
|
87
89
|
/**
|
|
88
90
|
* Analyze a storage key to determine its routing and path
|
|
@@ -825,14 +827,30 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
825
827
|
* Get nouns with pagination (v5.4.0: Type-first implementation)
|
|
826
828
|
*
|
|
827
829
|
* CRITICAL: This method is required for brain.find() to work!
|
|
828
|
-
* Iterates through
|
|
830
|
+
* Iterates through noun types with billion-scale optimizations.
|
|
831
|
+
*
|
|
832
|
+
* ARCHITECTURE: Reads storage directly (not indexes) to avoid circular dependencies.
|
|
833
|
+
* Storage → Indexes (one direction only). GraphAdjacencyIndex built FROM storage.
|
|
834
|
+
*
|
|
835
|
+
* OPTIMIZATIONS (v5.5.0):
|
|
836
|
+
* - Skip empty types using nounCountsByType[] tracking (O(1) check)
|
|
837
|
+
* - Early termination when offset + limit entities collected
|
|
838
|
+
* - Memory efficient: Never loads full dataset
|
|
829
839
|
*/
|
|
830
840
|
async getNounsWithPagination(options) {
|
|
831
841
|
await this.ensureInitialized();
|
|
832
|
-
const { limit, offset, filter } = options;
|
|
833
|
-
const
|
|
834
|
-
|
|
835
|
-
|
|
842
|
+
const { limit, offset = 0, filter } = options;
|
|
843
|
+
const collectedNouns = [];
|
|
844
|
+
const targetCount = offset + limit; // Early termination target
|
|
845
|
+
// v5.5.0 BUG FIX: Only use optimization if counts are reliable
|
|
846
|
+
const totalNounCountFromArray = this.nounCountsByType.reduce((sum, c) => sum + c, 0);
|
|
847
|
+
const useOptimization = totalNounCountFromArray > 0;
|
|
848
|
+
// v5.5.0: Iterate through noun types with billion-scale optimizations
|
|
849
|
+
for (let i = 0; i < NOUN_TYPE_COUNT && collectedNouns.length < targetCount; i++) {
|
|
850
|
+
// OPTIMIZATION 1: Skip empty types (only if counts are reliable)
|
|
851
|
+
if (useOptimization && this.nounCountsByType[i] === 0) {
|
|
852
|
+
continue;
|
|
853
|
+
}
|
|
836
854
|
const type = TypeUtils.getNounFromIndex(i);
|
|
837
855
|
// If filtering by type, skip other types
|
|
838
856
|
if (filter?.nounType) {
|
|
@@ -846,6 +864,10 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
846
864
|
// List all noun files for this type
|
|
847
865
|
const nounFiles = await this.listObjectsInBranch(typeDir);
|
|
848
866
|
for (const nounPath of nounFiles) {
|
|
867
|
+
// OPTIMIZATION 2: Early termination (stop when we have enough)
|
|
868
|
+
if (collectedNouns.length >= targetCount) {
|
|
869
|
+
break;
|
|
870
|
+
}
|
|
849
871
|
// Skip if not a .json file
|
|
850
872
|
if (!nounPath.endsWith('.json'))
|
|
851
873
|
continue;
|
|
@@ -864,7 +886,7 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
864
886
|
}
|
|
865
887
|
}
|
|
866
888
|
// Combine noun + metadata (v5.4.0: Extract standard fields to top-level)
|
|
867
|
-
|
|
889
|
+
collectedNouns.push({
|
|
868
890
|
...noun,
|
|
869
891
|
type: metadata.noun || type, // Required: Extract type from metadata
|
|
870
892
|
confidence: metadata.confidence,
|
|
@@ -892,19 +914,104 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
892
914
|
// Skip types that have no data
|
|
893
915
|
}
|
|
894
916
|
}
|
|
895
|
-
// Apply pagination
|
|
896
|
-
const
|
|
897
|
-
const
|
|
898
|
-
const hasMore = offset + limit < totalCount;
|
|
917
|
+
// Apply pagination (v5.5.0: Efficient slicing after early termination)
|
|
918
|
+
const paginatedNouns = collectedNouns.slice(offset, offset + limit);
|
|
919
|
+
const hasMore = collectedNouns.length >= targetCount;
|
|
899
920
|
return {
|
|
900
921
|
items: paginatedNouns,
|
|
901
|
-
totalCount,
|
|
922
|
+
totalCount: collectedNouns.length, // Accurate count of collected results
|
|
902
923
|
hasMore,
|
|
903
924
|
nextCursor: hasMore && paginatedNouns.length > 0
|
|
904
925
|
? paginatedNouns[paginatedNouns.length - 1].id
|
|
905
926
|
: undefined
|
|
906
927
|
};
|
|
907
928
|
}
|
|
929
|
+
/**
|
|
930
|
+
* Get verbs with pagination (v5.5.0: Type-first implementation with billion-scale optimizations)
|
|
931
|
+
*
|
|
932
|
+
* CRITICAL: This method is required for brain.getRelations() to work!
|
|
933
|
+
* Iterates through verb types with the same optimizations as nouns.
|
|
934
|
+
*
|
|
935
|
+
* ARCHITECTURE: Reads storage directly (not indexes) to avoid circular dependencies.
|
|
936
|
+
* Storage → Indexes (one direction only). GraphAdjacencyIndex built FROM storage.
|
|
937
|
+
*
|
|
938
|
+
* OPTIMIZATIONS (v5.5.0):
|
|
939
|
+
* - Skip empty types using verbCountsByType[] tracking (O(1) check)
|
|
940
|
+
* - Early termination when offset + limit verbs collected
|
|
941
|
+
* - Memory efficient: Never loads full dataset
|
|
942
|
+
* - Inline filtering for sourceId, targetId, verbType
|
|
943
|
+
*/
|
|
944
|
+
async getVerbsWithPagination(options) {
|
|
945
|
+
await this.ensureInitialized();
|
|
946
|
+
const { limit, offset = 0, filter } = options;
|
|
947
|
+
const collectedVerbs = [];
|
|
948
|
+
const targetCount = offset + limit; // Early termination target
|
|
949
|
+
// v5.5.0 BUG FIX: Only use optimization if counts are reliable
|
|
950
|
+
const totalVerbCountFromArray = this.verbCountsByType.reduce((sum, c) => sum + c, 0);
|
|
951
|
+
const useOptimization = totalVerbCountFromArray > 0;
|
|
952
|
+
// v5.5.0: Iterate through verb types with billion-scale optimizations
|
|
953
|
+
for (let i = 0; i < VERB_TYPE_COUNT && collectedVerbs.length < targetCount; i++) {
|
|
954
|
+
// OPTIMIZATION 1: Skip empty types (only if counts are reliable)
|
|
955
|
+
if (useOptimization && this.verbCountsByType[i] === 0) {
|
|
956
|
+
continue;
|
|
957
|
+
}
|
|
958
|
+
const type = TypeUtils.getVerbFromIndex(i);
|
|
959
|
+
// If filtering by verbType, skip other types
|
|
960
|
+
if (filter?.verbType) {
|
|
961
|
+
const filterTypes = Array.isArray(filter.verbType) ? filter.verbType : [filter.verbType];
|
|
962
|
+
if (!filterTypes.includes(type)) {
|
|
963
|
+
continue;
|
|
964
|
+
}
|
|
965
|
+
}
|
|
966
|
+
try {
|
|
967
|
+
const verbsOfType = await this.getVerbsByType_internal(type);
|
|
968
|
+
// Apply filtering inline (memory efficient)
|
|
969
|
+
for (const verb of verbsOfType) {
|
|
970
|
+
// OPTIMIZATION 2: Early termination (stop when we have enough)
|
|
971
|
+
if (collectedVerbs.length >= targetCount) {
|
|
972
|
+
break;
|
|
973
|
+
}
|
|
974
|
+
// Apply filters if specified
|
|
975
|
+
if (filter) {
|
|
976
|
+
// Filter by sourceId
|
|
977
|
+
if (filter.sourceId) {
|
|
978
|
+
const sourceIds = Array.isArray(filter.sourceId)
|
|
979
|
+
? filter.sourceId
|
|
980
|
+
: [filter.sourceId];
|
|
981
|
+
if (!sourceIds.includes(verb.sourceId)) {
|
|
982
|
+
continue;
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
// Filter by targetId
|
|
986
|
+
if (filter.targetId) {
|
|
987
|
+
const targetIds = Array.isArray(filter.targetId)
|
|
988
|
+
? filter.targetId
|
|
989
|
+
: [filter.targetId];
|
|
990
|
+
if (!targetIds.includes(verb.targetId)) {
|
|
991
|
+
continue;
|
|
992
|
+
}
|
|
993
|
+
}
|
|
994
|
+
}
|
|
995
|
+
// Verb passed all filters - add to collection
|
|
996
|
+
collectedVerbs.push(verb);
|
|
997
|
+
}
|
|
998
|
+
}
|
|
999
|
+
catch (error) {
|
|
1000
|
+
// Skip types that have no data (directory may not exist)
|
|
1001
|
+
}
|
|
1002
|
+
}
|
|
1003
|
+
// Apply pagination (v5.5.0: Efficient slicing after early termination)
|
|
1004
|
+
const paginatedVerbs = collectedVerbs.slice(offset, offset + limit);
|
|
1005
|
+
const hasMore = collectedVerbs.length >= targetCount;
|
|
1006
|
+
return {
|
|
1007
|
+
items: paginatedVerbs,
|
|
1008
|
+
totalCount: collectedVerbs.length, // Accurate count of collected results
|
|
1009
|
+
hasMore,
|
|
1010
|
+
nextCursor: hasMore && paginatedVerbs.length > 0
|
|
1011
|
+
? paginatedVerbs[paginatedVerbs.length - 1].id
|
|
1012
|
+
: undefined
|
|
1013
|
+
};
|
|
1014
|
+
}
|
|
908
1015
|
/**
|
|
909
1016
|
* Get verbs with pagination and filtering
|
|
910
1017
|
* @param options Pagination and filtering options
|
|
@@ -1080,12 +1187,84 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1080
1187
|
nextCursor: result.nextCursor
|
|
1081
1188
|
};
|
|
1082
1189
|
}
|
|
1083
|
-
//
|
|
1084
|
-
|
|
1190
|
+
// UNIVERSAL FALLBACK: Iterate through verb types with early termination (billion-scale safe)
|
|
1191
|
+
// This approach works for ALL storage adapters without requiring adapter-specific pagination
|
|
1192
|
+
console.warn('Using universal type-iteration strategy for getVerbs(). ' +
|
|
1193
|
+
'This works for all adapters but may be slower than native pagination. ' +
|
|
1194
|
+
'For optimal performance at scale, storage adapters can implement getVerbsWithPagination().');
|
|
1195
|
+
const collectedVerbs = [];
|
|
1196
|
+
let totalScanned = 0;
|
|
1197
|
+
const targetCount = offset + limit; // We need this many verbs total (including offset)
|
|
1198
|
+
// v5.5.0 BUG FIX: Check if optimization should be used
|
|
1199
|
+
// Only use type-skipping optimization if counts are non-zero (reliable)
|
|
1200
|
+
const totalVerbCountFromArray = this.verbCountsByType.reduce((sum, c) => sum + c, 0);
|
|
1201
|
+
const useOptimization = totalVerbCountFromArray > 0;
|
|
1202
|
+
// Iterate through all 127 verb types (Stage 3 CANONICAL) with early termination
|
|
1203
|
+
// OPTIMIZATION: Skip types with zero count (only if counts are reliable)
|
|
1204
|
+
for (let i = 0; i < VERB_TYPE_COUNT && collectedVerbs.length < targetCount; i++) {
|
|
1205
|
+
// Skip empty types for performance (but only if optimization is enabled)
|
|
1206
|
+
if (useOptimization && this.verbCountsByType[i] === 0) {
|
|
1207
|
+
continue;
|
|
1208
|
+
}
|
|
1209
|
+
const type = TypeUtils.getVerbFromIndex(i);
|
|
1210
|
+
try {
|
|
1211
|
+
const verbsOfType = await this.getVerbsByType_internal(type);
|
|
1212
|
+
// Apply filtering inline (memory efficient)
|
|
1213
|
+
for (const verb of verbsOfType) {
|
|
1214
|
+
// Apply filters if specified
|
|
1215
|
+
if (options?.filter) {
|
|
1216
|
+
// Filter by sourceId
|
|
1217
|
+
if (options.filter.sourceId) {
|
|
1218
|
+
const sourceIds = Array.isArray(options.filter.sourceId)
|
|
1219
|
+
? options.filter.sourceId
|
|
1220
|
+
: [options.filter.sourceId];
|
|
1221
|
+
if (!sourceIds.includes(verb.sourceId)) {
|
|
1222
|
+
continue;
|
|
1223
|
+
}
|
|
1224
|
+
}
|
|
1225
|
+
// Filter by targetId
|
|
1226
|
+
if (options.filter.targetId) {
|
|
1227
|
+
const targetIds = Array.isArray(options.filter.targetId)
|
|
1228
|
+
? options.filter.targetId
|
|
1229
|
+
: [options.filter.targetId];
|
|
1230
|
+
if (!targetIds.includes(verb.targetId)) {
|
|
1231
|
+
continue;
|
|
1232
|
+
}
|
|
1233
|
+
}
|
|
1234
|
+
// Filter by verbType
|
|
1235
|
+
if (options.filter.verbType) {
|
|
1236
|
+
const verbTypes = Array.isArray(options.filter.verbType)
|
|
1237
|
+
? options.filter.verbType
|
|
1238
|
+
: [options.filter.verbType];
|
|
1239
|
+
if (!verbTypes.includes(verb.verb)) {
|
|
1240
|
+
continue;
|
|
1241
|
+
}
|
|
1242
|
+
}
|
|
1243
|
+
}
|
|
1244
|
+
// Verb passed filters - add to collection
|
|
1245
|
+
collectedVerbs.push(verb);
|
|
1246
|
+
// Early termination: stop when we have enough for offset + limit
|
|
1247
|
+
if (collectedVerbs.length >= targetCount) {
|
|
1248
|
+
break;
|
|
1249
|
+
}
|
|
1250
|
+
}
|
|
1251
|
+
totalScanned += verbsOfType.length;
|
|
1252
|
+
}
|
|
1253
|
+
catch (error) {
|
|
1254
|
+
// Ignore errors for types with no verbs (directory may not exist)
|
|
1255
|
+
// This is expected for types that haven't been used yet
|
|
1256
|
+
}
|
|
1257
|
+
}
|
|
1258
|
+
// Apply pagination (slice for offset)
|
|
1259
|
+
const paginatedVerbs = collectedVerbs.slice(offset, offset + limit);
|
|
1260
|
+
const hasMore = collectedVerbs.length >= targetCount;
|
|
1085
1261
|
return {
|
|
1086
|
-
items:
|
|
1087
|
-
totalCount:
|
|
1088
|
-
hasMore
|
|
1262
|
+
items: paginatedVerbs,
|
|
1263
|
+
totalCount: collectedVerbs.length, // Accurate count of filtered results
|
|
1264
|
+
hasMore,
|
|
1265
|
+
nextCursor: hasMore && paginatedVerbs.length > 0
|
|
1266
|
+
? paginatedVerbs[paginatedVerbs.length - 1].id
|
|
1267
|
+
: undefined
|
|
1089
1268
|
};
|
|
1090
1269
|
}
|
|
1091
1270
|
catch (error) {
|
|
@@ -1402,6 +1581,45 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1402
1581
|
};
|
|
1403
1582
|
await this.writeObjectToPath(`${SYSTEM_DIR}/type-statistics.json`, stats);
|
|
1404
1583
|
}
|
|
1584
|
+
/**
|
|
1585
|
+
* Rebuild type counts from actual storage (v5.5.0)
|
|
1586
|
+
* Called when statistics are missing or inconsistent
|
|
1587
|
+
* Ensures verbCountsByType is always accurate for reliable pagination
|
|
1588
|
+
*/
|
|
1589
|
+
async rebuildTypeCounts() {
|
|
1590
|
+
console.log('[BaseStorage] Rebuilding type counts from storage...');
|
|
1591
|
+
// Rebuild verb counts by checking each type directory
|
|
1592
|
+
for (let i = 0; i < VERB_TYPE_COUNT; i++) {
|
|
1593
|
+
const type = TypeUtils.getVerbFromIndex(i);
|
|
1594
|
+
const prefix = `entities/verbs/${type}/vectors/`;
|
|
1595
|
+
try {
|
|
1596
|
+
const paths = await this.listObjectsInBranch(prefix);
|
|
1597
|
+
this.verbCountsByType[i] = paths.length;
|
|
1598
|
+
}
|
|
1599
|
+
catch (error) {
|
|
1600
|
+
// Type directory doesn't exist - count is 0
|
|
1601
|
+
this.verbCountsByType[i] = 0;
|
|
1602
|
+
}
|
|
1603
|
+
}
|
|
1604
|
+
// Rebuild noun counts similarly
|
|
1605
|
+
for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
|
|
1606
|
+
const type = TypeUtils.getNounFromIndex(i);
|
|
1607
|
+
const prefix = `entities/nouns/${type}/vectors/`;
|
|
1608
|
+
try {
|
|
1609
|
+
const paths = await this.listObjectsInBranch(prefix);
|
|
1610
|
+
this.nounCountsByType[i] = paths.length;
|
|
1611
|
+
}
|
|
1612
|
+
catch (error) {
|
|
1613
|
+
// Type directory doesn't exist - count is 0
|
|
1614
|
+
this.nounCountsByType[i] = 0;
|
|
1615
|
+
}
|
|
1616
|
+
}
|
|
1617
|
+
// Save rebuilt counts to storage
|
|
1618
|
+
await this.saveTypeStatistics();
|
|
1619
|
+
const totalVerbs = this.verbCountsByType.reduce((sum, count) => sum + count, 0);
|
|
1620
|
+
const totalNouns = this.nounCountsByType.reduce((sum, count) => sum + count, 0);
|
|
1621
|
+
console.log(`[BaseStorage] Rebuilt counts: ${totalNouns} nouns, ${totalVerbs} verbs`);
|
|
1622
|
+
}
|
|
1405
1623
|
/**
|
|
1406
1624
|
* Get noun type from cache or metadata
|
|
1407
1625
|
* Relies on nounTypeCache populated during metadata saves
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "5.5.0",
|
|
3
|
+
"version": "5.6.0",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
package/dist/importManager.d.ts
DELETED
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Import Manager - Comprehensive data import with intelligent type detection
|
|
3
|
-
*
|
|
4
|
-
* Handles multiple data sources:
|
|
5
|
-
* - Direct data (objects, arrays)
|
|
6
|
-
* - Files (JSON, CSV, text)
|
|
7
|
-
* - URLs (fetch and parse)
|
|
8
|
-
* - Streams (for large files)
|
|
9
|
-
*
|
|
10
|
-
* Uses NeuralImportAugmentation for intelligent processing
|
|
11
|
-
*/
|
|
12
|
-
import { NounType } from './types/graphTypes.js';
|
|
13
|
-
export interface ImportOptions {
|
|
14
|
-
source?: 'data' | 'file' | 'url' | 'auto';
|
|
15
|
-
format?: 'json' | 'csv' | 'text' | 'yaml' | 'auto';
|
|
16
|
-
batchSize?: number;
|
|
17
|
-
autoDetect?: boolean;
|
|
18
|
-
typeHint?: NounType;
|
|
19
|
-
extractRelationships?: boolean;
|
|
20
|
-
csvDelimiter?: string;
|
|
21
|
-
csvHeaders?: boolean;
|
|
22
|
-
parallel?: boolean;
|
|
23
|
-
maxConcurrency?: number;
|
|
24
|
-
}
|
|
25
|
-
export interface ImportResult {
|
|
26
|
-
success: boolean;
|
|
27
|
-
nouns: string[];
|
|
28
|
-
verbs: string[];
|
|
29
|
-
errors: string[];
|
|
30
|
-
stats: {
|
|
31
|
-
total: number;
|
|
32
|
-
imported: number;
|
|
33
|
-
failed: number;
|
|
34
|
-
relationships: number;
|
|
35
|
-
};
|
|
36
|
-
}
|
|
37
|
-
export declare class ImportManager {
|
|
38
|
-
private neuralImport;
|
|
39
|
-
private typeMatcher;
|
|
40
|
-
private brain;
|
|
41
|
-
constructor(brain: any);
|
|
42
|
-
/**
|
|
43
|
-
* Initialize the import manager
|
|
44
|
-
*/
|
|
45
|
-
init(): Promise<void>;
|
|
46
|
-
/**
|
|
47
|
-
* Main import method - handles all sources
|
|
48
|
-
*/
|
|
49
|
-
import(source: string | Buffer | any[] | any, options?: ImportOptions): Promise<ImportResult>;
|
|
50
|
-
/**
|
|
51
|
-
* Import from file
|
|
52
|
-
*/
|
|
53
|
-
importFile(filePath: string, options?: ImportOptions): Promise<ImportResult>;
|
|
54
|
-
/**
|
|
55
|
-
* Import from URL
|
|
56
|
-
*/
|
|
57
|
-
importUrl(url: string, options?: ImportOptions): Promise<ImportResult>;
|
|
58
|
-
/**
|
|
59
|
-
* Detect source type
|
|
60
|
-
*/
|
|
61
|
-
private detectSourceType;
|
|
62
|
-
/**
|
|
63
|
-
* Detect format from file path
|
|
64
|
-
*/
|
|
65
|
-
private detectFormatFromPath;
|
|
66
|
-
/**
|
|
67
|
-
* Read file
|
|
68
|
-
*/
|
|
69
|
-
private readFile;
|
|
70
|
-
/**
|
|
71
|
-
* Fetch from URL
|
|
72
|
-
*/
|
|
73
|
-
private fetchFromUrl;
|
|
74
|
-
}
|
|
75
|
-
/**
|
|
76
|
-
* Create an import manager instance
|
|
77
|
-
*/
|
|
78
|
-
export declare function createImportManager(brain: any): ImportManager;
|
package/dist/importManager.js
DELETED
|
@@ -1,267 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Import Manager - Comprehensive data import with intelligent type detection
|
|
3
|
-
*
|
|
4
|
-
* Handles multiple data sources:
|
|
5
|
-
* - Direct data (objects, arrays)
|
|
6
|
-
* - Files (JSON, CSV, text)
|
|
7
|
-
* - URLs (fetch and parse)
|
|
8
|
-
* - Streams (for large files)
|
|
9
|
-
*
|
|
10
|
-
* Uses NeuralImportAugmentation for intelligent processing
|
|
11
|
-
*/
|
|
12
|
-
import { VerbType } from './types/graphTypes.js';
|
|
13
|
-
import { NeuralImportAugmentation } from './augmentations/neuralImport.js';
|
|
14
|
-
import * as fs from './universal/fs.js';
|
|
15
|
-
import * as path from './universal/path.js';
|
|
16
|
-
import { prodLog } from './utils/logger.js';
|
|
17
|
-
export class ImportManager {
|
|
18
|
-
constructor(brain) {
|
|
19
|
-
this.typeMatcher = null;
|
|
20
|
-
this.brain = brain;
|
|
21
|
-
this.neuralImport = new NeuralImportAugmentation();
|
|
22
|
-
}
|
|
23
|
-
/**
|
|
24
|
-
* Initialize the import manager
|
|
25
|
-
*/
|
|
26
|
-
async init() {
|
|
27
|
-
// Initialize neural import with proper context
|
|
28
|
-
const context = {
|
|
29
|
-
brain: this.brain,
|
|
30
|
-
storage: this.brain.storage,
|
|
31
|
-
config: {},
|
|
32
|
-
log: (message, level) => {
|
|
33
|
-
if (level === 'error') {
|
|
34
|
-
prodLog.error(message);
|
|
35
|
-
}
|
|
36
|
-
else if (level === 'warn') {
|
|
37
|
-
prodLog.warn(message);
|
|
38
|
-
}
|
|
39
|
-
else {
|
|
40
|
-
prodLog.info(message);
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
};
|
|
44
|
-
await this.neuralImport.initialize(context);
|
|
45
|
-
// Get type matcher
|
|
46
|
-
const { getBrainyTypes } = await import('./augmentations/typeMatching/brainyTypes.js');
|
|
47
|
-
this.typeMatcher = await getBrainyTypes();
|
|
48
|
-
}
|
|
49
|
-
/**
|
|
50
|
-
* Main import method - handles all sources
|
|
51
|
-
*/
|
|
52
|
-
async import(source, options = {}) {
|
|
53
|
-
const result = {
|
|
54
|
-
success: false,
|
|
55
|
-
nouns: [],
|
|
56
|
-
verbs: [],
|
|
57
|
-
errors: [],
|
|
58
|
-
stats: {
|
|
59
|
-
total: 0,
|
|
60
|
-
imported: 0,
|
|
61
|
-
failed: 0,
|
|
62
|
-
relationships: 0
|
|
63
|
-
}
|
|
64
|
-
};
|
|
65
|
-
try {
|
|
66
|
-
// Detect source type
|
|
67
|
-
const sourceType = await this.detectSourceType(source, options.source);
|
|
68
|
-
// Get data based on source type
|
|
69
|
-
let data;
|
|
70
|
-
let format = options.format || 'auto';
|
|
71
|
-
switch (sourceType) {
|
|
72
|
-
case 'url':
|
|
73
|
-
data = await this.fetchFromUrl(source);
|
|
74
|
-
break;
|
|
75
|
-
case 'file':
|
|
76
|
-
const filePath = source;
|
|
77
|
-
data = await this.readFile(filePath);
|
|
78
|
-
if (format === 'auto') {
|
|
79
|
-
format = this.detectFormatFromPath(filePath);
|
|
80
|
-
}
|
|
81
|
-
break;
|
|
82
|
-
case 'data':
|
|
83
|
-
default:
|
|
84
|
-
data = source;
|
|
85
|
-
break;
|
|
86
|
-
}
|
|
87
|
-
// Process data through neural import
|
|
88
|
-
let items;
|
|
89
|
-
let relationships = [];
|
|
90
|
-
if (Buffer.isBuffer(data) || typeof data === 'string') {
|
|
91
|
-
// Use neural import for parsing and analysis
|
|
92
|
-
const analysis = await this.neuralImport.getNeuralAnalysis(data, format);
|
|
93
|
-
// Extract items and relationships
|
|
94
|
-
items = analysis.detectedEntities.map(entity => ({
|
|
95
|
-
data: entity.originalData,
|
|
96
|
-
type: entity.nounType,
|
|
97
|
-
confidence: entity.confidence,
|
|
98
|
-
id: entity.suggestedId
|
|
99
|
-
}));
|
|
100
|
-
if (options.extractRelationships !== false) {
|
|
101
|
-
relationships = analysis.detectedRelationships;
|
|
102
|
-
}
|
|
103
|
-
// Log insights
|
|
104
|
-
for (const insight of analysis.insights) {
|
|
105
|
-
prodLog.info(`🧠 ${insight.description} (confidence: ${insight.confidence})`);
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
else if (Array.isArray(data)) {
|
|
109
|
-
items = data;
|
|
110
|
-
}
|
|
111
|
-
else {
|
|
112
|
-
items = [data];
|
|
113
|
-
}
|
|
114
|
-
result.stats.total = items.length;
|
|
115
|
-
// Import items in batches
|
|
116
|
-
const batchSize = options.batchSize || 50;
|
|
117
|
-
for (let i = 0; i < items.length; i += batchSize) {
|
|
118
|
-
const batch = items.slice(i, i + batchSize);
|
|
119
|
-
// Process batch in parallel if enabled
|
|
120
|
-
const promises = batch.map(async (item) => {
|
|
121
|
-
try {
|
|
122
|
-
// Detect type if needed
|
|
123
|
-
let nounType = item.type || options.typeHint;
|
|
124
|
-
if (!nounType && options.autoDetect !== false && this.typeMatcher) {
|
|
125
|
-
const match = await this.typeMatcher.matchNounType(item.data || item);
|
|
126
|
-
nounType = match.type;
|
|
127
|
-
}
|
|
128
|
-
// Prepare the data to import
|
|
129
|
-
const dataToImport = item.data || item;
|
|
130
|
-
// Create metadata combining original data with import metadata
|
|
131
|
-
const metadata = {
|
|
132
|
-
...(typeof dataToImport === 'object' ? dataToImport : {}),
|
|
133
|
-
...(item.data?.metadata || {}),
|
|
134
|
-
nounType,
|
|
135
|
-
_importedAt: new Date().toISOString(),
|
|
136
|
-
_confidence: item.confidence
|
|
137
|
-
};
|
|
138
|
-
// Add to brain using modern API signature
|
|
139
|
-
const id = await this.brain.add({ data: dataToImport, type: nounType || 'content', metadata });
|
|
140
|
-
result.nouns.push(id);
|
|
141
|
-
result.stats.imported++;
|
|
142
|
-
return id;
|
|
143
|
-
}
|
|
144
|
-
catch (error) {
|
|
145
|
-
result.errors.push(`Failed to import item: ${error.message}`);
|
|
146
|
-
result.stats.failed++;
|
|
147
|
-
return null;
|
|
148
|
-
}
|
|
149
|
-
});
|
|
150
|
-
if (options.parallel !== false) {
|
|
151
|
-
await Promise.all(promises);
|
|
152
|
-
}
|
|
153
|
-
else {
|
|
154
|
-
for (const promise of promises) {
|
|
155
|
-
await promise;
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
// Import relationships
|
|
160
|
-
for (const rel of relationships) {
|
|
161
|
-
try {
|
|
162
|
-
// Match verb type if needed
|
|
163
|
-
let verbType = rel.verbType;
|
|
164
|
-
if (!Object.values(VerbType).includes(verbType) && this.typeMatcher) {
|
|
165
|
-
const match = await this.typeMatcher.matchVerbType({ id: rel.sourceId }, { id: rel.targetId }, rel.verbType);
|
|
166
|
-
verbType = match.type;
|
|
167
|
-
}
|
|
168
|
-
const verbId = await this.brain.relate({
|
|
169
|
-
from: rel.sourceId,
|
|
170
|
-
to: rel.targetId,
|
|
171
|
-
type: verbType,
|
|
172
|
-
metadata: rel.metadata,
|
|
173
|
-
weight: rel.weight
|
|
174
|
-
});
|
|
175
|
-
result.verbs.push(verbId);
|
|
176
|
-
result.stats.relationships++;
|
|
177
|
-
}
|
|
178
|
-
catch (error) {
|
|
179
|
-
result.errors.push(`Failed to create relationship: ${error.message}`);
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
result.success = result.stats.imported > 0;
|
|
183
|
-
prodLog.info(`✨ Import complete: ${result.stats.imported}/${result.stats.total} items, ${result.stats.relationships} relationships`);
|
|
184
|
-
}
|
|
185
|
-
catch (error) {
|
|
186
|
-
result.errors.push(`Import failed: ${error.message}`);
|
|
187
|
-
prodLog.error('Import failed:', error);
|
|
188
|
-
}
|
|
189
|
-
return result;
|
|
190
|
-
}
|
|
191
|
-
/**
|
|
192
|
-
* Import from file
|
|
193
|
-
*/
|
|
194
|
-
async importFile(filePath, options = {}) {
|
|
195
|
-
return this.import(filePath, { ...options, source: 'file' });
|
|
196
|
-
}
|
|
197
|
-
/**
|
|
198
|
-
* Import from URL
|
|
199
|
-
*/
|
|
200
|
-
async importUrl(url, options = {}) {
|
|
201
|
-
return this.import(url, { ...options, source: 'url' });
|
|
202
|
-
}
|
|
203
|
-
/**
|
|
204
|
-
* Detect source type
|
|
205
|
-
*/
|
|
206
|
-
async detectSourceType(source, hint) {
|
|
207
|
-
if (hint && hint !== 'auto') {
|
|
208
|
-
return hint;
|
|
209
|
-
}
|
|
210
|
-
if (typeof source === 'string') {
|
|
211
|
-
// Check if URL
|
|
212
|
-
if (source.startsWith('http://') || source.startsWith('https://')) {
|
|
213
|
-
return 'url';
|
|
214
|
-
}
|
|
215
|
-
// Check if file path exists
|
|
216
|
-
try {
|
|
217
|
-
if (await fs.exists(source)) {
|
|
218
|
-
return 'file';
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
catch (error) {
|
|
222
|
-
// File system check failed, not a file path
|
|
223
|
-
console.debug('File path check failed:', error);
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
return 'data';
|
|
227
|
-
}
|
|
228
|
-
/**
|
|
229
|
-
* Detect format from file path
|
|
230
|
-
*/
|
|
231
|
-
detectFormatFromPath(filePath) {
|
|
232
|
-
const ext = path.extname(filePath).toLowerCase();
|
|
233
|
-
switch (ext) {
|
|
234
|
-
case '.json': return 'json';
|
|
235
|
-
case '.csv': return 'csv';
|
|
236
|
-
case '.txt': return 'text';
|
|
237
|
-
case '.md': return 'text';
|
|
238
|
-
case '.yaml':
|
|
239
|
-
case '.yml': return 'yaml';
|
|
240
|
-
default: return 'auto';
|
|
241
|
-
}
|
|
242
|
-
}
|
|
243
|
-
/**
|
|
244
|
-
* Read file
|
|
245
|
-
*/
|
|
246
|
-
async readFile(filePath) {
|
|
247
|
-
const content = await fs.readFile(filePath, 'utf8');
|
|
248
|
-
return Buffer.from(content, 'utf8');
|
|
249
|
-
}
|
|
250
|
-
/**
|
|
251
|
-
* Fetch from URL
|
|
252
|
-
*/
|
|
253
|
-
async fetchFromUrl(url) {
|
|
254
|
-
const response = await fetch(url);
|
|
255
|
-
if (!response.ok) {
|
|
256
|
-
throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
|
|
257
|
-
}
|
|
258
|
-
return response.text();
|
|
259
|
-
}
|
|
260
|
-
}
|
|
261
|
-
/**
|
|
262
|
-
* Create an import manager instance
|
|
263
|
-
*/
|
|
264
|
-
export function createImportManager(brain) {
|
|
265
|
-
return new ImportManager(brain);
|
|
266
|
-
}
|
|
267
|
-
//# sourceMappingURL=importManager.js.map
|