@soulcraft/brainy 5.5.0 → 5.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/dist/augmentations/display/fieldPatterns.js +3 -3
- package/dist/augmentations/display/intelligentComputation.d.ts +1 -1
- package/dist/augmentations/display/intelligentComputation.js +1 -1
- package/dist/augmentations/typeMatching/brainyTypes.d.ts +1 -1
- package/dist/augmentations/typeMatching/brainyTypes.js +1 -1
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.d.ts +1 -1
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.js +1 -1
- package/dist/augmentations/universalDisplayAugmentation.d.ts +1 -1
- package/dist/augmentations/universalDisplayAugmentation.js +1 -1
- package/dist/brainy.js +1 -1
- package/dist/cli/commands/types.js +2 -2
- package/dist/hnsw/typeAwareHNSWIndex.d.ts +3 -3
- package/dist/hnsw/typeAwareHNSWIndex.js +5 -5
- package/dist/importers/SmartExcelImporter.js +1 -1
- package/dist/neural/embeddedKeywordEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedKeywordEmbeddings.js +1 -1
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/query/typeAwareQueryPlanner.d.ts +7 -7
- package/dist/query/typeAwareQueryPlanner.js +7 -7
- package/dist/storage/adapters/azureBlobStorage.js +9 -0
- package/dist/storage/adapters/fileSystemStorage.js +17 -0
- package/dist/storage/adapters/gcsStorage.js +11 -0
- package/dist/storage/adapters/opfsStorage.js +22 -0
- package/dist/storage/adapters/r2Storage.js +11 -2
- package/dist/storage/adapters/s3CompatibleStorage.js +13 -0
- package/dist/storage/baseStorage.d.ts +48 -1
- package/dist/storage/baseStorage.js +242 -19
- package/package.json +1 -1
- package/dist/importManager.d.ts +0 -78
- package/dist/importManager.js +0 -267
- package/dist/storage/adapters/typeAwareStorageAdapter.d.ts +0 -300
- package/dist/storage/adapters/typeAwareStorageAdapter.js +0 -1012
package/dist/storage/baseStorage.js CHANGED

@@ -77,12 +77,14 @@ export class BaseStorage extends BaseStorageAdapter {
         this.cowEnabled = false;
         // Type-first indexing support (v5.4.0)
         // Built into all storage adapters for billion-scale efficiency
-        this.nounCountsByType = new Uint32Array(NOUN_TYPE_COUNT); //
-        this.verbCountsByType = new Uint32Array(VERB_TYPE_COUNT); //
-        // Total:
+        this.nounCountsByType = new Uint32Array(NOUN_TYPE_COUNT); // 168 bytes (Stage 3: 42 types)
+        this.verbCountsByType = new Uint32Array(VERB_TYPE_COUNT); // 508 bytes (Stage 3: 127 types)
+        // Total: 676 bytes (99.2% reduction vs Map-based tracking)
         // Type cache for O(1) lookups after first access
         this.nounTypeCache = new Map();
         this.verbTypeCache = new Map();
+        // v5.5.0: Track if type counts have been rebuilt (prevent repeated rebuilds)
+        this.typeCountsRebuilt = false;
     }
     /**
      * Analyze a storage key to determine its routing and path
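The fixed-slot bookkeeping above is worth spelling out: each canonical type gets one `Uint32Array` slot indexed by its ordinal, so the whole census costs 42 × 4 = 168 bytes for nouns plus 127 × 4 = 508 bytes for verbs (676 bytes total), regardless of entity count, versus a Map that allocates an entry per key. A minimal standalone sketch, assuming stable type-to-index mapping as `TypeUtils` provides in the diff (the helper names here are illustrative):

```typescript
// Minimal sketch of fixed-slot type counting.
const NOUN_TYPE_COUNT = 42;  // Stage 3: 42 noun types
const VERB_TYPE_COUNT = 127; // Stage 3: 127 verb types

const nounCounts = new Uint32Array(NOUN_TYPE_COUNT); // 42 * 4 = 168 bytes
const verbCounts = new Uint32Array(VERB_TYPE_COUNT); // 127 * 4 = 508 bytes

// O(1) increment on write, O(1) "is this type empty?" check on read.
function onNounAdded(typeIndex: number): void {
  nounCounts[typeIndex]++;
}

function isNounTypeEmpty(typeIndex: number): boolean {
  return nounCounts[typeIndex] === 0;
}

// Total payload: 168 + 508 = 676 bytes, independent of how many
// entities are stored — the source of the "99.2% reduction" claim
// relative to per-key Map tracking.
```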
@@ -201,6 +203,11 @@ export class BaseStorage extends BaseStorageAdapter {
      * @returns Promise that resolves when COW is initialized
      */
     async initializeCOW(options) {
+        // v5.6.1: If COW was explicitly disabled (e.g., via clear()), don't reinitialize
+        // This prevents automatic recreation of COW data after clear() operations
+        if (this.cowEnabled === false) {
+            return;
+        }
         // Check if RefManager already initialized (full COW setup complete)
         if (this.refManager) {
             return;
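The intent of the new guard, per its comments: once `clear()` has switched `cowEnabled` off, a later `initializeCOW()` is a no-op rather than silently recreating copy-on-write data. A hedged sketch of that interaction — the `clear()` body is an assumption inferred from the comment, not code shown in the diff:

```typescript
// Assumed shape of the interaction described by the v5.6.1 comment.
class StorageSketch {
  cowEnabled: boolean | undefined; // undefined = not yet decided
  refManager: object | undefined;

  async clear(): Promise<void> {
    // ... delete stored data ...
    this.cowEnabled = false; // assumption: clear() explicitly disables COW
  }

  async initializeCOW(_options?: object): Promise<void> {
    if (this.cowEnabled === false) {
      return; // explicitly disabled: do NOT recreate COW data
    }
    if (this.refManager) {
      return; // full COW setup already complete
    }
    // ... full COW setup would run here ...
  }
}
```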
@@ -825,14 +832,30 @@ export class BaseStorage extends BaseStorageAdapter {
      * Get nouns with pagination (v5.4.0: Type-first implementation)
      *
      * CRITICAL: This method is required for brain.find() to work!
-     * Iterates through
+     * Iterates through noun types with billion-scale optimizations.
+     *
+     * ARCHITECTURE: Reads storage directly (not indexes) to avoid circular dependencies.
+     * Storage → Indexes (one direction only). GraphAdjacencyIndex built FROM storage.
+     *
+     * OPTIMIZATIONS (v5.5.0):
+     * - Skip empty types using nounCountsByType[] tracking (O(1) check)
+     * - Early termination when offset + limit entities collected
+     * - Memory efficient: Never loads full dataset
      */
     async getNounsWithPagination(options) {
         await this.ensureInitialized();
-        const { limit, offset, filter } = options;
-        const
-
-
+        const { limit, offset = 0, filter } = options;
+        const collectedNouns = [];
+        const targetCount = offset + limit; // Early termination target
+        // v5.5.0 BUG FIX: Only use optimization if counts are reliable
+        const totalNounCountFromArray = this.nounCountsByType.reduce((sum, c) => sum + c, 0);
+        const useOptimization = totalNounCountFromArray > 0;
+        // v5.5.0: Iterate through noun types with billion-scale optimizations
+        for (let i = 0; i < NOUN_TYPE_COUNT && collectedNouns.length < targetCount; i++) {
+            // OPTIMIZATION 1: Skip empty types (only if counts are reliable)
+            if (useOptimization && this.nounCountsByType[i] === 0) {
+                continue;
+            }
             const type = TypeUtils.getNounFromIndex(i);
             // If filtering by type, skip other types
             if (filter?.nounType) {
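One subtle point in this hunk: the zero-count skip is only trusted when the count array has at least one non-zero entry. An all-zero array is indistinguishable from "statistics never built", and skipping "empty" types in that state would wrongly skip everything, so the code falls back to scanning every type. A distilled sketch of that reliability guard:

```typescript
// Distilled sketch of the guard from the diff: decide which type
// indices actually need a (slow) storage listing.
function planTypeScan(countsByType: Uint32Array): number[] {
  // Only trust zero counts if at least one slot is non-zero;
  // otherwise the counts may simply never have been populated.
  const total = countsByType.reduce((sum, c) => sum + c, 0);
  const useOptimization = total > 0;

  const typesToScan: number[] = [];
  for (let i = 0; i < countsByType.length; i++) {
    if (useOptimization && countsByType[i] === 0) {
      continue; // provably empty: skip the directory listing entirely
    }
    typesToScan.push(i);
  }
  return typesToScan;
}
```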
@@ -846,6 +869,10 @@ export class BaseStorage extends BaseStorageAdapter {
             // List all noun files for this type
             const nounFiles = await this.listObjectsInBranch(typeDir);
             for (const nounPath of nounFiles) {
+                // OPTIMIZATION 2: Early termination (stop when we have enough)
+                if (collectedNouns.length >= targetCount) {
+                    break;
+                }
                 // Skip if not a .json file
                 if (!nounPath.endsWith('.json'))
                     continue;
@@ -864,7 +891,7 @@ export class BaseStorage extends BaseStorageAdapter {
                 }
             }
                 // Combine noun + metadata (v5.4.0: Extract standard fields to top-level)
-
+                collectedNouns.push({
                     ...noun,
                     type: metadata.noun || type, // Required: Extract type from metadata
                     confidence: metadata.confidence,
@@ -892,19 +919,104 @@ export class BaseStorage extends BaseStorageAdapter {
                 // Skip types that have no data
             }
         }
-        // Apply pagination
-        const
-        const
-        const hasMore = offset + limit < totalCount;
+        // Apply pagination (v5.5.0: Efficient slicing after early termination)
+        const paginatedNouns = collectedNouns.slice(offset, offset + limit);
+        const hasMore = collectedNouns.length >= targetCount;
         return {
             items: paginatedNouns,
-            totalCount,
+            totalCount: collectedNouns.length, // Accurate count of collected results
             hasMore,
             nextCursor: hasMore && paginatedNouns.length > 0
                 ? paginatedNouns[paginatedNouns.length - 1].id
                 : undefined
         };
     }
+    /**
+     * Get verbs with pagination (v5.5.0: Type-first implementation with billion-scale optimizations)
+     *
+     * CRITICAL: This method is required for brain.getRelations() to work!
+     * Iterates through verb types with the same optimizations as nouns.
+     *
+     * ARCHITECTURE: Reads storage directly (not indexes) to avoid circular dependencies.
+     * Storage → Indexes (one direction only). GraphAdjacencyIndex built FROM storage.
+     *
+     * OPTIMIZATIONS (v5.5.0):
+     * - Skip empty types using verbCountsByType[] tracking (O(1) check)
+     * - Early termination when offset + limit verbs collected
+     * - Memory efficient: Never loads full dataset
+     * - Inline filtering for sourceId, targetId, verbType
+     */
+    async getVerbsWithPagination(options) {
+        await this.ensureInitialized();
+        const { limit, offset = 0, filter } = options;
+        const collectedVerbs = [];
+        const targetCount = offset + limit; // Early termination target
+        // v5.5.0 BUG FIX: Only use optimization if counts are reliable
+        const totalVerbCountFromArray = this.verbCountsByType.reduce((sum, c) => sum + c, 0);
+        const useOptimization = totalVerbCountFromArray > 0;
+        // v5.5.0: Iterate through verb types with billion-scale optimizations
+        for (let i = 0; i < VERB_TYPE_COUNT && collectedVerbs.length < targetCount; i++) {
+            // OPTIMIZATION 1: Skip empty types (only if counts are reliable)
+            if (useOptimization && this.verbCountsByType[i] === 0) {
+                continue;
+            }
+            const type = TypeUtils.getVerbFromIndex(i);
+            // If filtering by verbType, skip other types
+            if (filter?.verbType) {
+                const filterTypes = Array.isArray(filter.verbType) ? filter.verbType : [filter.verbType];
+                if (!filterTypes.includes(type)) {
+                    continue;
+                }
+            }
+            try {
+                const verbsOfType = await this.getVerbsByType_internal(type);
+                // Apply filtering inline (memory efficient)
+                for (const verb of verbsOfType) {
+                    // OPTIMIZATION 2: Early termination (stop when we have enough)
+                    if (collectedVerbs.length >= targetCount) {
+                        break;
+                    }
+                    // Apply filters if specified
+                    if (filter) {
+                        // Filter by sourceId
+                        if (filter.sourceId) {
+                            const sourceIds = Array.isArray(filter.sourceId)
+                                ? filter.sourceId
+                                : [filter.sourceId];
+                            if (!sourceIds.includes(verb.sourceId)) {
+                                continue;
+                            }
+                        }
+                        // Filter by targetId
+                        if (filter.targetId) {
+                            const targetIds = Array.isArray(filter.targetId)
+                                ? filter.targetId
+                                : [filter.targetId];
+                            if (!targetIds.includes(verb.targetId)) {
+                                continue;
+                            }
+                        }
+                    }
+                    // Verb passed all filters - add to collection
+                    collectedVerbs.push(verb);
+                }
+            }
+            catch (error) {
+                // Skip types that have no data (directory may not exist)
+            }
+        }
+        // Apply pagination (v5.5.0: Efficient slicing after early termination)
+        const paginatedVerbs = collectedVerbs.slice(offset, offset + limit);
+        const hasMore = collectedVerbs.length >= targetCount;
+        return {
+            items: paginatedVerbs,
+            totalCount: collectedVerbs.length, // Accurate count of collected results
+            hasMore,
+            nextCursor: hasMore && paginatedVerbs.length > 0
+                ? paginatedVerbs[paginatedVerbs.length - 1].id
+                : undefined
+        };
+    }
     /**
      * Get verbs with pagination and filtering
      * @param options Pagination and filtering options
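To make the pagination arithmetic concrete: with `offset = 20` and `limit = 10`, `targetCount` is 30; collection stops as soon as 30 verbs are gathered, the slice returns positions 20..29, and `hasMore` is true exactly when the scan hit the early-termination ceiling (there may be more beyond it). A hedged usage sketch — the options and result shapes follow the destructuring and return object in the diff, while the `storage` instance and the filter value are hypothetical:

```typescript
// Hypothetical caller; `storage` stands in for any BaseStorage-derived adapter.
declare const storage: {
  getVerbsWithPagination(options: {
    limit: number;
    offset?: number;
    filter?: {
      sourceId?: string | string[];
      targetId?: string | string[];
      verbType?: string | string[];
    };
  }): Promise<{
    items: Array<{ id: string }>;
    totalCount: number;
    hasMore: boolean;
    nextCursor?: string;
  }>;
};

async function demo(): Promise<void> {
  // targetCount = offset + limit = 30, so at most 30 verbs are collected.
  const page = await storage.getVerbsWithPagination({
    limit: 10,
    offset: 20,
    filter: { verbType: 'collaboratesWith' }, // assumption: illustrative type name
  });
  console.log(page.items.length); // <= 10: the slice of positions 20..29
  console.log(page.totalCount);   // verbs collected so far, not a global total
  console.log(page.hasMore);      // true iff collection hit the targetCount ceiling
  console.log(page.nextCursor);   // id of the last returned verb, or undefined
}
```

Note the semantics: `totalCount` reports how many verbs were collected before termination, not the number of verbs in storage; callers wanting a global count need a separate mechanism.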
@@ -1080,12 +1192,84 @@ export class BaseStorage extends BaseStorageAdapter {
                     nextCursor: result.nextCursor
                 };
             }
-            //
-
+            // UNIVERSAL FALLBACK: Iterate through verb types with early termination (billion-scale safe)
+            // This approach works for ALL storage adapters without requiring adapter-specific pagination
+            console.warn('Using universal type-iteration strategy for getVerbs(). ' +
+                'This works for all adapters but may be slower than native pagination. ' +
+                'For optimal performance at scale, storage adapters can implement getVerbsWithPagination().');
+            const collectedVerbs = [];
+            let totalScanned = 0;
+            const targetCount = offset + limit; // We need this many verbs total (including offset)
+            // v5.5.0 BUG FIX: Check if optimization should be used
+            // Only use type-skipping optimization if counts are non-zero (reliable)
+            const totalVerbCountFromArray = this.verbCountsByType.reduce((sum, c) => sum + c, 0);
+            const useOptimization = totalVerbCountFromArray > 0;
+            // Iterate through all 127 verb types (Stage 3 CANONICAL) with early termination
+            // OPTIMIZATION: Skip types with zero count (only if counts are reliable)
+            for (let i = 0; i < VERB_TYPE_COUNT && collectedVerbs.length < targetCount; i++) {
+                // Skip empty types for performance (but only if optimization is enabled)
+                if (useOptimization && this.verbCountsByType[i] === 0) {
+                    continue;
+                }
+                const type = TypeUtils.getVerbFromIndex(i);
+                try {
+                    const verbsOfType = await this.getVerbsByType_internal(type);
+                    // Apply filtering inline (memory efficient)
+                    for (const verb of verbsOfType) {
+                        // Apply filters if specified
+                        if (options?.filter) {
+                            // Filter by sourceId
+                            if (options.filter.sourceId) {
+                                const sourceIds = Array.isArray(options.filter.sourceId)
+                                    ? options.filter.sourceId
+                                    : [options.filter.sourceId];
+                                if (!sourceIds.includes(verb.sourceId)) {
+                                    continue;
+                                }
+                            }
+                            // Filter by targetId
+                            if (options.filter.targetId) {
+                                const targetIds = Array.isArray(options.filter.targetId)
+                                    ? options.filter.targetId
+                                    : [options.filter.targetId];
+                                if (!targetIds.includes(verb.targetId)) {
+                                    continue;
+                                }
+                            }
+                            // Filter by verbType
+                            if (options.filter.verbType) {
+                                const verbTypes = Array.isArray(options.filter.verbType)
+                                    ? options.filter.verbType
+                                    : [options.filter.verbType];
+                                if (!verbTypes.includes(verb.verb)) {
+                                    continue;
+                                }
+                            }
+                        }
+                        // Verb passed filters - add to collection
+                        collectedVerbs.push(verb);
+                        // Early termination: stop when we have enough for offset + limit
+                        if (collectedVerbs.length >= targetCount) {
+                            break;
+                        }
+                    }
+                    totalScanned += verbsOfType.length;
+                }
+                catch (error) {
+                    // Ignore errors for types with no verbs (directory may not exist)
+                    // This is expected for types that haven't been used yet
+                }
+            }
+            // Apply pagination (slice for offset)
+            const paginatedVerbs = collectedVerbs.slice(offset, offset + limit);
+            const hasMore = collectedVerbs.length >= targetCount;
             return {
-                items:
-                totalCount:
-                hasMore
+                items: paginatedVerbs,
+                totalCount: collectedVerbs.length, // Accurate count of filtered results
+                hasMore,
+                nextCursor: hasMore && paginatedVerbs.length > 0
+                    ? paginatedVerbs[paginatedVerbs.length - 1].id
+                    : undefined
            };
        }
        catch (error) {
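The warning spells out the contract: the type-iteration fallback is universal but costs one listing per non-empty verb type, so an adapter that can paginate natively (for example, one backed by a database cursor) can short-circuit it by providing its own `getVerbsWithPagination()`. A hedged sketch of such an override — the result shape follows the diff, while the class and its backend call are invented for illustration:

```typescript
// Sketch only: an adapter that paginates natively instead of iterating
// all verb types. `queryVerbPage` is a hypothetical backend call.
interface VerbPage {
  items: Array<{ id: string }>;
  totalCount: number;
  hasMore: boolean;
  nextCursor?: string;
}

class CursorBackedAdapterSketch {
  async getVerbsWithPagination(options: { limit: number; offset?: number }): Promise<VerbPage> {
    const { limit, offset = 0 } = options;
    // Fetch one extra row so hasMore can be decided without a second query.
    const rows = await this.queryVerbPage(offset, limit + 1);
    const hasMore = rows.length > limit;
    const items = rows.slice(0, limit);
    return {
      items,
      totalCount: offset + rows.length, // lower bound; an exact total needs a COUNT query
      hasMore,
      nextCursor: hasMore && items.length > 0 ? items[items.length - 1].id : undefined,
    };
  }

  private async queryVerbPage(offset: number, limit: number): Promise<Array<{ id: string }>> {
    // Placeholder for a real backend call (SQL LIMIT/OFFSET, a cursor API, etc.)
    void offset; void limit;
    return [];
  }
}
```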
@@ -1402,6 +1586,45 @@ export class BaseStorage extends BaseStorageAdapter {
         };
         await this.writeObjectToPath(`${SYSTEM_DIR}/type-statistics.json`, stats);
     }
+    /**
+     * Rebuild type counts from actual storage (v5.5.0)
+     * Called when statistics are missing or inconsistent
+     * Ensures verbCountsByType is always accurate for reliable pagination
+     */
+    async rebuildTypeCounts() {
+        console.log('[BaseStorage] Rebuilding type counts from storage...');
+        // Rebuild verb counts by checking each type directory
+        for (let i = 0; i < VERB_TYPE_COUNT; i++) {
+            const type = TypeUtils.getVerbFromIndex(i);
+            const prefix = `entities/verbs/${type}/vectors/`;
+            try {
+                const paths = await this.listObjectsInBranch(prefix);
+                this.verbCountsByType[i] = paths.length;
+            }
+            catch (error) {
+                // Type directory doesn't exist - count is 0
+                this.verbCountsByType[i] = 0;
+            }
+        }
+        // Rebuild noun counts similarly
+        for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
+            const type = TypeUtils.getNounFromIndex(i);
+            const prefix = `entities/nouns/${type}/vectors/`;
+            try {
+                const paths = await this.listObjectsInBranch(prefix);
+                this.nounCountsByType[i] = paths.length;
+            }
+            catch (error) {
+                // Type directory doesn't exist - count is 0
+                this.nounCountsByType[i] = 0;
+            }
+        }
+        // Save rebuilt counts to storage
+        await this.saveTypeStatistics();
+        const totalVerbs = this.verbCountsByType.reduce((sum, count) => sum + count, 0);
+        const totalNouns = this.nounCountsByType.reduce((sum, count) => sum + count, 0);
+        console.log(`[BaseStorage] Rebuilt counts: ${totalNouns} nouns, ${totalVerbs} verbs`);
+    }
     /**
      * Get noun type from cache or metadata
      * Relies on nounTypeCache populated during metadata saves
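`rebuildTypeCounts()` pairs with the `typeCountsRebuilt` flag added to the constructor: the flag exists so the (potentially expensive) per-type directory listings run at most once. The diff shows the flag and the method but not the exact call site, so this trigger condition is an assumption sketched for illustration:

```typescript
// Assumed call-site shape; the trigger condition is illustrative only.
async function ensureTypeCounts(storage: {
  typeCountsRebuilt: boolean;
  verbCountsByType: Uint32Array;
  nounCountsByType: Uint32Array;
  rebuildTypeCounts(): Promise<void>;
}): Promise<void> {
  const totalKnown =
    storage.nounCountsByType.reduce((s, c) => s + c, 0) +
    storage.verbCountsByType.reduce((s, c) => s + c, 0);
  // Rebuild at most once, and only when counts look missing (all zero).
  if (totalKnown === 0 && !storage.typeCountsRebuilt) {
    await storage.rebuildTypeCounts();
    storage.typeCountsRebuilt = true;
  }
}
```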
package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "5.5.0",
+  "version": "5.6.1",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
   "main": "dist/index.js",
   "module": "dist/index.js",
package/dist/importManager.d.ts DELETED

@@ -1,78 +0,0 @@
-/**
- * Import Manager - Comprehensive data import with intelligent type detection
- *
- * Handles multiple data sources:
- * - Direct data (objects, arrays)
- * - Files (JSON, CSV, text)
- * - URLs (fetch and parse)
- * - Streams (for large files)
- *
- * Uses NeuralImportAugmentation for intelligent processing
- */
-import { NounType } from './types/graphTypes.js';
-export interface ImportOptions {
-    source?: 'data' | 'file' | 'url' | 'auto';
-    format?: 'json' | 'csv' | 'text' | 'yaml' | 'auto';
-    batchSize?: number;
-    autoDetect?: boolean;
-    typeHint?: NounType;
-    extractRelationships?: boolean;
-    csvDelimiter?: string;
-    csvHeaders?: boolean;
-    parallel?: boolean;
-    maxConcurrency?: number;
-}
-export interface ImportResult {
-    success: boolean;
-    nouns: string[];
-    verbs: string[];
-    errors: string[];
-    stats: {
-        total: number;
-        imported: number;
-        failed: number;
-        relationships: number;
-    };
-}
-export declare class ImportManager {
-    private neuralImport;
-    private typeMatcher;
-    private brain;
-    constructor(brain: any);
-    /**
-     * Initialize the import manager
-     */
-    init(): Promise<void>;
-    /**
-     * Main import method - handles all sources
-     */
-    import(source: string | Buffer | any[] | any, options?: ImportOptions): Promise<ImportResult>;
-    /**
-     * Import from file
-     */
-    importFile(filePath: string, options?: ImportOptions): Promise<ImportResult>;
-    /**
-     * Import from URL
-     */
-    importUrl(url: string, options?: ImportOptions): Promise<ImportResult>;
-    /**
-     * Detect source type
-     */
-    private detectSourceType;
-    /**
-     * Detect format from file path
-     */
-    private detectFormatFromPath;
-    /**
-     * Read file
-     */
-    private readFile;
-    /**
-     * Fetch from URL
-     */
-    private fetchFromUrl;
-}
-/**
- * Create an import manager instance
- */
-export declare function createImportManager(brain: any): ImportManager;
package/dist/importManager.js DELETED

@@ -1,267 +0,0 @@
-/**
- * Import Manager - Comprehensive data import with intelligent type detection
- *
- * Handles multiple data sources:
- * - Direct data (objects, arrays)
- * - Files (JSON, CSV, text)
- * - URLs (fetch and parse)
- * - Streams (for large files)
- *
- * Uses NeuralImportAugmentation for intelligent processing
- */
-import { VerbType } from './types/graphTypes.js';
-import { NeuralImportAugmentation } from './augmentations/neuralImport.js';
-import * as fs from './universal/fs.js';
-import * as path from './universal/path.js';
-import { prodLog } from './utils/logger.js';
-export class ImportManager {
-    constructor(brain) {
-        this.typeMatcher = null;
-        this.brain = brain;
-        this.neuralImport = new NeuralImportAugmentation();
-    }
-    /**
-     * Initialize the import manager
-     */
-    async init() {
-        // Initialize neural import with proper context
-        const context = {
-            brain: this.brain,
-            storage: this.brain.storage,
-            config: {},
-            log: (message, level) => {
-                if (level === 'error') {
-                    prodLog.error(message);
-                }
-                else if (level === 'warn') {
-                    prodLog.warn(message);
-                }
-                else {
-                    prodLog.info(message);
-                }
-            }
-        };
-        await this.neuralImport.initialize(context);
-        // Get type matcher
-        const { getBrainyTypes } = await import('./augmentations/typeMatching/brainyTypes.js');
-        this.typeMatcher = await getBrainyTypes();
-    }
-    /**
-     * Main import method - handles all sources
-     */
-    async import(source, options = {}) {
-        const result = {
-            success: false,
-            nouns: [],
-            verbs: [],
-            errors: [],
-            stats: {
-                total: 0,
-                imported: 0,
-                failed: 0,
-                relationships: 0
-            }
-        };
-        try {
-            // Detect source type
-            const sourceType = await this.detectSourceType(source, options.source);
-            // Get data based on source type
-            let data;
-            let format = options.format || 'auto';
-            switch (sourceType) {
-                case 'url':
-                    data = await this.fetchFromUrl(source);
-                    break;
-                case 'file':
-                    const filePath = source;
-                    data = await this.readFile(filePath);
-                    if (format === 'auto') {
-                        format = this.detectFormatFromPath(filePath);
-                    }
-                    break;
-                case 'data':
-                default:
-                    data = source;
-                    break;
-            }
-            // Process data through neural import
-            let items;
-            let relationships = [];
-            if (Buffer.isBuffer(data) || typeof data === 'string') {
-                // Use neural import for parsing and analysis
-                const analysis = await this.neuralImport.getNeuralAnalysis(data, format);
-                // Extract items and relationships
-                items = analysis.detectedEntities.map(entity => ({
-                    data: entity.originalData,
-                    type: entity.nounType,
-                    confidence: entity.confidence,
-                    id: entity.suggestedId
-                }));
-                if (options.extractRelationships !== false) {
-                    relationships = analysis.detectedRelationships;
-                }
-                // Log insights
-                for (const insight of analysis.insights) {
-                    prodLog.info(`🧠 ${insight.description} (confidence: ${insight.confidence})`);
-                }
-            }
-            else if (Array.isArray(data)) {
-                items = data;
-            }
-            else {
-                items = [data];
-            }
-            result.stats.total = items.length;
-            // Import items in batches
-            const batchSize = options.batchSize || 50;
-            for (let i = 0; i < items.length; i += batchSize) {
-                const batch = items.slice(i, i + batchSize);
-                // Process batch in parallel if enabled
-                const promises = batch.map(async (item) => {
-                    try {
-                        // Detect type if needed
-                        let nounType = item.type || options.typeHint;
-                        if (!nounType && options.autoDetect !== false && this.typeMatcher) {
-                            const match = await this.typeMatcher.matchNounType(item.data || item);
-                            nounType = match.type;
-                        }
-                        // Prepare the data to import
-                        const dataToImport = item.data || item;
-                        // Create metadata combining original data with import metadata
-                        const metadata = {
-                            ...(typeof dataToImport === 'object' ? dataToImport : {}),
-                            ...(item.data?.metadata || {}),
-                            nounType,
-                            _importedAt: new Date().toISOString(),
-                            _confidence: item.confidence
-                        };
-                        // Add to brain using modern API signature
-                        const id = await this.brain.add({ data: dataToImport, type: nounType || 'content', metadata });
-                        result.nouns.push(id);
-                        result.stats.imported++;
-                        return id;
-                    }
-                    catch (error) {
-                        result.errors.push(`Failed to import item: ${error.message}`);
-                        result.stats.failed++;
-                        return null;
-                    }
-                });
-                if (options.parallel !== false) {
-                    await Promise.all(promises);
-                }
-                else {
-                    for (const promise of promises) {
-                        await promise;
-                    }
-                }
-            }
-            // Import relationships
-            for (const rel of relationships) {
-                try {
-                    // Match verb type if needed
-                    let verbType = rel.verbType;
-                    if (!Object.values(VerbType).includes(verbType) && this.typeMatcher) {
-                        const match = await this.typeMatcher.matchVerbType({ id: rel.sourceId }, { id: rel.targetId }, rel.verbType);
-                        verbType = match.type;
-                    }
-                    const verbId = await this.brain.relate({
-                        from: rel.sourceId,
-                        to: rel.targetId,
-                        type: verbType,
-                        metadata: rel.metadata,
-                        weight: rel.weight
-                    });
-                    result.verbs.push(verbId);
-                    result.stats.relationships++;
-                }
-                catch (error) {
-                    result.errors.push(`Failed to create relationship: ${error.message}`);
-                }
-            }
-            result.success = result.stats.imported > 0;
-            prodLog.info(`✨ Import complete: ${result.stats.imported}/${result.stats.total} items, ${result.stats.relationships} relationships`);
-        }
-        catch (error) {
-            result.errors.push(`Import failed: ${error.message}`);
-            prodLog.error('Import failed:', error);
-        }
-        return result;
-    }
-    /**
-     * Import from file
-     */
-    async importFile(filePath, options = {}) {
-        return this.import(filePath, { ...options, source: 'file' });
-    }
-    /**
-     * Import from URL
-     */
-    async importUrl(url, options = {}) {
-        return this.import(url, { ...options, source: 'url' });
-    }
-    /**
-     * Detect source type
-     */
-    async detectSourceType(source, hint) {
-        if (hint && hint !== 'auto') {
-            return hint;
-        }
-        if (typeof source === 'string') {
-            // Check if URL
-            if (source.startsWith('http://') || source.startsWith('https://')) {
-                return 'url';
-            }
-            // Check if file path exists
-            try {
-                if (await fs.exists(source)) {
-                    return 'file';
-                }
-            }
-            catch (error) {
-                // File system check failed, not a file path
-                console.debug('File path check failed:', error);
-            }
-        }
-        return 'data';
-    }
-    /**
-     * Detect format from file path
-     */
-    detectFormatFromPath(filePath) {
-        const ext = path.extname(filePath).toLowerCase();
-        switch (ext) {
-            case '.json': return 'json';
-            case '.csv': return 'csv';
-            case '.txt': return 'text';
-            case '.md': return 'text';
-            case '.yaml':
-            case '.yml': return 'yaml';
-            default: return 'auto';
-        }
-    }
-    /**
-     * Read file
-     */
-    async readFile(filePath) {
-        const content = await fs.readFile(filePath, 'utf8');
-        return Buffer.from(content, 'utf8');
-    }
-    /**
-     * Fetch from URL
-     */
-    async fetchFromUrl(url) {
-        const response = await fetch(url);
-        if (!response.ok) {
-            throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
-        }
-        return response.text();
-    }
-}
-/**
- * Create an import manager instance
- */
-export function createImportManager(brain) {
-    return new ImportManager(brain);
-}
-//# sourceMappingURL=importManager.js.map