@soulcraft/brainy 3.32.2 → 3.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +169 -0
- package/dist/augmentations/typeMatching/brainyTypes.d.ts +5 -1
- package/dist/augmentations/typeMatching/brainyTypes.js +14 -7
- package/dist/brainy.d.ts +16 -0
- package/dist/brainy.js +56 -0
- package/dist/importers/SmartExcelImporter.js +12 -0
- package/dist/neural/embeddedTypeEmbeddings.d.ts +34 -0
- package/dist/neural/embeddedTypeEmbeddings.js +96 -0
- package/dist/neural/entityExtractor.d.ts +2 -0
- package/dist/neural/entityExtractor.js +21 -42
- package/dist/neural/naturalLanguageProcessor.d.ts +2 -1
- package/dist/neural/naturalLanguageProcessor.js +17 -31
- package/dist/storage/adapters/baseStorageAdapter.d.ts +37 -0
- package/dist/storage/adapters/baseStorageAdapter.js +105 -10
- package/dist/storage/adapters/gcsStorage.d.ts +9 -0
- package/dist/storage/adapters/gcsStorage.js +32 -4
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +9 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +11 -0
- package/package.json +5 -2
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
*/
|
|
11
11
|
import { PatternLibrary } from './patternLibrary.js';
|
|
12
12
|
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
13
|
+
import { getNounTypeEmbeddings, getVerbTypeEmbeddings } from './embeddedTypeEmbeddings.js';
|
|
13
14
|
export class NaturalLanguageProcessor {
|
|
14
15
|
constructor(brain) {
|
|
15
16
|
this.initialized = false;
|
|
@@ -54,41 +55,26 @@ export class NaturalLanguageProcessor {
|
|
|
54
55
|
}
|
|
55
56
|
/**
|
|
56
57
|
* Initialize embeddings for all NounTypes and VerbTypes
|
|
57
|
-
*
|
|
58
|
+
* PRODUCTION OPTIMIZATION (v3.33.0): Uses pre-computed type embeddings
|
|
59
|
+
* Zero runtime cost - embeddings are loaded instantly from embedded data
|
|
58
60
|
*/
|
|
59
61
|
async initializeTypeEmbeddings() {
|
|
60
62
|
if (this.typeEmbeddingsInitialized)
|
|
61
63
|
return;
|
|
62
|
-
//
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
// Embed all VerbTypes (40+ types)
|
|
79
|
-
for (const [key, value] of Object.entries(VerbType)) {
|
|
80
|
-
if (typeof value === 'string') {
|
|
81
|
-
const keyEmbedding = await this.getEmbedding(key);
|
|
82
|
-
const valueEmbedding = await this.getEmbedding(value);
|
|
83
|
-
this.verbTypeEmbeddings.set(key, keyEmbedding);
|
|
84
|
-
this.verbTypeEmbeddings.set(value, valueEmbedding);
|
|
85
|
-
// Common variations for verbs
|
|
86
|
-
const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
|
|
87
|
-
if (spaceSeparated !== value) {
|
|
88
|
-
const variantEmbedding = await this.getEmbedding(spaceSeparated);
|
|
89
|
-
this.verbTypeEmbeddings.set(spaceSeparated, variantEmbedding);
|
|
90
|
-
}
|
|
91
|
-
}
|
|
64
|
+
// Load pre-computed embeddings (instant, no computation)
|
|
65
|
+
const nounEmbeddings = getNounTypeEmbeddings();
|
|
66
|
+
const verbEmbeddings = getVerbTypeEmbeddings();
|
|
67
|
+
// Store noun type embeddings with all variations for lookup
|
|
68
|
+
for (const [type, embedding] of nounEmbeddings.entries()) {
|
|
69
|
+
this.nounTypeEmbeddings.set(type, embedding);
|
|
70
|
+
// Also store lowercase version for case-insensitive matching
|
|
71
|
+
this.nounTypeEmbeddings.set(type.toLowerCase(), embedding);
|
|
72
|
+
}
|
|
73
|
+
// Store verb type embeddings with all variations for lookup
|
|
74
|
+
for (const [type, embedding] of verbEmbeddings.entries()) {
|
|
75
|
+
this.verbTypeEmbeddings.set(type, embedding);
|
|
76
|
+
// Also store lowercase version for case-insensitive matching
|
|
77
|
+
this.verbTypeEmbeddings.set(type.toLowerCase(), embedding);
|
|
92
78
|
}
|
|
93
79
|
this.typeEmbeddingsInitialized = true;
|
|
94
80
|
}
|
|
@@ -263,6 +263,12 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
|
|
|
263
263
|
timestamp: number;
|
|
264
264
|
}>;
|
|
265
265
|
protected readonly COUNT_CACHE_TTL = 60000;
|
|
266
|
+
protected pendingCountPersist: boolean;
|
|
267
|
+
protected lastCountPersistTime: number;
|
|
268
|
+
protected scheduledCountPersistTimeout: NodeJS.Timeout | null;
|
|
269
|
+
protected pendingCountOperations: number;
|
|
270
|
+
protected countPersistBatchSize: number;
|
|
271
|
+
protected countPersistInterval: number;
|
|
266
272
|
/**
|
|
267
273
|
* Get total noun count - O(1) operation
|
|
268
274
|
* @returns Promise that resolves to the total number of nouns
|
|
@@ -303,6 +309,37 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
|
|
|
303
309
|
* @param type The verb type
|
|
304
310
|
*/
|
|
305
311
|
protected decrementVerbCount(type: string): Promise<void>;
|
|
312
|
+
/**
|
|
313
|
+
* Detect if this storage adapter uses cloud storage (network I/O)
|
|
314
|
+
* Cloud storage benefits from batching; local storage does not.
|
|
315
|
+
*
|
|
316
|
+
* Override this method in subclasses for accurate detection.
|
|
317
|
+
* Default implementation checks storage type from getStorageStatus().
|
|
318
|
+
*
|
|
319
|
+
* @returns true if cloud storage (GCS, S3, R2), false if local (File, Memory)
|
|
320
|
+
*/
|
|
321
|
+
protected isCloudStorage(): boolean;
|
|
322
|
+
/**
|
|
323
|
+
* Schedule a smart batched persist operation.
|
|
324
|
+
*
|
|
325
|
+
* Strategy:
|
|
326
|
+
* - Local Storage: Persist immediately (fast, no network latency)
|
|
327
|
+
* - Cloud Storage: Batch persist (10 ops OR 5 seconds, whichever first)
|
|
328
|
+
*
|
|
329
|
+
* This mirrors the statistics batching pattern for consistency.
|
|
330
|
+
*/
|
|
331
|
+
protected scheduleCountPersist(): Promise<void>;
|
|
332
|
+
/**
|
|
333
|
+
* Flush counts immediately to storage.
|
|
334
|
+
*
|
|
335
|
+
* Used for:
|
|
336
|
+
* - Graceful shutdown (SIGTERM handler)
|
|
337
|
+
* - Forced persist (batch threshold reached)
|
|
338
|
+
* - Local storage immediate persist
|
|
339
|
+
*
|
|
340
|
+
* This is the public API that shutdown hooks can call.
|
|
341
|
+
*/
|
|
342
|
+
flushCounts(): Promise<void>;
|
|
306
343
|
/**
|
|
307
344
|
* Initialize counts from storage - must be implemented by each adapter
|
|
308
345
|
* @protected
|
|
@@ -48,6 +48,17 @@ export class BaseStorageAdapter {
|
|
|
48
48
|
this.verbCounts = new Map(); // verb type -> count
|
|
49
49
|
this.countCache = new Map();
|
|
50
50
|
this.COUNT_CACHE_TTL = 60000; // 1 minute cache TTL
|
|
51
|
+
// =============================================
|
|
52
|
+
// Smart Count Batching (v3.32.3+)
|
|
53
|
+
// =============================================
|
|
54
|
+
// Count batching state - mirrors statistics batching pattern
|
|
55
|
+
this.pendingCountPersist = false; // Counts changed since last persist?
|
|
56
|
+
this.lastCountPersistTime = 0; // Timestamp of last persist
|
|
57
|
+
this.scheduledCountPersistTimeout = null; // Scheduled persist timer
|
|
58
|
+
this.pendingCountOperations = 0; // Operations since last persist
|
|
59
|
+
// Batching configuration (overridable by subclasses for custom strategies)
|
|
60
|
+
this.countPersistBatchSize = 10; // Operations before forcing persist (cloud storage)
|
|
61
|
+
this.countPersistInterval = 5000; // Milliseconds before forcing persist (cloud storage)
|
|
51
62
|
}
|
|
52
63
|
/**
|
|
53
64
|
* Save statistics data
|
|
@@ -659,10 +670,10 @@ export class BaseStorageAdapter {
|
|
|
659
670
|
const mutex = getGlobalMutex();
|
|
660
671
|
await mutex.runExclusive(`count-entity-${type}`, async () => {
|
|
661
672
|
this.incrementEntityCount(type);
|
|
662
|
-
//
|
|
663
|
-
//
|
|
664
|
-
//
|
|
665
|
-
await this.persistCounts();
|
|
673
|
+
// Smart batching (v3.32.3+): Adapts to storage type
|
|
674
|
+
// - Cloud storage (GCS, S3): Batches 10 ops OR 5 seconds
|
|
675
|
+
// - Local storage (File, Memory): Persists immediately
|
|
676
|
+
await this.scheduleCountPersist();
|
|
666
677
|
});
|
|
667
678
|
}
|
|
668
679
|
/**
|
|
@@ -693,8 +704,8 @@ export class BaseStorageAdapter {
|
|
|
693
704
|
const mutex = getGlobalMutex();
|
|
694
705
|
await mutex.runExclusive(`count-entity-${type}`, async () => {
|
|
695
706
|
this.decrementEntityCount(type);
|
|
696
|
-
//
|
|
697
|
-
await this.persistCounts();
|
|
707
|
+
// Smart batching (v3.32.3+): Adapts to storage type
|
|
708
|
+
await this.scheduleCountPersist();
|
|
698
709
|
});
|
|
699
710
|
}
|
|
700
711
|
/**
|
|
@@ -711,8 +722,8 @@ export class BaseStorageAdapter {
|
|
|
711
722
|
count: this.totalVerbCount,
|
|
712
723
|
timestamp: Date.now()
|
|
713
724
|
});
|
|
714
|
-
//
|
|
715
|
-
await this.persistCounts();
|
|
725
|
+
// Smart batching (v3.32.3+): Adapts to storage type
|
|
726
|
+
await this.scheduleCountPersist();
|
|
716
727
|
});
|
|
717
728
|
}
|
|
718
729
|
/**
|
|
@@ -737,9 +748,93 @@ export class BaseStorageAdapter {
|
|
|
737
748
|
count: this.totalVerbCount,
|
|
738
749
|
timestamp: Date.now()
|
|
739
750
|
});
|
|
740
|
-
//
|
|
741
|
-
await this.persistCounts();
|
|
751
|
+
// Smart batching (v3.32.3+): Adapts to storage type
|
|
752
|
+
await this.scheduleCountPersist();
|
|
742
753
|
});
|
|
743
754
|
}
|
|
755
|
+
// =============================================
|
|
756
|
+
// Smart Batching Methods (v3.32.3+)
|
|
757
|
+
// =============================================
|
|
758
|
+
/**
|
|
759
|
+
* Detect if this storage adapter uses cloud storage (network I/O)
|
|
760
|
+
* Cloud storage benefits from batching; local storage does not.
|
|
761
|
+
*
|
|
762
|
+
* Override this method in subclasses for accurate detection.
|
|
763
|
+
* Default implementation checks storage type from getStorageStatus().
|
|
764
|
+
*
|
|
765
|
+
* @returns true if cloud storage (GCS, S3, R2), false if local (File, Memory)
|
|
766
|
+
*/
|
|
767
|
+
isCloudStorage() {
|
|
768
|
+
// Default: assume local storage (conservative, prefers reliability over performance)
|
|
769
|
+
// Subclasses should override this for accurate detection
|
|
770
|
+
return false;
|
|
771
|
+
}
|
|
772
|
+
/**
|
|
773
|
+
* Schedule a smart batched persist operation.
|
|
774
|
+
*
|
|
775
|
+
* Strategy:
|
|
776
|
+
* - Local Storage: Persist immediately (fast, no network latency)
|
|
777
|
+
* - Cloud Storage: Batch persist (10 ops OR 5 seconds, whichever first)
|
|
778
|
+
*
|
|
779
|
+
* This mirrors the statistics batching pattern for consistency.
|
|
780
|
+
*/
|
|
781
|
+
async scheduleCountPersist() {
|
|
782
|
+
// Mark counts as pending persist
|
|
783
|
+
this.pendingCountPersist = true;
|
|
784
|
+
this.pendingCountOperations++;
|
|
785
|
+
// Local storage: persist immediately (fast enough, no benefit from batching)
|
|
786
|
+
if (!this.isCloudStorage()) {
|
|
787
|
+
await this.flushCounts();
|
|
788
|
+
return;
|
|
789
|
+
}
|
|
790
|
+
// Cloud storage: use smart batching
|
|
791
|
+
// Persist if we've hit the batch size threshold
|
|
792
|
+
if (this.pendingCountOperations >= this.countPersistBatchSize) {
|
|
793
|
+
await this.flushCounts();
|
|
794
|
+
return;
|
|
795
|
+
}
|
|
796
|
+
// Otherwise, schedule a time-based persist if not already scheduled
|
|
797
|
+
if (!this.scheduledCountPersistTimeout) {
|
|
798
|
+
this.scheduledCountPersistTimeout = setTimeout(() => {
|
|
799
|
+
this.flushCounts().catch(error => {
|
|
800
|
+
console.error('Failed to flush counts on timer:', error);
|
|
801
|
+
});
|
|
802
|
+
}, this.countPersistInterval);
|
|
803
|
+
}
|
|
804
|
+
}
|
|
805
|
+
/**
|
|
806
|
+
* Flush counts immediately to storage.
|
|
807
|
+
*
|
|
808
|
+
* Used for:
|
|
809
|
+
* - Graceful shutdown (SIGTERM handler)
|
|
810
|
+
* - Forced persist (batch threshold reached)
|
|
811
|
+
* - Local storage immediate persist
|
|
812
|
+
*
|
|
813
|
+
* This is the public API that shutdown hooks can call.
|
|
814
|
+
*/
|
|
815
|
+
async flushCounts() {
|
|
816
|
+
// Clear any scheduled persist
|
|
817
|
+
if (this.scheduledCountPersistTimeout) {
|
|
818
|
+
clearTimeout(this.scheduledCountPersistTimeout);
|
|
819
|
+
this.scheduledCountPersistTimeout = null;
|
|
820
|
+
}
|
|
821
|
+
// Nothing to flush?
|
|
822
|
+
if (!this.pendingCountPersist) {
|
|
823
|
+
return;
|
|
824
|
+
}
|
|
825
|
+
try {
|
|
826
|
+
// Persist to storage (implemented by subclass)
|
|
827
|
+
await this.persistCounts();
|
|
828
|
+
// Update state
|
|
829
|
+
this.lastCountPersistTime = Date.now();
|
|
830
|
+
this.pendingCountPersist = false;
|
|
831
|
+
this.pendingCountOperations = 0;
|
|
832
|
+
}
|
|
833
|
+
catch (error) {
|
|
834
|
+
console.error('❌ CRITICAL: Failed to flush counts to storage:', error);
|
|
835
|
+
// Keep pending flag set so we retry on next operation
|
|
836
|
+
throw error;
|
|
837
|
+
}
|
|
838
|
+
}
|
|
744
839
|
}
|
|
745
840
|
//# sourceMappingURL=baseStorageAdapter.js.map
|
|
@@ -102,6 +102,15 @@ export declare class GcsStorage extends BaseStorage {
|
|
|
102
102
|
* Override base class method to detect GCS-specific throttling errors
|
|
103
103
|
*/
|
|
104
104
|
protected isThrottlingError(error: any): boolean;
|
|
105
|
+
/**
|
|
106
|
+
* Override base class to enable smart batching for cloud storage (v3.32.3+)
|
|
107
|
+
*
|
|
108
|
+
* GCS is cloud storage with network latency (~50ms per write).
|
|
109
|
+
* Smart batching reduces writes from 1000 ops → 100 batches.
|
|
110
|
+
*
|
|
111
|
+
* @returns true (GCS is cloud storage)
|
|
112
|
+
*/
|
|
113
|
+
protected isCloudStorage(): boolean;
|
|
105
114
|
/**
|
|
106
115
|
* Apply backpressure before starting an operation
|
|
107
116
|
* @returns Request ID for tracking
|
|
@@ -195,6 +195,17 @@ export class GcsStorage extends BaseStorage {
|
|
|
195
195
|
message.includes('rate limit') ||
|
|
196
196
|
message.includes('too many requests'));
|
|
197
197
|
}
|
|
198
|
+
/**
|
|
199
|
+
* Override base class to enable smart batching for cloud storage (v3.32.3+)
|
|
200
|
+
*
|
|
201
|
+
* GCS is cloud storage with network latency (~50ms per write).
|
|
202
|
+
* Smart batching reduces writes from 1000 ops → 100 batches.
|
|
203
|
+
*
|
|
204
|
+
* @returns true (GCS is cloud storage)
|
|
205
|
+
*/
|
|
206
|
+
isCloudStorage() {
|
|
207
|
+
return true; // GCS benefits from batching
|
|
208
|
+
}
|
|
198
209
|
/**
|
|
199
210
|
* Apply backpressure before starting an operation
|
|
200
211
|
* @returns Request ID for tracking
|
|
@@ -1128,15 +1139,32 @@ export class GcsStorage extends BaseStorage {
|
|
|
1128
1139
|
async initializeCountsFromScan() {
|
|
1129
1140
|
try {
|
|
1130
1141
|
prodLog.info('📊 Scanning GCS bucket to initialize counts...');
|
|
1142
|
+
prodLog.info(`🔍 Noun prefix: ${this.nounPrefix}`);
|
|
1143
|
+
prodLog.info(`🔍 Verb prefix: ${this.verbPrefix}`);
|
|
1131
1144
|
// Count nouns
|
|
1132
1145
|
const [nounFiles] = await this.bucket.getFiles({ prefix: this.nounPrefix });
|
|
1133
|
-
|
|
1146
|
+
prodLog.info(`🔍 Found ${nounFiles?.length || 0} total files under noun prefix`);
|
|
1147
|
+
const jsonNounFiles = nounFiles?.filter((f) => f.name?.endsWith('.json')) || [];
|
|
1148
|
+
this.totalNounCount = jsonNounFiles.length;
|
|
1149
|
+
if (jsonNounFiles.length > 0 && jsonNounFiles.length <= 5) {
|
|
1150
|
+
prodLog.info(`📄 Sample noun files: ${jsonNounFiles.slice(0, 5).map((f) => f.name).join(', ')}`);
|
|
1151
|
+
}
|
|
1134
1152
|
// Count verbs
|
|
1135
1153
|
const [verbFiles] = await this.bucket.getFiles({ prefix: this.verbPrefix });
|
|
1136
|
-
|
|
1154
|
+
prodLog.info(`🔍 Found ${verbFiles?.length || 0} total files under verb prefix`);
|
|
1155
|
+
const jsonVerbFiles = verbFiles?.filter((f) => f.name?.endsWith('.json')) || [];
|
|
1156
|
+
this.totalVerbCount = jsonVerbFiles.length;
|
|
1157
|
+
if (jsonVerbFiles.length > 0 && jsonVerbFiles.length <= 5) {
|
|
1158
|
+
prodLog.info(`📄 Sample verb files: ${jsonVerbFiles.slice(0, 5).map((f) => f.name).join(', ')}`);
|
|
1159
|
+
}
|
|
1137
1160
|
// Save initial counts
|
|
1138
|
-
|
|
1139
|
-
|
|
1161
|
+
if (this.totalNounCount > 0 || this.totalVerbCount > 0) {
|
|
1162
|
+
await this.persistCounts();
|
|
1163
|
+
prodLog.info(`✅ Initialized counts from scan: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
|
|
1164
|
+
}
|
|
1165
|
+
else {
|
|
1166
|
+
prodLog.warn(`⚠️ No entities found during bucket scan. Check that entities exist and prefixes are correct.`);
|
|
1167
|
+
}
|
|
1140
1168
|
}
|
|
1141
1169
|
catch (error) {
|
|
1142
1170
|
// CRITICAL FIX: Don't silently fail - this prevents data loss scenarios
|
|
@@ -563,4 +563,13 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
563
563
|
* Persist counts to S3 storage
|
|
564
564
|
*/
|
|
565
565
|
protected persistCounts(): Promise<void>;
|
|
566
|
+
/**
|
|
567
|
+
* Override base class to enable smart batching for cloud storage (v3.32.3+)
|
|
568
|
+
*
|
|
569
|
+
* S3 is cloud storage with network latency (~50ms per write).
|
|
570
|
+
* Smart batching reduces writes from 1000 ops → 100 batches.
|
|
571
|
+
*
|
|
572
|
+
* @returns true (S3 is cloud storage)
|
|
573
|
+
*/
|
|
574
|
+
protected isCloudStorage(): boolean;
|
|
566
575
|
}
|
|
@@ -2754,5 +2754,16 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
2754
2754
|
console.error('Error persisting counts to S3:', error);
|
|
2755
2755
|
}
|
|
2756
2756
|
}
|
|
2757
|
+
/**
|
|
2758
|
+
* Override base class to enable smart batching for cloud storage (v3.32.3+)
|
|
2759
|
+
*
|
|
2760
|
+
* S3 is cloud storage with network latency (~50ms per write).
|
|
2761
|
+
* Smart batching reduces writes from 1000 ops → 100 batches.
|
|
2762
|
+
*
|
|
2763
|
+
* @returns true (S3 is cloud storage)
|
|
2764
|
+
*/
|
|
2765
|
+
isCloudStorage() {
|
|
2766
|
+
return true; // S3 benefits from batching
|
|
2767
|
+
}
|
|
2757
2768
|
}
|
|
2758
2769
|
//# sourceMappingURL=s3CompatibleStorage.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "3.32.2",
|
|
3
|
+
"version": "3.34.0",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -55,7 +55,10 @@
|
|
|
55
55
|
"node": "22.x"
|
|
56
56
|
},
|
|
57
57
|
"scripts": {
|
|
58
|
-
"build": "npm run build:patterns:if-needed && tsc && tsc -p tsconfig.cli.json",
|
|
58
|
+
"build": "npm run build:types:if-needed && npm run build:patterns:if-needed && tsc && tsc -p tsconfig.cli.json",
|
|
59
|
+
"build:types": "tsx scripts/buildTypeEmbeddings.ts",
|
|
60
|
+
"build:types:if-needed": "node scripts/check-type-embeddings.cjs || npm run build:types",
|
|
61
|
+
"build:types:force": "npm run build:types",
|
|
59
62
|
"build:patterns": "tsx scripts/buildEmbeddedPatterns.ts",
|
|
60
63
|
"build:patterns:if-needed": "node scripts/check-patterns.cjs || npm run build:patterns",
|
|
61
64
|
"build:patterns:force": "npm run build:patterns",
|