@soulcraft/brainy 3.32.1 → 3.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@
10
10
  */
11
11
  import { PatternLibrary } from './patternLibrary.js';
12
12
  import { NounType, VerbType } from '../types/graphTypes.js';
13
+ import { getNounTypeEmbeddings, getVerbTypeEmbeddings } from './embeddedTypeEmbeddings.js';
13
14
  export class NaturalLanguageProcessor {
14
15
  constructor(brain) {
15
16
  this.initialized = false;
@@ -54,41 +55,26 @@ export class NaturalLanguageProcessor {
54
55
  }
55
56
  /**
56
57
  * Initialize embeddings for all NounTypes and VerbTypes
57
- * These are fixed types that never change - perfect for caching
58
+ * PRODUCTION OPTIMIZATION (v3.33.0): Uses pre-computed type embeddings
59
+ * Zero runtime cost - embeddings are loaded instantly from embedded data
58
60
  */
59
61
  async initializeTypeEmbeddings() {
60
62
  if (this.typeEmbeddingsInitialized)
61
63
  return;
62
- // Embed all NounTypes (30+ types)
63
- for (const [key, value] of Object.entries(NounType)) {
64
- if (typeof value === 'string') {
65
- // Embed both the key (Person) and value (person)
66
- const keyEmbedding = await this.getEmbedding(key);
67
- const valueEmbedding = await this.getEmbedding(value);
68
- this.nounTypeEmbeddings.set(key, keyEmbedding);
69
- this.nounTypeEmbeddings.set(value, valueEmbedding);
70
- // Also embed common variations
71
- const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
72
- if (spaceSeparated !== value) {
73
- const variantEmbedding = await this.getEmbedding(spaceSeparated);
74
- this.nounTypeEmbeddings.set(spaceSeparated, variantEmbedding);
75
- }
76
- }
77
- }
78
- // Embed all VerbTypes (40+ types)
79
- for (const [key, value] of Object.entries(VerbType)) {
80
- if (typeof value === 'string') {
81
- const keyEmbedding = await this.getEmbedding(key);
82
- const valueEmbedding = await this.getEmbedding(value);
83
- this.verbTypeEmbeddings.set(key, keyEmbedding);
84
- this.verbTypeEmbeddings.set(value, valueEmbedding);
85
- // Common variations for verbs
86
- const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
87
- if (spaceSeparated !== value) {
88
- const variantEmbedding = await this.getEmbedding(spaceSeparated);
89
- this.verbTypeEmbeddings.set(spaceSeparated, variantEmbedding);
90
- }
91
- }
64
+ // Load pre-computed embeddings (instant, no computation)
65
+ const nounEmbeddings = getNounTypeEmbeddings();
66
+ const verbEmbeddings = getVerbTypeEmbeddings();
67
+ // Store noun type embeddings with all variations for lookup
68
+ for (const [type, embedding] of nounEmbeddings.entries()) {
69
+ this.nounTypeEmbeddings.set(type, embedding);
70
+ // Also store lowercase version for case-insensitive matching
71
+ this.nounTypeEmbeddings.set(type.toLowerCase(), embedding);
72
+ }
73
+ // Store verb type embeddings with all variations for lookup
74
+ for (const [type, embedding] of verbEmbeddings.entries()) {
75
+ this.verbTypeEmbeddings.set(type, embedding);
76
+ // Also store lowercase version for case-insensitive matching
77
+ this.verbTypeEmbeddings.set(type.toLowerCase(), embedding);
92
78
  }
93
79
  this.typeEmbeddingsInitialized = true;
94
80
  }
@@ -263,6 +263,12 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
263
263
  timestamp: number;
264
264
  }>;
265
265
  protected readonly COUNT_CACHE_TTL = 60000;
266
+ protected pendingCountPersist: boolean;
267
+ protected lastCountPersistTime: number;
268
+ protected scheduledCountPersistTimeout: NodeJS.Timeout | null;
269
+ protected pendingCountOperations: number;
270
+ protected countPersistBatchSize: number;
271
+ protected countPersistInterval: number;
266
272
  /**
267
273
  * Get total noun count - O(1) operation
268
274
  * @returns Promise that resolves to the total number of nouns
@@ -303,6 +309,37 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
303
309
  * @param type The verb type
304
310
  */
305
311
  protected decrementVerbCount(type: string): Promise<void>;
312
+ /**
313
+ * Detect if this storage adapter uses cloud storage (network I/O)
314
+ * Cloud storage benefits from batching; local storage does not.
315
+ *
316
+ * Override this method in subclasses for accurate detection.
317
+ * Default implementation checks storage type from getStorageStatus().
318
+ *
319
+ * @returns true if cloud storage (GCS, S3, R2), false if local (File, Memory)
320
+ */
321
+ protected isCloudStorage(): boolean;
322
+ /**
323
+ * Schedule a smart batched persist operation.
324
+ *
325
+ * Strategy:
326
+ * - Local Storage: Persist immediately (fast, no network latency)
327
+ * - Cloud Storage: Batch persist (10 ops OR 5 seconds, whichever first)
328
+ *
329
+ * This mirrors the statistics batching pattern for consistency.
330
+ */
331
+ protected scheduleCountPersist(): Promise<void>;
332
+ /**
333
+ * Flush counts immediately to storage.
334
+ *
335
+ * Used for:
336
+ * - Graceful shutdown (SIGTERM handler)
337
+ * - Forced persist (batch threshold reached)
338
+ * - Local storage immediate persist
339
+ *
340
+ * This is the public API that shutdown hooks can call.
341
+ */
342
+ flushCounts(): Promise<void>;
306
343
  /**
307
344
  * Initialize counts from storage - must be implemented by each adapter
308
345
  * @protected
@@ -48,6 +48,17 @@ export class BaseStorageAdapter {
48
48
  this.verbCounts = new Map(); // verb type -> count
49
49
  this.countCache = new Map();
50
50
  this.COUNT_CACHE_TTL = 60000; // 1 minute cache TTL
51
+ // =============================================
52
+ // Smart Count Batching (v3.32.3+)
53
+ // =============================================
54
+ // Count batching state - mirrors statistics batching pattern
55
+ this.pendingCountPersist = false; // Counts changed since last persist?
56
+ this.lastCountPersistTime = 0; // Timestamp of last persist
57
+ this.scheduledCountPersistTimeout = null; // Scheduled persist timer
58
+ this.pendingCountOperations = 0; // Operations since last persist
59
+ // Batching configuration (overridable by subclasses for custom strategies)
60
+ this.countPersistBatchSize = 10; // Operations before forcing persist (cloud storage)
61
+ this.countPersistInterval = 5000; // Milliseconds before forcing persist (cloud storage)
51
62
  }
52
63
  /**
53
64
  * Save statistics data
@@ -659,10 +670,10 @@ export class BaseStorageAdapter {
659
670
  const mutex = getGlobalMutex();
660
671
  await mutex.runExclusive(`count-entity-${type}`, async () => {
661
672
  this.incrementEntityCount(type);
662
- // Persist counts periodically
663
- if (this.totalNounCount % 10 === 0) {
664
- await this.persistCounts();
665
- }
673
+ // Smart batching (v3.32.3+): Adapts to storage type
674
+ // - Cloud storage (GCS, S3): Batches 10 ops OR 5 seconds
675
+ // - Local storage (File, Memory): Persists immediately
676
+ await this.scheduleCountPersist();
666
677
  });
667
678
  }
668
679
  /**
@@ -693,9 +704,8 @@ export class BaseStorageAdapter {
693
704
  const mutex = getGlobalMutex();
694
705
  await mutex.runExclusive(`count-entity-${type}`, async () => {
695
706
  this.decrementEntityCount(type);
696
- if (this.totalNounCount % 10 === 0) {
697
- await this.persistCounts();
698
- }
707
+ // Smart batching (v3.32.3+): Adapts to storage type
708
+ await this.scheduleCountPersist();
699
709
  });
700
710
  }
701
711
  /**
@@ -712,10 +722,8 @@ export class BaseStorageAdapter {
712
722
  count: this.totalVerbCount,
713
723
  timestamp: Date.now()
714
724
  });
715
- // Persist counts immediately for consistency
716
- if (this.totalVerbCount % 10 === 0) {
717
- await this.persistCounts();
718
- }
725
+ // Smart batching (v3.32.3+): Adapts to storage type
726
+ await this.scheduleCountPersist();
719
727
  });
720
728
  }
721
729
  /**
@@ -740,11 +748,93 @@ export class BaseStorageAdapter {
740
748
  count: this.totalVerbCount,
741
749
  timestamp: Date.now()
742
750
  });
743
- // Persist counts immediately for consistency
744
- if (this.totalVerbCount % 10 === 0) {
745
- await this.persistCounts();
746
- }
751
+ // Smart batching (v3.32.3+): Adapts to storage type
752
+ await this.scheduleCountPersist();
747
753
  });
748
754
  }
755
+ // =============================================
756
+ // Smart Batching Methods (v3.32.3+)
757
+ // =============================================
758
+ /**
759
+ * Detect if this storage adapter uses cloud storage (network I/O)
760
+ * Cloud storage benefits from batching; local storage does not.
761
+ *
762
+ * Override this method in subclasses for accurate detection.
763
+ * Default implementation checks storage type from getStorageStatus().
764
+ *
765
+ * @returns true if cloud storage (GCS, S3, R2), false if local (File, Memory)
766
+ */
767
+ isCloudStorage() {
768
+ // Default: assume local storage (conservative, prefers reliability over performance)
769
+ // Subclasses should override this for accurate detection
770
+ return false;
771
+ }
772
+ /**
773
+ * Schedule a smart batched persist operation.
774
+ *
775
+ * Strategy:
776
+ * - Local Storage: Persist immediately (fast, no network latency)
777
+ * - Cloud Storage: Batch persist (10 ops OR 5 seconds, whichever first)
778
+ *
779
+ * This mirrors the statistics batching pattern for consistency.
780
+ */
781
+ async scheduleCountPersist() {
782
+ // Mark counts as pending persist
783
+ this.pendingCountPersist = true;
784
+ this.pendingCountOperations++;
785
+ // Local storage: persist immediately (fast enough, no benefit from batching)
786
+ if (!this.isCloudStorage()) {
787
+ await this.flushCounts();
788
+ return;
789
+ }
790
+ // Cloud storage: use smart batching
791
+ // Persist if we've hit the batch size threshold
792
+ if (this.pendingCountOperations >= this.countPersistBatchSize) {
793
+ await this.flushCounts();
794
+ return;
795
+ }
796
+ // Otherwise, schedule a time-based persist if not already scheduled
797
+ if (!this.scheduledCountPersistTimeout) {
798
+ this.scheduledCountPersistTimeout = setTimeout(() => {
799
+ this.flushCounts().catch(error => {
800
+ console.error('Failed to flush counts on timer:', error);
801
+ });
802
+ }, this.countPersistInterval);
803
+ }
804
+ }
805
+ /**
806
+ * Flush counts immediately to storage.
807
+ *
808
+ * Used for:
809
+ * - Graceful shutdown (SIGTERM handler)
810
+ * - Forced persist (batch threshold reached)
811
+ * - Local storage immediate persist
812
+ *
813
+ * This is the public API that shutdown hooks can call.
814
+ */
815
+ async flushCounts() {
816
+ // Clear any scheduled persist
817
+ if (this.scheduledCountPersistTimeout) {
818
+ clearTimeout(this.scheduledCountPersistTimeout);
819
+ this.scheduledCountPersistTimeout = null;
820
+ }
821
+ // Nothing to flush?
822
+ if (!this.pendingCountPersist) {
823
+ return;
824
+ }
825
+ try {
826
+ // Persist to storage (implemented by subclass)
827
+ await this.persistCounts();
828
+ // Update state
829
+ this.lastCountPersistTime = Date.now();
830
+ this.pendingCountPersist = false;
831
+ this.pendingCountOperations = 0;
832
+ }
833
+ catch (error) {
834
+ console.error('❌ CRITICAL: Failed to flush counts to storage:', error);
835
+ // Keep pending flag set so we retry on next operation
836
+ throw error;
837
+ }
838
+ }
749
839
  }
750
840
  //# sourceMappingURL=baseStorageAdapter.js.map
@@ -102,6 +102,15 @@ export declare class GcsStorage extends BaseStorage {
102
102
  * Override base class method to detect GCS-specific throttling errors
103
103
  */
104
104
  protected isThrottlingError(error: any): boolean;
105
+ /**
106
+ * Override base class to enable smart batching for cloud storage (v3.32.3+)
107
+ *
108
+ * GCS is cloud storage with network latency (~50ms per write).
109
+ * Smart batching reduces writes from 1000 ops → 100 batches.
110
+ *
111
+ * @returns true (GCS is cloud storage)
112
+ */
113
+ protected isCloudStorage(): boolean;
105
114
  /**
106
115
  * Apply backpressure before starting an operation
107
116
  * @returns Request ID for tracking
@@ -195,6 +195,17 @@ export class GcsStorage extends BaseStorage {
195
195
  message.includes('rate limit') ||
196
196
  message.includes('too many requests'));
197
197
  }
198
+ /**
199
+ * Override base class to enable smart batching for cloud storage (v3.32.3+)
200
+ *
201
+ * GCS is cloud storage with network latency (~50ms per write).
202
+ * Smart batching reduces writes from 1000 ops → 100 batches.
203
+ *
204
+ * @returns true (GCS is cloud storage)
205
+ */
206
+ isCloudStorage() {
207
+ return true; // GCS benefits from batching
208
+ }
198
209
  /**
199
210
  * Apply backpressure before starting an operation
200
211
  * @returns Request ID for tracking
@@ -1095,8 +1106,8 @@ export class GcsStorage extends BaseStorage {
1095
1106
  * Initialize counts from storage
1096
1107
  */
1097
1108
  async initializeCounts() {
1109
+ const key = `${this.systemPrefix}counts.json`;
1098
1110
  try {
1099
- const key = `${this.systemPrefix}counts.json`;
1100
1111
  const file = this.bucket.file(key);
1101
1112
  const [contents] = await file.download();
1102
1113
  const counts = JSON.parse(contents.toString());
@@ -1104,16 +1115,21 @@ export class GcsStorage extends BaseStorage {
1104
1115
  this.totalVerbCount = counts.totalVerbCount || 0;
1105
1116
  this.entityCounts = new Map(Object.entries(counts.entityCounts || {}));
1106
1117
  this.verbCounts = new Map(Object.entries(counts.verbCounts || {}));
1107
- prodLog.info(`📊 Loaded counts: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
1118
+ prodLog.info(`📊 Loaded counts from storage: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
1108
1119
  }
1109
1120
  catch (error) {
1110
1121
  if (error.code === 404) {
1111
- // No counts file yet - initialize from scan
1112
- prodLog.info('📊 No counts file found - initializing from storage scan...');
1122
+ // No counts file yet - initialize from scan (first-time setup or counts not persisted)
1123
+ prodLog.info('📊 No counts file found - this is normal for first init or if <10 entities were added');
1113
1124
  await this.initializeCountsFromScan();
1114
1125
  }
1115
1126
  else {
1116
- this.logger.error('Error loading counts:', error);
1127
+ // CRITICAL FIX: Don't silently fail on network/permission errors
1128
+ this.logger.error('❌ CRITICAL: Failed to load counts from GCS:', error);
1129
+ prodLog.error(`❌ Error loading ${key}: ${error.message}`);
1130
+ // Try to recover by scanning the bucket
1131
+ prodLog.warn('⚠️ Attempting recovery by scanning GCS bucket...');
1132
+ await this.initializeCountsFromScan();
1117
1133
  }
1118
1134
  }
1119
1135
  }
@@ -1122,18 +1138,38 @@ export class GcsStorage extends BaseStorage {
1122
1138
  */
1123
1139
  async initializeCountsFromScan() {
1124
1140
  try {
1141
+ prodLog.info('📊 Scanning GCS bucket to initialize counts...');
1142
+ prodLog.info(`🔍 Noun prefix: ${this.nounPrefix}`);
1143
+ prodLog.info(`🔍 Verb prefix: ${this.verbPrefix}`);
1125
1144
  // Count nouns
1126
1145
  const [nounFiles] = await this.bucket.getFiles({ prefix: this.nounPrefix });
1127
- this.totalNounCount = nounFiles?.filter((f) => f.name?.endsWith('.json')).length || 0;
1146
+ prodLog.info(`🔍 Found ${nounFiles?.length || 0} total files under noun prefix`);
1147
+ const jsonNounFiles = nounFiles?.filter((f) => f.name?.endsWith('.json')) || [];
1148
+ this.totalNounCount = jsonNounFiles.length;
1149
+ if (jsonNounFiles.length > 0 && jsonNounFiles.length <= 5) {
1150
+ prodLog.info(`📄 Sample noun files: ${jsonNounFiles.slice(0, 5).map((f) => f.name).join(', ')}`);
1151
+ }
1128
1152
  // Count verbs
1129
1153
  const [verbFiles] = await this.bucket.getFiles({ prefix: this.verbPrefix });
1130
- this.totalVerbCount = verbFiles?.filter((f) => f.name?.endsWith('.json')).length || 0;
1154
+ prodLog.info(`🔍 Found ${verbFiles?.length || 0} total files under verb prefix`);
1155
+ const jsonVerbFiles = verbFiles?.filter((f) => f.name?.endsWith('.json')) || [];
1156
+ this.totalVerbCount = jsonVerbFiles.length;
1157
+ if (jsonVerbFiles.length > 0 && jsonVerbFiles.length <= 5) {
1158
+ prodLog.info(`📄 Sample verb files: ${jsonVerbFiles.slice(0, 5).map((f) => f.name).join(', ')}`);
1159
+ }
1131
1160
  // Save initial counts
1132
- await this.persistCounts();
1133
- prodLog.info(`✅ Initialized counts: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
1161
+ if (this.totalNounCount > 0 || this.totalVerbCount > 0) {
1162
+ await this.persistCounts();
1163
+ prodLog.info(`✅ Initialized counts from scan: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
1164
+ }
1165
+ else {
1166
+ prodLog.warn(`⚠️ No entities found during bucket scan. Check that entities exist and prefixes are correct.`);
1167
+ }
1134
1168
  }
1135
1169
  catch (error) {
1136
- this.logger.error('Error initializing counts from scan:', error);
1170
+ // CRITICAL FIX: Don't silently fail - this prevents data loss scenarios
1171
+ this.logger.error('❌ CRITICAL: Failed to initialize counts from GCS bucket scan:', error);
1172
+ throw new Error(`Failed to initialize GCS storage counts: ${error}. This prevents container restarts from working correctly.`);
1137
1173
  }
1138
1174
  }
1139
1175
  /**
@@ -563,4 +563,13 @@ export declare class S3CompatibleStorage extends BaseStorage {
563
563
  * Persist counts to S3 storage
564
564
  */
565
565
  protected persistCounts(): Promise<void>;
566
+ /**
567
+ * Override base class to enable smart batching for cloud storage (v3.32.3+)
568
+ *
569
+ * S3 is cloud storage with network latency (~50ms per write).
570
+ * Smart batching reduces writes from 1000 ops → 100 batches.
571
+ *
572
+ * @returns true (S3 is cloud storage)
573
+ */
574
+ protected isCloudStorage(): boolean;
566
575
  }
@@ -2754,5 +2754,16 @@ export class S3CompatibleStorage extends BaseStorage {
2754
2754
  console.error('Error persisting counts to S3:', error);
2755
2755
  }
2756
2756
  }
2757
+ /**
2758
+ * Override base class to enable smart batching for cloud storage (v3.32.3+)
2759
+ *
2760
+ * S3 is cloud storage with network latency (~50ms per write).
2761
+ * Smart batching reduces writes from 1000 ops → 100 batches.
2762
+ *
2763
+ * @returns true (S3 is cloud storage)
2764
+ */
2765
+ isCloudStorage() {
2766
+ return true; // S3 benefits from batching
2767
+ }
2757
2768
  }
2758
2769
  //# sourceMappingURL=s3CompatibleStorage.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "3.32.1",
3
+ "version": "3.34.0",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",
@@ -55,7 +55,10 @@
55
55
  "node": "22.x"
56
56
  },
57
57
  "scripts": {
58
- "build": "npm run build:patterns:if-needed && tsc && tsc -p tsconfig.cli.json",
58
+ "build": "npm run build:types:if-needed && npm run build:patterns:if-needed && tsc && tsc -p tsconfig.cli.json",
59
+ "build:types": "tsx scripts/buildTypeEmbeddings.ts",
60
+ "build:types:if-needed": "node scripts/check-type-embeddings.cjs || npm run build:types",
61
+ "build:types:force": "npm run build:types",
59
62
  "build:patterns": "tsx scripts/buildEmbeddedPatterns.ts",
60
63
  "build:patterns:if-needed": "node scripts/check-patterns.cjs || npm run build:patterns",
61
64
  "build:patterns:force": "npm run build:patterns",