@soulcraft/brainy 3.49.0 → 3.50.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. package/CHANGELOG.md +38 -0
  2. package/dist/coreTypes.d.ts +17 -1
  3. package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
  4. package/dist/neural/embeddedTypeEmbeddings.js +2 -2
  5. package/dist/storage/adapters/baseStorageAdapter.d.ts +13 -13
  6. package/dist/storage/adapters/fileSystemStorage.js +25 -6
  7. package/dist/storage/adapters/gcsStorage.js +17 -5
  8. package/dist/storage/adapters/memoryStorage.js +17 -9
  9. package/dist/storage/adapters/opfsStorage.js +25 -6
  10. package/dist/storage/adapters/r2Storage.js +17 -2
  11. package/dist/storage/adapters/s3CompatibleStorage.js +14 -2
  12. package/dist/storage/adapters/typeAwareStorageAdapter.d.ts +11 -1
  13. package/dist/storage/adapters/typeAwareStorageAdapter.js +25 -16
  14. package/dist/storage/baseStorage.d.ts +7 -0
  15. package/dist/storage/baseStorage.js +47 -28
  16. package/dist/utils/fieldTypeInference.d.ts +181 -0
  17. package/dist/utils/fieldTypeInference.js +420 -0
  18. package/dist/utils/metadataIndex.d.ts +11 -1
  19. package/dist/utils/metadataIndex.js +67 -18
  20. package/dist/utils/metadataIndexChunking.d.ts +7 -0
  21. package/dist/utils/metadataIndexChunking.js +14 -0
  22. package/package.json +1 -1
  23. package/dist/augmentations/KnowledgeAugmentation.d.ts +0 -40
  24. package/dist/augmentations/KnowledgeAugmentation.js +0 -251
  25. package/dist/query/typeInference.d.ts +0 -158
  26. package/dist/query/typeInference.js +0 -760
  27. package/dist/types/brainyDataInterface.d.ts +0 -52
  28. package/dist/types/brainyDataInterface.js +0 -10
  29. package/dist/vfs/ConceptSystem.d.ts +0 -203
  30. package/dist/vfs/ConceptSystem.js +0 -545
  31. package/dist/vfs/EntityManager.d.ts +0 -75
  32. package/dist/vfs/EntityManager.js +0 -216
  33. package/dist/vfs/EventRecorder.d.ts +0 -84
  34. package/dist/vfs/EventRecorder.js +0 -269
  35. package/dist/vfs/GitBridge.d.ts +0 -167
  36. package/dist/vfs/GitBridge.js +0 -537
  37. package/dist/vfs/KnowledgeLayer.d.ts +0 -35
  38. package/dist/vfs/KnowledgeLayer.js +0 -443
  39. package/dist/vfs/PersistentEntitySystem.d.ts +0 -165
  40. package/dist/vfs/PersistentEntitySystem.js +0 -503
  41. package/dist/vfs/SemanticVersioning.d.ts +0 -105
  42. package/dist/vfs/SemanticVersioning.js +0 -309
package/dist/storage/baseStorage.js
@@ -74,7 +74,9 @@ export class BaseStorage extends BaseStorageAdapter {
74
74
  id.startsWith('__index_') ||
75
75
  id.startsWith('__system_') ||
76
76
  id.startsWith('statistics_') ||
77
- id === 'statistics';
77
+ id === 'statistics' ||
78
+ id.startsWith('__chunk__') || // Metadata index chunks (roaring bitmap data)
79
+ id.startsWith('__sparse_index__'); // Metadata sparse indices (zone maps + bloom filters)
78
80
  if (isSystemKey) {
79
81
  return {
80
82
  original: id,
@@ -203,6 +205,10 @@ export class BaseStorage extends BaseStorageAdapter {
203
205
  }
204
206
  /**
205
207
  * Save a verb to storage
208
+ *
209
+ * ARCHITECTURAL FIX (v3.50.1): HNSWVerb now includes verb/sourceId/targetId
210
+ * These are core relational fields, not metadata. They're stored in the vector
211
+ * file for fast access and to align with actual usage patterns.
206
212
  */
207
213
  async saveVerb(verb) {
208
214
  await this.ensureInitialized();
@@ -210,27 +216,29 @@ export class BaseStorage extends BaseStorageAdapter {
210
216
  if (verb.verb) {
211
217
  validateVerbType(verb.verb);
212
218
  }
213
- // Extract the lightweight HNSWVerb data
219
+ // Extract HNSWVerb with CORE relational fields included
214
220
  const hnswVerb = {
215
221
  id: verb.id,
216
222
  vector: verb.vector,
217
- connections: verb.connections || new Map()
223
+ connections: verb.connections || new Map(),
224
+ // CORE RELATIONAL DATA (v3.50.1+)
225
+ verb: (verb.verb || verb.type || 'relatedTo'),
226
+ sourceId: verb.sourceId || verb.source || '',
227
+ targetId: verb.targetId || verb.target || '',
228
+ // User metadata (if any)
229
+ metadata: verb.metadata
218
230
  };
219
- // Extract and save the metadata separately
231
+ // Extract lightweight metadata for separate file (optional fields only)
220
232
  const metadata = {
221
- sourceId: verb.sourceId || verb.source,
222
- targetId: verb.targetId || verb.target,
223
- source: verb.source || verb.sourceId,
224
- target: verb.target || verb.targetId,
225
- type: verb.type || verb.verb,
226
- verb: verb.verb || verb.type,
227
233
  weight: verb.weight,
228
- metadata: verb.metadata,
229
234
  data: verb.data,
230
235
  createdAt: verb.createdAt,
231
236
  updatedAt: verb.updatedAt,
232
237
  createdBy: verb.createdBy,
233
- embedding: verb.embedding
238
+ // Legacy aliases for backward compatibility
239
+ source: verb.source || verb.sourceId,
240
+ target: verb.target || verb.targetId,
241
+ type: verb.type || verb.verb
234
242
  };
235
243
  // Save both the HNSWVerb and metadata atomically
236
244
  try {
@@ -271,13 +279,14 @@ export class BaseStorage extends BaseStorageAdapter {
271
279
  }
272
280
  /**
273
281
  * Convert HNSWVerb to GraphVerb by combining with metadata
282
+ *
283
+ * ARCHITECTURAL FIX (v3.50.1): Core fields (verb/sourceId/targetId) are now in HNSWVerb
284
+ * Only optional fields (weight, timestamps, etc.) come from metadata file
274
285
  */
275
286
  async convertHNSWVerbToGraphVerb(hnswVerb) {
276
287
  try {
288
+ // Metadata file is now optional - contains only weight, timestamps, etc.
277
289
  const metadata = await this.getVerbMetadata(hnswVerb.id);
278
- if (!metadata) {
279
- return null;
280
- }
281
290
  // Create default timestamp if not present
282
291
  const defaultTimestamp = {
283
292
  seconds: Math.floor(Date.now() / 1000),
@@ -291,18 +300,21 @@ export class BaseStorage extends BaseStorageAdapter {
291
300
  return {
292
301
  id: hnswVerb.id,
293
302
  vector: hnswVerb.vector,
294
- sourceId: metadata.sourceId,
295
- targetId: metadata.targetId,
296
- source: metadata.source,
297
- target: metadata.target,
298
- verb: metadata.verb,
299
- type: metadata.type,
300
- weight: metadata.weight || 1.0,
301
- metadata: metadata.metadata || {},
302
- createdAt: metadata.createdAt || defaultTimestamp,
303
- updatedAt: metadata.updatedAt || defaultTimestamp,
304
- createdBy: metadata.createdBy || defaultCreatedBy,
305
- data: metadata.data,
303
+ // CORE FIELDS from HNSWVerb (v3.50.1+)
304
+ verb: hnswVerb.verb,
305
+ sourceId: hnswVerb.sourceId,
306
+ targetId: hnswVerb.targetId,
307
+ // Aliases for backward compatibility
308
+ type: hnswVerb.verb,
309
+ source: hnswVerb.sourceId,
310
+ target: hnswVerb.targetId,
311
+ // Optional fields from metadata file
312
+ weight: metadata?.weight || 1.0,
313
+ metadata: hnswVerb.metadata || {},
314
+ createdAt: metadata?.createdAt || defaultTimestamp,
315
+ updatedAt: metadata?.updatedAt || defaultTimestamp,
316
+ createdBy: metadata?.createdBy || defaultCreatedBy,
317
+ data: metadata?.data,
306
318
  embedding: hnswVerb.vector
307
319
  };
308
320
  }
@@ -322,12 +334,19 @@ export class BaseStorage extends BaseStorageAdapter {
322
334
  pagination: { limit: Number.MAX_SAFE_INTEGER }
323
335
  });
324
336
  // Convert GraphVerbs back to HNSWVerbs for internal use
337
+ // ARCHITECTURAL FIX (v3.50.1): Include core relational fields
325
338
  const hnswVerbs = [];
326
339
  for (const graphVerb of result.items) {
327
340
  const hnswVerb = {
328
341
  id: graphVerb.id,
329
342
  vector: graphVerb.vector,
330
- connections: new Map()
343
+ connections: new Map(),
344
+ // CORE RELATIONAL DATA
345
+ verb: (graphVerb.verb || graphVerb.type || 'relatedTo'),
346
+ sourceId: graphVerb.sourceId || graphVerb.source || '',
347
+ targetId: graphVerb.targetId || graphVerb.target || '',
348
+ // User metadata
349
+ metadata: graphVerb.metadata
331
350
  };
332
351
  hnswVerbs.push(hnswVerb);
333
352
  }
package/dist/utils/fieldTypeInference.d.ts
@@ -0,0 +1,181 @@
1
+ /**
2
+ * Field Type Inference System
3
+ *
4
+ * Production-ready value-based type detection inspired by DuckDB, Arrow, and Snowflake.
5
+ *
6
+ * Replaces unreliable pattern matching with robust value analysis:
7
+ * - Samples actual data values (not field names)
8
+ * - Persistent caching for O(1) lookups at billion scale
9
+ * - Progressive refinement as more data arrives
10
+ * - Zero configuration required
11
+ *
12
+ * Performance:
13
+ * - Cache hit: 0.1-0.5ms (O(1))
14
+ * - Cache miss: 5-10ms (analyze 100 samples)
15
+ * - Accuracy: 95%+ (vs 70% with pattern matching)
16
+ * - Memory: ~500 bytes per field
17
+ *
18
+ * Architecture:
19
+ * 1. Check in-memory cache (hot path)
20
+ * 2. Check persistent storage (_system/)
21
+ * 3. Analyze values if cache miss
22
+ * 4. Store result for future queries
23
+ */
24
+ import { StorageAdapter } from '../coreTypes.js';
25
+ /**
26
+ * Field type enumeration
27
+ * Ordered from most to least specific (DuckDB-inspired)
28
+ */
29
+ export declare enum FieldType {
30
+ TIMESTAMP_MS = "timestamp_ms",// Unix timestamp in milliseconds
31
+ TIMESTAMP_S = "timestamp_s",// Unix timestamp in seconds
32
+ DATE_ISO8601 = "date_iso8601",// ISO 8601 date string (YYYY-MM-DD)
33
+ DATETIME_ISO8601 = "datetime_iso8601",// ISO 8601 datetime string
34
+ BOOLEAN = "boolean",
35
+ INTEGER = "integer",
36
+ FLOAT = "float",
37
+ UUID = "uuid",
38
+ STRING = "string",
39
+ ARRAY = "array",
40
+ OBJECT = "object"
41
+ }
42
+ /**
43
+ * Field type information with metadata
44
+ */
45
+ export interface FieldTypeInfo {
46
+ field: string;
47
+ inferredType: FieldType;
48
+ confidence: number;
49
+ sampleSize: number;
50
+ lastUpdated: number;
51
+ detectionMethod: 'value';
52
+ metadata?: {
53
+ format?: string;
54
+ precision?: string;
55
+ bucketSize?: number;
56
+ minValue?: number;
57
+ maxValue?: number;
58
+ };
59
+ }
60
+ /**
61
+ * Field Type Inference System
62
+ *
63
+ * Infers data types by analyzing actual values, not field names.
64
+ * Maintains persistent cache for billion-scale performance.
65
+ */
66
+ export declare class FieldTypeInference {
67
+ private storage;
68
+ private typeCache;
69
+ private readonly SAMPLE_SIZE;
70
+ private readonly CACHE_STORAGE_PREFIX;
71
+ private readonly MIN_TIMESTAMP_S;
72
+ private readonly MAX_TIMESTAMP_S;
73
+ private readonly MIN_TIMESTAMP_MS;
74
+ private readonly MAX_TIMESTAMP_MS;
75
+ private readonly CACHE_AGE_THRESHOLD;
76
+ private readonly MIN_SAMPLE_SIZE_FOR_CONFIDENCE;
77
+ constructor(storage: StorageAdapter);
78
+ /**
79
+ * THE ONE FUNCTION: Infer field type from values
80
+ *
81
+ * Three-phase approach for billion-scale performance:
82
+ * 1. Check in-memory cache (O(1), <1ms)
83
+ * 2. Check persistent storage (O(1), ~1-2ms)
84
+ * 3. Analyze values (O(n), ~5-10ms for 100 samples)
85
+ *
86
+ * @param field Field name
87
+ * @param values Sample values to analyze (provide 1-100+ values)
88
+ * @returns Field type information with metadata
89
+ */
90
+ inferFieldType(field: string, values: any[]): Promise<FieldTypeInfo>;
91
+ /**
92
+ * Analyze values to determine field type
93
+ *
94
+ * Uses DuckDB-inspired type detection order:
95
+ * BOOLEAN → INTEGER → FLOAT → DATE → TIMESTAMP → UUID → STRING
96
+ *
97
+ * No fallbacks - pure value-based detection
98
+ */
99
+ private analyzeValues;
100
+ /**
101
+ * Check if values look like booleans
102
+ */
103
+ private looksLikeBoolean;
104
+ /**
105
+ * Check if values look like integers
106
+ */
107
+ private looksLikeInteger;
108
+ /**
109
+ * Check if values look like floats
110
+ */
111
+ private looksLikeFloat;
112
+ /**
113
+ * Detect Unix timestamp (milliseconds or seconds)
114
+ *
115
+ * Unix timestamp range: 2000-01-01 to 2100-01-01
116
+ * - Seconds: 946,684,800 to 4,102,444,800
117
+ * - Milliseconds: 946,684,800,000 to 4,102,444,800,000
118
+ */
119
+ private detectUnixTimestamp;
120
+ /**
121
+ * Detect ISO 8601 dates and datetimes
122
+ *
123
+ * Formats supported:
124
+ * - Date: YYYY-MM-DD
125
+ * - Datetime: YYYY-MM-DDTHH:MM:SS[.mmm][Z|±HH:MM]
126
+ */
127
+ private detectISO8601;
128
+ /**
129
+ * Check if values look like UUIDs
130
+ */
131
+ private looksLikeUUID;
132
+ /**
133
+ * Load type info from persistent storage
134
+ */
135
+ private loadFromStorage;
136
+ /**
137
+ * Save type info to both in-memory and persistent cache
138
+ */
139
+ private saveToCache;
140
+ /**
141
+ * Check if cached type info is still fresh
142
+ *
143
+ * Cache is considered fresh if:
144
+ * - High confidence (>= 0.9)
145
+ * - Updated within last 24 hours
146
+ * - Analyzed at least 50 samples
147
+ */
148
+ private isCacheFresh;
149
+ /**
150
+ * Progressive refinement: Update type inference as more data arrives
151
+ *
152
+ * This is called when we have more samples and want to improve confidence.
153
+ * Only updates cache if confidence improves.
154
+ */
155
+ refineTypeInference(field: string, newValues: any[]): Promise<void>;
156
+ /**
157
+ * Check if a field type is temporal
158
+ */
159
+ isTemporal(type: FieldType): boolean;
160
+ /**
161
+ * Get bucket size for a temporal field type
162
+ */
163
+ getBucketSize(typeInfo: FieldTypeInfo): number;
164
+ /**
165
+ * Clear cache for a field (useful for testing)
166
+ */
167
+ clearCache(field?: string): Promise<void>;
168
+ /**
169
+ * Get cache statistics for monitoring
170
+ */
171
+ getCacheStats(): {
172
+ size: number;
173
+ fields: string[];
174
+ temporalFields: number;
175
+ nonTemporalFields: number;
176
+ };
177
+ /**
178
+ * Create a FieldTypeInfo object
179
+ */
180
+ private createTypeInfo;
181
+ }