@soulcraft/brainy 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@
11
11
 
12
12
  **🧠 Brainy 2.0 - The Universal Knowledge Protocol™**
13
13
 
14
- **World's first Triple Intelligence™ database**—unifying vector similarity, graph relationships, and document filtering in one magical API. Model ANY data from ANY domain using 24 standardized noun types × 40 verb types.
14
+ **World's first Triple Intelligence™ database**—unifying vector similarity, graph relationships, and document filtering in one magical API. Model ANY data from ANY domain using 31 standardized noun types × 40 verb types.
15
15
 
16
16
  **Why Brainy Leads**: We're the first to solve the impossible—combining three different database paradigms (vector, graph, document) into one unified query interface. This breakthrough enables us to be the Universal Knowledge Protocol where all tools, augmentations, and AI models speak the same language.
17
17
 
@@ -20,7 +20,7 @@
20
20
  ## 🎉 What's New in 2.0
21
21
 
22
22
  - **World's First Triple Intelligence™**: Unified vector + graph + document in ONE query
23
- - **Universal Knowledge Protocol**: 24 nouns × 40 verbs standardize all knowledge
23
+ - **Universal Knowledge Protocol**: 31 nouns × 40 verbs standardize all knowledge
24
24
  - **Infinite Expressiveness**: Model ANY data with unlimited metadata
25
25
  - **API Consolidation**: 15+ methods → 2 clean APIs (`search()` and `find()`)
26
26
  - **Natural Language**: Ask questions in plain English
@@ -59,6 +59,7 @@ export declare class NeuralImportAugmentation extends BaseAugmentation {
59
59
  readonly priority = 80;
60
60
  private config;
61
61
  private analysisCache;
62
+ private typeMatcher;
62
63
  constructor(config?: Partial<NeuralImportConfig>);
63
64
  protected onInitialize(): Promise<void>;
64
65
  protected onShutdown(): Promise<void>;
@@ -79,15 +80,23 @@ export declare class NeuralImportAugmentation extends BaseAugmentation {
79
80
  */
80
81
  private parseRawData;
81
82
  /**
82
- * Parse CSV data
83
+ * Parse CSV data - handles quoted values, escaped quotes, and edge cases
83
84
  */
84
85
  private parseCSV;
86
+ /**
87
+ * Parse YAML data
88
+ */
89
+ private parseYAML;
90
+ /**
91
+ * Parse a YAML value (handle strings, numbers, booleans, null)
92
+ */
93
+ private parseYAMLValue;
85
94
  /**
86
95
  * Perform neural analysis on parsed data
87
96
  */
88
97
  private performNeuralAnalysis;
89
98
  /**
90
- * Infer noun type from object structure
99
+ * Infer noun type from object structure using intelligent type matching
91
100
  */
92
101
  private inferNounType;
93
102
  /**
@@ -95,7 +104,7 @@ export declare class NeuralImportAugmentation extends BaseAugmentation {
95
104
  */
96
105
  private detectRelationships;
97
106
  /**
98
- * Infer verb type from field name
107
+ * Infer verb type from field name using intelligent type matching
99
108
  */
100
109
  private inferVerbType;
101
110
  /**
@@ -8,6 +8,8 @@
8
8
  */
9
9
  import { BaseAugmentation } from './brainyAugmentation.js';
10
10
  import * as path from '../universal/path.js';
11
+ import { getTypeMatcher } from './typeMatching/intelligentTypeMatcher.js';
12
+ import { prodLog } from '../utils/logger.js';
11
13
  /**
12
14
  * Neural Import Augmentation - Unified Implementation
13
15
  * Processes data with AI before storage operations
@@ -20,6 +22,7 @@ export class NeuralImportAugmentation extends BaseAugmentation {
20
22
  this.operations = ['add', 'addNoun', 'addVerb', 'all']; // Use 'all' to catch batch operations
21
23
  this.priority = 80; // High priority for data processing
22
24
  this.analysisCache = new Map();
25
+ this.typeMatcher = null;
23
26
  this.config = {
24
27
  confidenceThreshold: 0.7,
25
28
  enableWeights: true,
@@ -29,7 +32,13 @@ export class NeuralImportAugmentation extends BaseAugmentation {
29
32
  };
30
33
  }
31
34
  async onInitialize() {
32
- this.log('🧠 Neural Import augmentation initialized');
35
+ try {
36
+ this.typeMatcher = await getTypeMatcher();
37
+ this.log('🧠 Neural Import augmentation initialized with intelligent type matching');
38
+ }
39
+ catch (error) {
40
+ this.log('⚠️ Failed to initialize type matcher, falling back to heuristics', 'warn');
41
+ }
33
42
  }
34
43
  async onShutdown() {
35
44
  this.analysisCache.clear();
@@ -128,13 +137,7 @@ export class NeuralImportAugmentation extends BaseAugmentation {
128
137
  return this.parseCSV(content);
129
138
  case 'yaml':
130
139
  case 'yml':
131
- // For now, basic YAML support - in full implementation would use yaml parser
132
- try {
133
- return JSON.parse(content); // Placeholder
134
- }
135
- catch {
136
- return [{ text: content }];
137
- }
140
+ return this.parseYAML(content);
138
141
  case 'txt':
139
142
  case 'text':
140
143
  // Split text into sentences/paragraphs for analysis
@@ -145,24 +148,174 @@ export class NeuralImportAugmentation extends BaseAugmentation {
145
148
  }
146
149
  }
147
150
  /**
148
- * Parse CSV data
151
+ * Parse CSV data - handles quoted values, escaped quotes, and edge cases
149
152
  */
150
153
  parseCSV(content) {
151
- const lines = content.split('\n').filter(line => line.trim());
154
+ const lines = content.split('\n');
152
155
  if (lines.length === 0)
153
156
  return [];
154
- const headers = lines[0].split(',').map(h => h.trim());
157
+ // Parse a CSV line handling quotes
158
+ const parseLine = (line) => {
159
+ const result = [];
160
+ let current = '';
161
+ let inQuotes = false;
162
+ let i = 0;
163
+ while (i < line.length) {
164
+ const char = line[i];
165
+ const nextChar = line[i + 1];
166
+ if (char === '"') {
167
+ if (inQuotes && nextChar === '"') {
168
+ // Escaped quote
169
+ current += '"';
170
+ i += 2;
171
+ }
172
+ else {
173
+ // Toggle quote mode
174
+ inQuotes = !inQuotes;
175
+ i++;
176
+ }
177
+ }
178
+ else if (char === ',' && !inQuotes) {
179
+ // Field separator
180
+ result.push(current.trim());
181
+ current = '';
182
+ i++;
183
+ }
184
+ else {
185
+ current += char;
186
+ i++;
187
+ }
188
+ }
189
+ // Add last field
190
+ result.push(current.trim());
191
+ return result;
192
+ };
193
+ // Parse headers
194
+ const headers = parseLine(lines[0]);
155
195
  const data = [];
196
+ // Parse data rows
156
197
  for (let i = 1; i < lines.length; i++) {
157
- const values = lines[i].split(',').map(v => v.trim());
198
+ const line = lines[i].trim();
199
+ if (!line)
200
+ continue; // Skip empty lines
201
+ const values = parseLine(line);
158
202
  const row = {};
159
203
  headers.forEach((header, index) => {
160
- row[header] = values[index] || '';
204
+ const value = values[index] || '';
205
+ // Try to parse numbers
206
+ const num = Number(value);
207
+ row[header] = !isNaN(num) && value !== '' ? num : value;
161
208
  });
162
209
  data.push(row);
163
210
  }
164
211
  return data;
165
212
  }
213
+ /**
214
+ * Parse YAML data
215
+ */
216
+ parseYAML(content) {
217
+ try {
218
+ // Simple YAML parser for basic structures
219
+ // For full YAML support, we'd use js-yaml library
220
+ const lines = content.split('\n');
221
+ const result = [];
222
+ let currentObject = null;
223
+ let currentIndent = 0;
224
+ for (const line of lines) {
225
+ const trimmed = line.trim();
226
+ if (!trimmed || trimmed.startsWith('#'))
227
+ continue; // Skip empty lines and comments
228
+ // Calculate indentation
229
+ const indent = line.length - line.trimStart().length;
230
+ // Check for array item
231
+ if (trimmed.startsWith('- ')) {
232
+ const value = trimmed.substring(2).trim();
233
+ if (indent === 0) {
234
+ // Top-level array item
235
+ if (value.includes(':')) {
236
+ // Object in array
237
+ currentObject = {};
238
+ result.push(currentObject);
239
+ const [key, val] = value.split(':').map(s => s.trim());
240
+ currentObject[key] = this.parseYAMLValue(val);
241
+ }
242
+ else {
243
+ result.push(this.parseYAMLValue(value));
244
+ }
245
+ }
246
+ else if (currentObject) {
247
+ // Nested array
248
+ const lastKey = Object.keys(currentObject).pop();
249
+ if (lastKey) {
250
+ if (!Array.isArray(currentObject[lastKey])) {
251
+ currentObject[lastKey] = [];
252
+ }
253
+ currentObject[lastKey].push(this.parseYAMLValue(value));
254
+ }
255
+ }
256
+ }
257
+ else if (trimmed.includes(':')) {
258
+ // Key-value pair
259
+ const colonIndex = trimmed.indexOf(':');
260
+ const key = trimmed.substring(0, colonIndex).trim();
261
+ const value = trimmed.substring(colonIndex + 1).trim();
262
+ if (indent === 0) {
263
+ // Top-level object
264
+ if (!currentObject) {
265
+ currentObject = {};
266
+ result.push(currentObject);
267
+ }
268
+ currentObject[key] = this.parseYAMLValue(value);
269
+ currentIndent = 0;
270
+ }
271
+ else if (currentObject) {
272
+ // Nested object
273
+ if (indent > currentIndent && !value) {
274
+ // Start of nested object
275
+ const lastKey = Object.keys(currentObject).pop();
276
+ if (lastKey) {
277
+ currentObject[lastKey] = { [key]: '' };
278
+ }
279
+ }
280
+ else {
281
+ currentObject[key] = this.parseYAMLValue(value);
282
+ }
283
+ currentIndent = indent;
284
+ }
285
+ }
286
+ }
287
+ // If we built a single object and not an array, wrap it
288
+ if (result.length === 0 && currentObject) {
289
+ result.push(currentObject);
290
+ }
291
+ return result.length > 0 ? result : [{ text: content }];
292
+ }
293
+ catch (error) {
294
+ prodLog.warn('YAML parsing failed, treating as text:', error);
295
+ return [{ text: content }];
296
+ }
297
+ }
298
+ /**
299
+ * Parse a YAML value (handle strings, numbers, booleans, null)
300
+ */
301
+ parseYAMLValue(value) {
302
+ if (!value || value === '~' || value === 'null')
303
+ return null;
304
+ if (value === 'true')
305
+ return true;
306
+ if (value === 'false')
307
+ return false;
308
+ // Remove quotes if present
309
+ if ((value.startsWith('"') && value.endsWith('"')) ||
310
+ (value.startsWith("'") && value.endsWith("'"))) {
311
+ return value.slice(1, -1);
312
+ }
313
+ // Try to parse as number
314
+ const num = Number(value);
315
+ if (!isNaN(num) && value !== '')
316
+ return num;
317
+ return value;
318
+ }
166
319
  /**
167
320
  * Perform neural analysis on parsed data
168
321
  */
@@ -177,14 +330,14 @@ export class NeuralImportAugmentation extends BaseAugmentation {
177
330
  const entityId = item.id || item.name || item.title || `entity_${Date.now()}_${Math.random()}`;
178
331
  detectedEntities.push({
179
332
  originalData: item,
180
- nounType: this.inferNounType(item),
333
+ nounType: await this.inferNounType(item),
181
334
  confidence: 0.85,
182
335
  suggestedId: String(entityId),
183
336
  reasoning: 'Detected from structured data',
184
337
  alternativeTypes: []
185
338
  });
186
339
  // Detect relationships from references
187
- this.detectRelationships(item, entityId, detectedRelationships);
340
+ await this.detectRelationships(item, entityId, detectedRelationships);
188
341
  }
189
342
  }
190
343
  // Generate insights
@@ -216,36 +369,31 @@ export class NeuralImportAugmentation extends BaseAugmentation {
216
369
  };
217
370
  }
218
371
  /**
219
- * Infer noun type from object structure
372
+ * Infer noun type from object structure using intelligent type matching
220
373
  */
221
- inferNounType(obj) {
222
- // Simple heuristics for type detection
223
- if (obj.email || obj.username)
224
- return 'Person';
225
- if (obj.title && obj.content)
226
- return 'Document';
227
- if (obj.price || obj.product)
228
- return 'Product';
229
- if (obj.date || obj.timestamp)
230
- return 'Event';
231
- if (obj.url || obj.link)
232
- return 'Resource';
233
- if (obj.lat || obj.longitude)
234
- return 'Location';
235
- // Default fallback
236
- return 'Entity';
374
+ async inferNounType(obj) {
375
+ if (!this.typeMatcher) {
376
+ // Initialize type matcher if not available
377
+ this.typeMatcher = await getTypeMatcher();
378
+ }
379
+ const result = await this.typeMatcher.matchNounType(obj);
380
+ // Log if confidence is low for debugging
381
+ if (result.confidence < 0.5) {
382
+ this.log(`Low confidence (${result.confidence.toFixed(2)}) for noun type: ${result.type}`, 'warn');
383
+ }
384
+ return result.type;
237
385
  }
238
386
  /**
239
387
  * Detect relationships from object references
240
388
  */
241
- detectRelationships(obj, sourceId, relationships) {
389
+ async detectRelationships(obj, sourceId, relationships) {
242
390
  // Look for reference patterns
243
391
  for (const [key, value] of Object.entries(obj)) {
244
392
  if (key.endsWith('Id') || key.endsWith('_id') || key === 'parentId' || key === 'userId') {
245
393
  relationships.push({
246
394
  sourceId,
247
395
  targetId: String(value),
248
- verbType: this.inferVerbType(key),
396
+ verbType: await this.inferVerbType(key, obj, { id: value }),
249
397
  confidence: 0.75,
250
398
  weight: 1,
251
399
  reasoning: `Reference detected in field: ${key}`,
@@ -259,7 +407,7 @@ export class NeuralImportAugmentation extends BaseAugmentation {
259
407
  relationships.push({
260
408
  sourceId,
261
409
  targetId: String(targetId),
262
- verbType: this.inferVerbType(key),
410
+ verbType: await this.inferVerbType(key, obj, { id: targetId }),
263
411
  confidence: 0.7,
264
412
  weight: 1,
265
413
  reasoning: `Array reference in field: ${key}`,
@@ -271,27 +419,19 @@ export class NeuralImportAugmentation extends BaseAugmentation {
271
419
  }
272
420
  }
273
421
  /**
274
- * Infer verb type from field name
422
+ * Infer verb type from field name using intelligent type matching
275
423
  */
276
- inferVerbType(fieldName) {
277
- const normalized = fieldName.toLowerCase();
278
- if (normalized.includes('parent'))
279
- return 'childOf';
280
- if (normalized.includes('user'))
281
- return 'belongsTo';
282
- if (normalized.includes('author'))
283
- return 'authoredBy';
284
- if (normalized.includes('owner'))
285
- return 'ownedBy';
286
- if (normalized.includes('creator'))
287
- return 'createdBy';
288
- if (normalized.includes('member'))
289
- return 'memberOf';
290
- if (normalized.includes('tag'))
291
- return 'taggedWith';
292
- if (normalized.includes('category'))
293
- return 'categorizedAs';
294
- return 'relatedTo';
424
+ async inferVerbType(fieldName, sourceObj, targetObj) {
425
+ if (!this.typeMatcher) {
426
+ // Initialize type matcher if not available
427
+ this.typeMatcher = await getTypeMatcher();
428
+ }
429
+ const result = await this.typeMatcher.matchVerbType(sourceObj, targetObj, fieldName);
430
+ // Log if confidence is low for debugging
431
+ if (result.confidence < 0.5) {
432
+ this.log(`Low confidence (${result.confidence.toFixed(2)}) for verb type: ${result.type}`, 'warn');
433
+ }
434
+ return result.type;
295
435
  }
296
436
  /**
297
437
  * Group entities by type
@@ -0,0 +1,83 @@
1
+ /**
2
+ * Intelligent Type Matcher - Uses embeddings for semantic type detection
3
+ *
4
+ * This module uses our existing TransformerEmbedding and similarity functions
5
+ * to intelligently match data to our 31 noun types and 40 verb types.
6
+ *
7
+ * Features:
8
+ * - Semantic similarity matching using embeddings
9
+ * - Context-aware type detection
10
+ * - Confidence scoring
11
+ * - Caching for performance
12
+ */
13
+ /**
14
+ * Result of type matching with confidence scores
15
+ */
16
+ export interface TypeMatchResult {
17
+ type: string;
18
+ confidence: number;
19
+ reasoning: string;
20
+ alternatives: Array<{
21
+ type: string;
22
+ confidence: number;
23
+ }>;
24
+ }
25
+ /**
26
+ * Intelligent Type Matcher using semantic embeddings
27
+ */
28
+ export declare class IntelligentTypeMatcher {
29
+ private embedder;
30
+ private nounEmbeddings;
31
+ private verbEmbeddings;
32
+ private initialized;
33
+ private cache;
34
+ constructor();
35
+ /**
36
+ * Initialize the type matcher by generating embeddings for all types
37
+ */
38
+ init(): Promise<void>;
39
+ /**
40
+ * Match an object to the most appropriate noun type
41
+ */
42
+ matchNounType(obj: any): Promise<TypeMatchResult>;
43
+ /**
44
+ * Match a relationship to the most appropriate verb type
45
+ */
46
+ matchVerbType(sourceObj: any, targetObj: any, relationshipHint?: string): Promise<TypeMatchResult>;
47
+ /**
48
+ * Create text representation of an object for embedding
49
+ */
50
+ private createTextRepresentation;
51
+ /**
52
+ * Create text representation of a relationship
53
+ */
54
+ private createRelationshipText;
55
+ /**
56
+ * Get a brief summary of an object
57
+ */
58
+ private getObjectSummary;
59
+ /**
60
+ * Apply heuristic rules for noun type detection
61
+ */
62
+ private applyNounHeuristics;
63
+ /**
64
+ * Apply heuristic rules for verb type detection
65
+ */
66
+ private applyVerbHeuristics;
67
+ /**
68
+ * Generate human-readable reasoning for the type selection
69
+ */
70
+ private generateReasoning;
71
+ /**
72
+ * Clear the cache
73
+ */
74
+ clearCache(): void;
75
+ /**
76
+ * Dispose of resources
77
+ */
78
+ dispose(): Promise<void>;
79
+ }
80
+ /**
81
+ * Get or create the global type matcher instance
82
+ */
83
+ export declare function getTypeMatcher(): Promise<IntelligentTypeMatcher>;