@soulcraft/brainy 4.1.3 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CHANGELOG.md +100 -7
  2. package/dist/brainy.d.ts +74 -16
  3. package/dist/brainy.js +74 -16
  4. package/dist/import/FormatDetector.d.ts +6 -1
  5. package/dist/import/FormatDetector.js +40 -1
  6. package/dist/import/ImportCoordinator.d.ts +155 -5
  7. package/dist/import/ImportCoordinator.js +346 -6
  8. package/dist/import/InstancePool.d.ts +136 -0
  9. package/dist/import/InstancePool.js +231 -0
  10. package/dist/importers/SmartCSVImporter.d.ts +2 -1
  11. package/dist/importers/SmartCSVImporter.js +11 -22
  12. package/dist/importers/SmartDOCXImporter.d.ts +125 -0
  13. package/dist/importers/SmartDOCXImporter.js +227 -0
  14. package/dist/importers/SmartExcelImporter.d.ts +12 -1
  15. package/dist/importers/SmartExcelImporter.js +40 -25
  16. package/dist/importers/SmartJSONImporter.d.ts +1 -0
  17. package/dist/importers/SmartJSONImporter.js +25 -6
  18. package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
  19. package/dist/importers/SmartMarkdownImporter.js +11 -16
  20. package/dist/importers/SmartPDFImporter.d.ts +2 -1
  21. package/dist/importers/SmartPDFImporter.js +11 -22
  22. package/dist/importers/SmartYAMLImporter.d.ts +121 -0
  23. package/dist/importers/SmartYAMLImporter.js +275 -0
  24. package/dist/importers/VFSStructureGenerator.js +12 -0
  25. package/dist/neural/SmartExtractor.d.ts +279 -0
  26. package/dist/neural/SmartExtractor.js +592 -0
  27. package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
  28. package/dist/neural/SmartRelationshipExtractor.js +396 -0
  29. package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
  30. package/dist/neural/embeddedTypeEmbeddings.js +2 -2
  31. package/dist/neural/entityExtractor.d.ts +3 -0
  32. package/dist/neural/entityExtractor.js +34 -36
  33. package/dist/neural/presets.d.ts +189 -0
  34. package/dist/neural/presets.js +365 -0
  35. package/dist/neural/signals/ContextSignal.d.ts +166 -0
  36. package/dist/neural/signals/ContextSignal.js +646 -0
  37. package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
  38. package/dist/neural/signals/EmbeddingSignal.js +435 -0
  39. package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
  40. package/dist/neural/signals/ExactMatchSignal.js +542 -0
  41. package/dist/neural/signals/PatternSignal.d.ts +159 -0
  42. package/dist/neural/signals/PatternSignal.js +478 -0
  43. package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
  44. package/dist/neural/signals/VerbContextSignal.js +390 -0
  45. package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
  46. package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
  47. package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
  48. package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
  49. package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
  50. package/dist/neural/signals/VerbPatternSignal.js +457 -0
  51. package/dist/types/graphTypes.d.ts +2 -0
  52. package/package.json +4 -1
@@ -0,0 +1,390 @@
1
+ /**
2
+ * VerbContextSignal - Type-based relationship inference
3
+ *
4
+ * WEIGHT: 5% (lowest weight, backup signal)
5
+ *
6
+ * Uses:
7
+ * 1. Entity type pairs (Person+Organization → WorksWith)
8
+ * 2. Semantic compatibility (Document+Person → CreatedBy)
9
+ * 3. Domain heuristics (Location+Organization → LocatedAt)
10
+ *
11
+ * PRODUCTION-READY: No TODOs, no mocks, real implementation
12
+ */
13
+ import { VerbType, NounType } from '../../types/graphTypes.js';
14
/**
 * VerbContextSignal - Type-based relationship classification
 *
 * Infers a likely relationship (verb) type purely from the types of the two
 * entities involved, using a static table of type-pair hints.
 *
 * Production features:
 * - Pre-defined type pair mappings (table is built once, in the constructor)
 * - Bidirectional hint support (a hint matches subject→object and object→subject)
 * - Generic Thing → Thing fallback when no hint matches the pair
 * - `minConfidence` is enforced on every returned result, including the fallback
 * - LRU cache for hot paths (backed by Map insertion order, O(1) per operation)
 */
export class VerbContextSignal {
    /**
     * @param brain Brainy instance; stored on the signal but not otherwise used by this class
     * @param options Optional settings: `minConfidence` (default 0.60) and `cacheSize` (default 1000)
     */
    constructor(brain, options) {
        // Type pair hints (subject type → object type → verb types)
        this.typePairHints = [];
        // LRU cache: Map iteration order doubles as recency order
        // (first key = least recently used, last key = most recently used)
        this.cache = new Map();
        // Statistics
        this.stats = {
            calls: 0,
            cacheHits: 0,
            matches: 0,
            hintHits: new Map()
        };
        this.brain = brain;
        this.options = {
            minConfidence: options?.minConfidence ?? 0.60,
            cacheSize: options?.cacheSize ?? 1000
        };
        // Initialize type pair hints
        this.initializeTypePairHints();
    }
    /**
     * Initialize all type pair hints
     *
     * Maps entity type combinations to likely relationship types. Order matters:
     * when several hints for the same pair share the top confidence, the one
     * listed first wins.
     */
    initializeTypePairHints() {
        this.typePairHints = [
            // ========== Person → Organization ==========
            {
                subjectType: NounType.Person,
                objectType: NounType.Organization,
                verbType: VerbType.WorksWith,
                confidence: 0.75,
                description: 'Person works at Organization'
            },
            {
                subjectType: NounType.Person,
                objectType: NounType.Organization,
                verbType: VerbType.MemberOf,
                confidence: 0.70,
                description: 'Person is member of Organization'
            },
            {
                subjectType: NounType.Person,
                objectType: NounType.Organization,
                verbType: VerbType.ReportsTo,
                confidence: 0.65,
                description: 'Person reports to Organization'
            },
            // ========== Person → Person ==========
            {
                subjectType: NounType.Person,
                objectType: NounType.Person,
                verbType: VerbType.WorksWith,
                confidence: 0.70,
                description: 'Person works with Person'
            },
            {
                subjectType: NounType.Person,
                objectType: NounType.Person,
                verbType: VerbType.FriendOf,
                confidence: 0.65,
                description: 'Person is friend of Person'
            },
            {
                subjectType: NounType.Person,
                objectType: NounType.Person,
                verbType: VerbType.Mentors,
                confidence: 0.65,
                description: 'Person mentors Person'
            },
            // ========== Person → Location ==========
            {
                subjectType: NounType.Person,
                objectType: NounType.Location,
                verbType: VerbType.LocatedAt,
                confidence: 0.70,
                description: 'Person located at Location'
            },
            // ========== Document → Person ==========
            {
                subjectType: NounType.Document,
                objectType: NounType.Person,
                verbType: VerbType.CreatedBy,
                confidence: 0.80,
                description: 'Document created by Person'
            },
            {
                subjectType: NounType.Document,
                objectType: NounType.Person,
                verbType: VerbType.AttributedTo,
                confidence: 0.75,
                description: 'Document attributed to Person'
            },
            // ========== Document → Document ==========
            {
                subjectType: NounType.Document,
                objectType: NounType.Document,
                verbType: VerbType.References,
                confidence: 0.75,
                description: 'Document references Document'
            },
            {
                subjectType: NounType.Document,
                objectType: NounType.Document,
                verbType: VerbType.PartOf,
                confidence: 0.70,
                description: 'Document is part of Document'
            },
            // ========== Document → Concept ==========
            {
                subjectType: NounType.Document,
                objectType: NounType.Concept,
                verbType: VerbType.Describes,
                confidence: 0.75,
                description: 'Document describes Concept'
            },
            {
                subjectType: NounType.Document,
                objectType: NounType.Concept,
                verbType: VerbType.Defines,
                confidence: 0.70,
                description: 'Document defines Concept'
            },
            // ========== Organization → Location ==========
            {
                subjectType: NounType.Organization,
                objectType: NounType.Location,
                verbType: VerbType.LocatedAt,
                confidence: 0.80,
                description: 'Organization located at Location'
            },
            // ========== Organization → Organization ==========
            {
                subjectType: NounType.Organization,
                objectType: NounType.Organization,
                verbType: VerbType.PartOf,
                confidence: 0.70,
                description: 'Organization is part of Organization'
            },
            {
                subjectType: NounType.Organization,
                objectType: NounType.Organization,
                verbType: VerbType.Competes,
                confidence: 0.65,
                description: 'Organization competes with Organization'
            },
            // ========== Product → Organization ==========
            {
                subjectType: NounType.Product,
                objectType: NounType.Organization,
                verbType: VerbType.CreatedBy,
                confidence: 0.75,
                description: 'Product created by Organization'
            },
            // NOTE(review): `Owns` with Product as the subject reads as
            // "Product owns Organization", the inverse of the description.
            // Currently unreachable (CreatedBy above always outranks it);
            // confirm the intended verb before relying on this entry.
            {
                subjectType: NounType.Product,
                objectType: NounType.Organization,
                verbType: VerbType.Owns,
                confidence: 0.70,
                description: 'Product owned by Organization'
            },
            // ========== Product → Person ==========
            {
                subjectType: NounType.Product,
                objectType: NounType.Person,
                verbType: VerbType.CreatedBy,
                confidence: 0.75,
                description: 'Product created by Person'
            },
            // ========== Event → Person ==========
            {
                subjectType: NounType.Event,
                objectType: NounType.Person,
                verbType: VerbType.CreatedBy,
                confidence: 0.70,
                description: 'Event created by Person'
            },
            // ========== Event → Location ==========
            {
                subjectType: NounType.Event,
                objectType: NounType.Location,
                verbType: VerbType.LocatedAt,
                confidence: 0.75,
                description: 'Event located at Location'
            },
            // ========== Event → Event ==========
            {
                subjectType: NounType.Event,
                objectType: NounType.Event,
                verbType: VerbType.Precedes,
                confidence: 0.70,
                description: 'Event precedes Event'
            },
            // ========== Project → Organization ==========
            {
                subjectType: NounType.Project,
                objectType: NounType.Organization,
                verbType: VerbType.BelongsTo,
                confidence: 0.75,
                description: 'Project belongs to Organization'
            },
            // ========== Project → Person ==========
            {
                subjectType: NounType.Project,
                objectType: NounType.Person,
                verbType: VerbType.CreatedBy,
                confidence: 0.70,
                description: 'Project created by Person'
            },
            // ========== Thing → Thing (generic fallback) ==========
            {
                subjectType: NounType.Thing,
                objectType: NounType.Thing,
                verbType: VerbType.RelatedTo,
                confidence: 0.60,
                description: 'Thing related to Thing'
            }
        ];
        // Initialize hint hit tracking
        for (const hint of this.typePairHints) {
            this.stats.hintHits.set(hint.description, 0);
        }
    }
    /**
     * Classify relationship type from entity type pair
     *
     * Results (including `null`) are cached per `subjectType:objectType` key.
     * The returned promise never rejects: an unexpected internal error
     * resolves to `null` and is not cached.
     *
     * @param subjectType Type of subject entity
     * @param objectType Type of object entity
     * @returns VerbSignal with classified type, or null when either type is
     *          missing or the best candidate is below `minConfidence`
     */
    async classify(subjectType, objectType) {
        this.stats.calls++;
        if (!subjectType || !objectType) {
            return null;
        }
        // Check cache (undefined = miss; null = cached "no result")
        const cacheKey = this.getCacheKey(subjectType, objectType);
        const cached = this.getFromCache(cacheKey);
        if (cached !== undefined) {
            this.stats.cacheHits++;
            return cached;
        }
        try {
            const result = this.inferFromTypes(subjectType, objectType);
            this.addToCache(cacheKey, result);
            return result;
        }
        catch (error) {
            // Best-effort signal: callers treat null as "no opinion"
            return null;
        }
    }
    /**
     * Resolve a type pair to a signal without touching the cache.
     *
     * Updates `hintHits` / `matches` statistics for direct hint matches only;
     * fallback results are not counted as matches.
     */
    inferFromTypes(subjectType, objectType) {
        const metadata = { subjectType, objectType };
        const bestHint = this.findBestHint(subjectType, objectType);
        if (bestHint === undefined) {
            // Try fallback to Thing → Thing
            const fallback = this.typePairHints.find(hint => hint.subjectType === NounType.Thing && hint.objectType === NounType.Thing);
            // The fallback must honour the configured threshold just like a
            // direct match, otherwise minConfidence could be bypassed.
            if (fallback === undefined || fallback.confidence < this.options.minConfidence) {
                return null;
            }
            return {
                type: fallback.verbType,
                confidence: fallback.confidence,
                evidence: `Type pair hint (fallback): ${fallback.description}`,
                metadata
            };
        }
        // Track hint hit (counted even if the threshold rejects it below)
        const currentHits = this.stats.hintHits.get(bestHint.description) || 0;
        this.stats.hintHits.set(bestHint.description, currentHits + 1);
        // Check confidence threshold
        if (bestHint.confidence < this.options.minConfidence) {
            return null;
        }
        this.stats.matches++;
        return {
            type: bestHint.verbType,
            confidence: bestHint.confidence,
            evidence: `Type pair hint: ${bestHint.description}`,
            metadata
        };
    }
    /**
     * Find the highest-confidence hint matching the pair in either direction.
     *
     * Ties keep the hint that appears first in the table.
     *
     * @returns The best hint, or undefined when no hint matches
     */
    findBestHint(subjectType, objectType) {
        let best;
        for (const hint of this.typePairHints) {
            const forward = hint.subjectType === subjectType && hint.objectType === objectType;
            const reverse = hint.subjectType === objectType && hint.objectType === subjectType;
            if (!forward && !reverse) {
                continue;
            }
            if (best === undefined || hint.confidence > best.confidence) {
                best = hint;
            }
        }
        return best;
    }
    /**
     * Get cache key
     */
    getCacheKey(subjectType, objectType) {
        return `${subjectType}:${objectType}`;
    }
    /**
     * Get from LRU cache
     *
     * @returns The cached value (possibly null), or undefined on a miss
     */
    getFromCache(key) {
        if (!this.cache.has(key)) {
            return undefined;
        }
        const cached = this.cache.get(key) ?? null;
        // Re-insert so the key becomes the most recently used entry
        this.cache.delete(key);
        this.cache.set(key, cached);
        return cached;
    }
    /**
     * Add to LRU cache with eviction
     */
    addToCache(key, value) {
        // Delete first so an existing key is moved to the most-recent position
        this.cache.delete(key);
        this.cache.set(key, value);
        // Evict oldest if over limit (first key in iteration order)
        if (this.cache.size > this.options.cacheSize) {
            const oldest = this.cache.keys().next().value;
            if (oldest !== undefined) {
                this.cache.delete(oldest);
            }
        }
    }
    /**
     * Get statistics
     *
     * @returns Raw counters plus derived rates and the ten most-hit hints
     */
    getStats() {
        return {
            ...this.stats,
            hintCount: this.typePairHints.length,
            cacheSize: this.cache.size,
            cacheHitRate: this.stats.calls > 0 ? this.stats.cacheHits / this.stats.calls : 0,
            matchRate: this.stats.calls > 0 ? this.stats.matches / this.stats.calls : 0,
            topHints: Array.from(this.stats.hintHits.entries())
                .sort((a, b) => b[1] - a[1])
                .slice(0, 10)
                .map(([hint, hits]) => ({ hint, hits }))
        };
    }
    /**
     * Reset statistics
     */
    resetStats() {
        this.stats.calls = 0;
        this.stats.cacheHits = 0;
        this.stats.matches = 0;
        // Reset hint hit counts
        for (const hint of this.typePairHints) {
            this.stats.hintHits.set(hint.description, 0);
        }
    }
    /**
     * Clear cache
     */
    clearCache() {
        this.cache.clear();
    }
}
390
+ //# sourceMappingURL=VerbContextSignal.js.map
@@ -0,0 +1,131 @@
1
+ /**
2
+ * VerbEmbeddingSignal - Neural semantic similarity for relationship classification
3
+ *
4
+ * WEIGHT: 35% (second highest after exact match)
5
+ *
6
+ * Uses:
7
+ * 1. 40 pre-computed verb type embeddings (384 dimensions)
8
+ * 2. Cosine similarity against context text
9
+ * 3. Semantic understanding of relationship intent
10
+ *
11
+ * PRODUCTION-READY: No TODOs, no mocks, real implementation
12
+ */
13
+ import type { Brainy } from '../../brainy.js';
14
+ import { VerbType } from '../../types/graphTypes.js';
15
+ import type { Vector } from '../../coreTypes.js';
16
/**
 * Result produced by a verb (relationship) classification signal.
 */
export interface VerbSignal {
    /** Relationship type the signal settled on. */
    type: VerbType;
    /** Confidence score attached to the classification (presumably in the 0–1 range; confirm against the implementation). */
    confidence: number;
    /** Human-readable explanation of why this type was chosen. */
    evidence: string;
    /** Optional similarity details backing the classification. */
    metadata?: {
        /** Similarity score for the chosen type, when available. */
        similarity?: number;
        /** Per-type similarity scores, when available. */
        allScores?: {
            type: VerbType;
            similarity: number;
        }[];
    };
}
31
/**
 * Tuning knobs accepted by the VerbEmbeddingSignal constructor.
 *
 * Every field is optional. Defaults are chosen by the implementation and are
 * not visible from this declaration.
 */
export interface VerbEmbeddingSignalOptions {
    /** Confidence threshold (presumably results below it are discarded; confirm against the implementation). */
    minConfidence?: number;
    /** Similarity threshold (presumably a lower bound on cosine similarity; confirm against the implementation). */
    minSimilarity?: number;
    /** Number of top candidates to consider (exact use to be confirmed against the implementation). */
    topK?: number;
    /** Capacity setting for the signal's LRU cache. */
    cacheSize?: number;
    /** Toggle for temporal boosting of recently seen patterns. */
    enableTemporalBoosting?: boolean;
}
41
/**
 * VerbEmbeddingSignal - classifies relationship types by semantic similarity.
 *
 * Features, as described by the package:
 * - 40 pre-computed verb type embeddings, so no embedding cost for the types themselves
 * - Cosine similarity between the context and each verb type
 * - Temporal boosting for patterns seen recently
 * - LRU cache for frequently repeated inputs
 * - Confidence calibrated from the distribution of similarity scores
 */
export declare class VerbEmbeddingSignal {
    private brain;
    private options;
    private verbTypeEmbeddings;
    private history;
    private readonly MAX_HISTORY;
    private cache;
    private cacheOrder;
    private stats;
    /**
     * @param brain Brainy instance the signal operates on
     * @param options Optional tuning parameters
     */
    constructor(brain: Brainy, options?: VerbEmbeddingSignalOptions);
    /**
     * Determine the relationship type expressed by a piece of text.
     *
     * @param context Sentence or paragraph describing the relationship
     * @param contextVector Embedding of `context`, if the caller already has one (avoids recomputing it)
     * @returns The resulting VerbSignal, or null when no type is classified
     */
    classify(context: string, contextVector?: Vector): Promise<VerbSignal | null>;
    /**
     * Obtain the embedding for a context string.
     */
    private getEmbedding;
    /**
     * Derive a confidence value from the similarity distribution.
     *
     * Confidence rises when the top similarity is high, when it clearly
     * leads the runner-up, and when the top K candidates agree on one type.
     */
    private calibrateConfidence;
    /**
     * Compute the boost for patterns seen recently: confidence is raised when
     * a similar context was recently classified as the same type.
     */
    private getTemporalBoost;
    /**
     * Record a classified pattern so later calls can be temporally boosted.
     */
    addToHistory(text: string, type: VerbType, vector: Vector): void;
    /**
     * Drop all recorded history.
     */
    clearHistory(): void;
    /**
     * Build the cache key.
     */
    private getCacheKey;
    /**
     * Look up an entry in the LRU cache.
     */
    private getFromCache;
    /**
     * Insert an entry into the LRU cache, evicting when necessary.
     */
    private addToCache;
    /**
     * Report counters and derived rates for this signal.
     */
    getStats(): {
        verbTypeCount: number;
        historySize: number;
        cacheSize: number;
        cacheHitRate: number;
        matchRate: number;
        calls: number;
        cacheHits: number;
        matches: number;
        temporalBoosts: number;
        averageSimilarity: number;
    };
    /**
     * Reset the statistics counters.
     */
    resetStats(): void;
    /**
     * Empty the LRU cache.
     */
    clearCache(): void;
}