@soulcraft/brainy 4.1.4 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/dist/import/FormatDetector.d.ts +6 -1
- package/dist/import/FormatDetector.js +40 -1
- package/dist/import/ImportCoordinator.d.ts +102 -4
- package/dist/import/ImportCoordinator.js +248 -6
- package/dist/import/InstancePool.d.ts +136 -0
- package/dist/import/InstancePool.js +231 -0
- package/dist/importers/SmartCSVImporter.d.ts +2 -1
- package/dist/importers/SmartCSVImporter.js +11 -22
- package/dist/importers/SmartDOCXImporter.d.ts +125 -0
- package/dist/importers/SmartDOCXImporter.js +227 -0
- package/dist/importers/SmartExcelImporter.d.ts +12 -1
- package/dist/importers/SmartExcelImporter.js +40 -25
- package/dist/importers/SmartJSONImporter.d.ts +1 -0
- package/dist/importers/SmartJSONImporter.js +25 -6
- package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
- package/dist/importers/SmartMarkdownImporter.js +11 -16
- package/dist/importers/SmartPDFImporter.d.ts +2 -1
- package/dist/importers/SmartPDFImporter.js +11 -22
- package/dist/importers/SmartYAMLImporter.d.ts +121 -0
- package/dist/importers/SmartYAMLImporter.js +275 -0
- package/dist/importers/VFSStructureGenerator.js +12 -0
- package/dist/neural/SmartExtractor.d.ts +279 -0
- package/dist/neural/SmartExtractor.js +592 -0
- package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
- package/dist/neural/SmartRelationshipExtractor.js +396 -0
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/neural/entityExtractor.d.ts +3 -0
- package/dist/neural/entityExtractor.js +34 -36
- package/dist/neural/presets.d.ts +189 -0
- package/dist/neural/presets.js +365 -0
- package/dist/neural/signals/ContextSignal.d.ts +166 -0
- package/dist/neural/signals/ContextSignal.js +646 -0
- package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
- package/dist/neural/signals/EmbeddingSignal.js +435 -0
- package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
- package/dist/neural/signals/ExactMatchSignal.js +542 -0
- package/dist/neural/signals/PatternSignal.d.ts +159 -0
- package/dist/neural/signals/PatternSignal.js +478 -0
- package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
- package/dist/neural/signals/VerbContextSignal.js +390 -0
- package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
- package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
- package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
- package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
- package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
- package/dist/neural/signals/VerbPatternSignal.js +457 -0
- package/dist/types/graphTypes.d.ts +2 -0
- package/dist/utils/metadataIndex.d.ts +22 -0
- package/dist/utils/metadataIndex.js +76 -0
- package/package.json +4 -1
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VerbPatternSignal - Regex pattern matching for relationship classification
|
|
3
|
+
*
|
|
4
|
+
* WEIGHT: 20% (deterministic, high precision)
|
|
5
|
+
*
|
|
6
|
+
* Uses:
|
|
7
|
+
* 1. Subject-verb-object patterns ("X created Y", "X belongs to Y")
|
|
8
|
+
* 2. Prepositional phrase patterns ("in", "at", "by", "of")
|
|
9
|
+
* 3. Structural patterns (parentheses, commas, formatting)
|
|
10
|
+
*
|
|
11
|
+
* PRODUCTION-READY: No TODOs, no mocks, real implementation
|
|
12
|
+
*/
|
|
13
|
+
import { VerbType } from '../../types/graphTypes.js';
|
|
14
|
+
/**
 * VerbPatternSignal - deterministic, regex-based relationship classification.
 *
 * Holds an ordered table of case-insensitive regular expressions, each mapped
 * to a VerbType and a fixed confidence. classify() scans the context text
 * against the table in declaration order and reports the FIRST pattern that
 * matches; the table is not sorted by confidence, so an earlier, more general
 * pattern shadows any later pattern that could match the same text.
 *
 * Results (including "no match") are memoized in a bounded LRU cache, and
 * per-pattern hit counters are kept for diagnostics via getStats().
 */
export class VerbPatternSignal {
    /**
     * @param brain   Owning instance; stored on `this.brain`, not otherwise used by this class.
     * @param options Optional settings:
     *                - minConfidence (default 0.65)
     *                - cacheSize     (default 2000) maximum number of cached results
     */
    constructor(brain, options) {
        // Ordered pattern table, filled by initializePatterns()
        this.patterns = [];
        // LRU cache. A Map iterates in insertion order, so the first key is
        // always the least recently used entry and no separate order list is needed.
        this.cache = new Map();
        // Statistics
        this.stats = {
            calls: 0,
            cacheHits: 0,
            matches: 0,
            patternHits: new Map()
        };
        this.brain = brain;
        this.options = {
            // NOTE(review): minConfidence is stored but classify() does not consult it;
            // every built-in pattern has confidence >= 0.70. Confirm whether callers
            // apply this threshold themselves.
            minConfidence: options?.minConfidence ?? 0.65,
            cacheSize: options?.cacheSize ?? 2000
        };
        this.initializePatterns();
    }
    /**
     * Read-only view of cache keys from least to most recently used.
     * Derived from the cache Map; kept so code that inspected the former
     * `cacheOrder` array still gets an equivalent list.
     */
    get cacheOrder() {
        return [...this.cache.keys()];
    }
    /**
     * Build the pattern table and zero the per-pattern hit counters.
     *
     * Patterns are grouped by relationship category for readability. ORDER
     * MATTERS: classify() returns the first match. For example 'member of'
     * is claimed by the part-whole pattern before the membership pattern,
     * and 'follows' by the social pattern before temporal succession.
     */
    initializePatterns() {
        this.patterns = [
            // ========== Creation & Authorship ==========
            {
                regex: /\b(?:created?|made|built|developed|designed|wrote|authored|composed)\s+(?:by|from)\b/i,
                type: VerbType.CreatedBy,
                confidence: 0.90,
                description: 'Creation with agent (passive)'
            },
            {
                regex: /\b(?:creates?|makes?|builds?|develops?|designs?|writes?|authors?|composes?)\b/i,
                type: VerbType.Creates,
                confidence: 0.85,
                description: 'Creation (active)'
            },
            // ========== Ownership & Attribution ==========
            {
                regex: /\b(?:owned|possessed|held)\s+by\b/i,
                type: VerbType.Owns,
                confidence: 0.90,
                description: 'Ownership (passive)'
            },
            {
                // possess(?:es)? so the base form "possess" matches too
                regex: /\b(?:owns?|possess(?:es)?|holds?)\b/i,
                type: VerbType.Owns,
                confidence: 0.85,
                description: 'Ownership (active)'
            },
            {
                regex: /\b(?:attributed|ascribed|credited)\s+to\b/i,
                type: VerbType.AttributedTo,
                confidence: 0.90,
                description: 'Attribution'
            },
            {
                regex: /\bbelongs?\s+to\b/i,
                type: VerbType.BelongsTo,
                confidence: 0.95,
                description: 'Belonging relationship'
            },
            // ========== Part-Whole Relationships ==========
            {
                regex: /\b(?:part|component|element|member|section)\s+of\b/i,
                type: VerbType.PartOf,
                confidence: 0.95,
                description: 'Part-whole relationship'
            },
            {
                // encompass(?:es)? so the base form "encompass" matches too
                regex: /\b(?:contains?|includes?|comprises?|encompass(?:es)?)\b/i,
                type: VerbType.Contains,
                confidence: 0.85,
                description: 'Container relationship'
            },
            // ========== Location Relationships ==========
            {
                regex: /\b(?:located|situated|based|positioned)\s+(?:in|at|on)\b/i,
                type: VerbType.LocatedAt,
                confidence: 0.90,
                description: 'Location (passive)'
            },
            {
                regex: /\b(?:in|at)\s+(?:the\s+)?(?:city|town|country|state|region|area)\s+of\b/i,
                type: VerbType.LocatedAt,
                confidence: 0.85,
                description: 'Geographic location'
            },
            // ========== Organizational Relationships ==========
            {
                regex: /\b(?:member|employee|staff|personnel)\s+(?:of|at)\b/i,
                type: VerbType.MemberOf,
                confidence: 0.90,
                description: 'Membership'
            },
            {
                regex: /\b(?:works?|worked)\s+(?:at|for|with)\b/i,
                type: VerbType.WorksWith,
                confidence: 0.85,
                description: 'Work relationship'
            },
            {
                regex: /\b(?:employed|hired)\s+(?:by|at)\b/i,
                type: VerbType.WorksWith,
                confidence: 0.85,
                description: 'Employment'
            },
            {
                regex: /\breports?\s+to\b/i,
                type: VerbType.ReportsTo,
                confidence: 0.95,
                description: 'Reporting structure'
            },
            {
                regex: /\b(?:manages?|supervises?|oversees?)\b/i,
                type: VerbType.Supervises,
                confidence: 0.85,
                description: 'Management relationship'
            },
            {
                regex: /\bmentors?\b/i,
                type: VerbType.Mentors,
                confidence: 0.90,
                description: 'Mentorship'
            },
            // ========== Social Relationships ==========
            {
                regex: /\b(?:friend|colleague|associate|companion)\s+of\b/i,
                type: VerbType.FriendOf,
                confidence: 0.85,
                description: 'Friendship'
            },
            {
                regex: /\bfollows?\b/i,
                type: VerbType.Follows,
                confidence: 0.75,
                description: 'Following relationship'
            },
            {
                regex: /\blikes?\b/i,
                type: VerbType.Likes,
                confidence: 0.70,
                description: 'Preference'
            },
            // ========== Reference & Citation ==========
            {
                regex: /\b(?:references?|cites?|mentions?|quotes?)\b/i,
                type: VerbType.References,
                confidence: 0.85,
                description: 'Reference relationship'
            },
            {
                regex: /\bdescribes?\b/i,
                type: VerbType.Describes,
                confidence: 0.80,
                description: 'Description'
            },
            {
                regex: /\bdefines?\b/i,
                type: VerbType.Defines,
                confidence: 0.85,
                description: 'Definition'
            },
            // ========== Temporal Relationships ==========
            {
                regex: /\b(?:precedes?|comes?\s+before|happens?\s+before)\b/i,
                type: VerbType.Precedes,
                confidence: 0.85,
                description: 'Temporal precedence'
            },
            {
                regex: /\b(?:succeeds?|follows?|comes?\s+after|happens?\s+after)\b/i,
                type: VerbType.Succeeds,
                confidence: 0.85,
                description: 'Temporal succession'
            },
            {
                regex: /\bbefore\b/i,
                type: VerbType.Precedes,
                confidence: 0.70,
                description: 'Before (temporal)'
            },
            {
                regex: /\bafter\b/i,
                type: VerbType.Succeeds,
                confidence: 0.70,
                description: 'After (temporal)'
            },
            // ========== Causal Relationships ==========
            {
                regex: /\b(?:causes?|results?\s+in|leads?\s+to|triggers?)\b/i,
                type: VerbType.Causes,
                confidence: 0.85,
                description: 'Causation'
            },
            {
                regex: /\b(?:requires?|needs?|demands?)\b/i,
                type: VerbType.Requires,
                confidence: 0.80,
                description: 'Requirement'
            },
            {
                regex: /\bdepends?\s+(?:on|upon)\b/i,
                type: VerbType.DependsOn,
                confidence: 0.90,
                description: 'Dependency'
            },
            // ========== Transformation Relationships ==========
            {
                regex: /\b(?:transforms?|converts?|changes?)\b/i,
                type: VerbType.Transforms,
                confidence: 0.85,
                description: 'Transformation'
            },
            {
                regex: /\bbecomes?\b/i,
                type: VerbType.Becomes,
                confidence: 0.85,
                description: 'Becoming'
            },
            {
                // modif(?:y|ies) so the base form "modify" matches too
                regex: /\b(?:modif(?:y|ies)|alters?|adjusts?|adapts?)\b/i,
                type: VerbType.Modifies,
                confidence: 0.80,
                description: 'Modification'
            },
            {
                regex: /\b(?:consumes?|uses?\s+up|exhausts?)\b/i,
                type: VerbType.Consumes,
                confidence: 0.80,
                description: 'Consumption'
            },
            // ========== Classification & Categorization ==========
            {
                // classif(?:y|ies) so the base form "classify" matches too
                regex: /\b(?:categorizes?|classif(?:y|ies)|groups?)\b/i,
                type: VerbType.Categorizes,
                confidence: 0.85,
                description: 'Categorization'
            },
            {
                // quantif(?:y|ies) so the base form "quantify" matches too
                regex: /\b(?:measures?|quantif(?:y|ies)|gauges?)\b/i,
                type: VerbType.Measures,
                confidence: 0.80,
                description: 'Measurement'
            },
            {
                // assess(?:es)? so the base form "assess" matches too
                regex: /\b(?:evaluates?|assess(?:es)?|judges?)\b/i,
                type: VerbType.Evaluates,
                confidence: 0.80,
                description: 'Evaluation'
            },
            // ========== Implementation & Extension ==========
            {
                // appl(?:y|ies) so the base form "apply" matches too
                regex: /\b(?:uses?|utilizes?|employs?|appl(?:y|ies))\b/i,
                type: VerbType.Uses,
                confidence: 0.75,
                description: 'Usage'
            },
            {
                regex: /\b(?:implements?|realizes?|executes?)\b/i,
                type: VerbType.Implements,
                confidence: 0.85,
                description: 'Implementation'
            },
            {
                regex: /\bextends?\b/i,
                type: VerbType.Extends,
                confidence: 0.90,
                description: 'Extension (inheritance)'
            },
            {
                // "from" is optional as a whole group, so "inherits" at the end of
                // the text matches and the matched text carries no trailing space
                regex: /\binherits?(?:\s+from)?\b/i,
                type: VerbType.Inherits,
                confidence: 0.90,
                description: 'Inheritance'
            },
            // ========== Interaction Relationships ==========
            {
                regex: /\b(?:communicates?|talks?\s+to|speaks?\s+to)\b/i,
                type: VerbType.Communicates,
                confidence: 0.80,
                description: 'Communication'
            },
            {
                // clash(?:es)? so the base form "clash" matches too
                regex: /\b(?:conflicts?|clash(?:es)?|contradicts?)\b/i,
                type: VerbType.Conflicts,
                confidence: 0.85,
                description: 'Conflict'
            },
            {
                regex: /\b(?:synchronizes?|syncs?|coordinates?)\b/i,
                type: VerbType.Synchronizes,
                confidence: 0.85,
                description: 'Synchronization'
            },
            {
                regex: /\b(?:competes?|rivals?)\s+(?:with|against)\b/i,
                type: VerbType.Competes,
                confidence: 0.85,
                description: 'Competition'
            }
        ];
        // Initialize pattern hit tracking
        for (const pattern of this.patterns) {
            this.stats.patternHits.set(pattern.description, 0);
        }
    }
    /**
     * Classify the relationship expressed in `context` using the pattern table.
     *
     * Only `context` is examined by the patterns; `subject` and `object` take
     * part in the cache key only.
     *
     * @param subject Subject entity (e.g., "Alice")
     * @param object  Object entity (e.g., "UCSF")
     * @param context Full context text
     * @returns `{ type, confidence, evidence, metadata: { pattern, matchedText } }`
     *          for the first matching pattern, or null when the context is not a
     *          non-blank string or no pattern matches. Cached results are
     *          returned as the same object instance on every hit.
     */
    async classify(subject, object, context) {
        this.stats.calls++;
        // A non-string context cannot be matched; report "no signal" rather than
        // throwing, consistent with the null returned for empty input.
        if (typeof context !== 'string') {
            return null;
        }
        const normalized = context.trim();
        if (normalized.length === 0) {
            return null;
        }
        const cacheKey = this.getCacheKey(subject, object, normalized);
        const cached = this.getFromCache(cacheKey);
        if (cached !== undefined) {
            this.stats.cacheHits++;
            return cached;
        }
        try {
            // Patterns are tried in declaration order; the first match wins
            for (const pattern of this.patterns) {
                // Patterns carry no g/y flag, so exec() is stateless; calling it
                // once gives both the match test and the matched text.
                const match = pattern.regex.exec(normalized);
                if (match === null) {
                    continue;
                }
                const currentHits = this.stats.patternHits.get(pattern.description) || 0;
                this.stats.patternHits.set(pattern.description, currentHits + 1);
                this.stats.matches++;
                const result = {
                    type: pattern.type,
                    confidence: pattern.confidence,
                    evidence: `Pattern match: ${pattern.description}`,
                    metadata: {
                        pattern: pattern.regex.source,
                        matchedText: match[0]
                    }
                };
                this.addToCache(cacheKey, result);
                return result;
            }
            // No pattern matched; cache the miss so repeated lookups skip the scan
            this.addToCache(cacheKey, null);
            return null;
        }
        catch {
            // Deliberate best-effort: a malformed entry pushed into this.patterns
            // must not make classification reject; report "no signal" instead.
            return null;
        }
    }
    /**
     * Build the cache key for a classification request.
     *
     * The FULL context is part of the key: the patterns scan the whole text,
     * so keying on a truncated prefix would let two contexts that share a
     * prefix return each other's cached result. Lower-casing is safe because
     * every pattern is case-insensitive.
     */
    getCacheKey(subject, object, context) {
        return `${subject}:${object}:${context}`.toLowerCase();
    }
    /**
     * Look up a cached result and mark it most recently used.
     *
     * @returns the cached value (null for a cached "no match"), or undefined
     *          when the key is not cached
     */
    getFromCache(key) {
        if (!this.cache.has(key)) {
            return undefined;
        }
        const cached = this.cache.get(key);
        // Delete + set moves the key to the end of the Map's iteration order
        this.cache.delete(key);
        this.cache.set(key, cached);
        return cached ?? null;
    }
    /**
     * Store a result as the most recently used entry, evicting the least
     * recently used entry when the cache exceeds options.cacheSize.
     */
    addToCache(key, value) {
        // Delete first so re-adding an existing key refreshes its position
        this.cache.delete(key);
        this.cache.set(key, value);
        if (this.cache.size > this.options.cacheSize) {
            // First key in iteration order is the least recently used
            const oldest = this.cache.keys().next().value;
            this.cache.delete(oldest);
        }
    }
    /**
     * Get statistics: raw counters plus pattern count, current cache size,
     * hit/match rates per call, and the ten most frequently hit patterns.
     */
    getStats() {
        return {
            ...this.stats,
            patternCount: this.patterns.length,
            cacheSize: this.cache.size,
            cacheHitRate: this.stats.calls > 0 ? this.stats.cacheHits / this.stats.calls : 0,
            matchRate: this.stats.calls > 0 ? this.stats.matches / this.stats.calls : 0,
            topPatterns: Array.from(this.stats.patternHits.entries())
                .sort((a, b) => b[1] - a[1])
                .slice(0, 10)
                .map(([pattern, hits]) => ({ pattern, hits }))
        };
    }
    /**
     * Reset all counters, including per-pattern hit counts, to zero.
     * The cache is left untouched.
     */
    resetStats() {
        this.stats.calls = 0;
        this.stats.cacheHits = 0;
        this.stats.matches = 0;
        for (const pattern of this.patterns) {
            this.stats.patternHits.set(pattern.description, 0);
        }
    }
    /**
     * Drop every cached classification result.
     */
    clearCache() {
        this.cache.clear();
    }
}
|
|
457
|
+
//# sourceMappingURL=VerbPatternSignal.js.map
|
|
@@ -298,6 +298,28 @@ export declare class MetadataIndexManager {
|
|
|
298
298
|
* Save field index to storage with file locking
|
|
299
299
|
*/
|
|
300
300
|
private saveFieldIndex;
|
|
301
|
+
/**
|
|
302
|
+
* Save field registry to storage for fast cold-start discovery
|
|
303
|
+
* v4.2.1: Solves 100x performance regression by persisting field directory
|
|
304
|
+
*
|
|
305
|
+
* This enables instant cold starts by discovering which fields have persisted indices
|
|
306
|
+
* without needing to rebuild from scratch. Similar to how HNSW persists system metadata.
|
|
307
|
+
*
|
|
308
|
+
* Registry size: ~4-8KB for typical deployments (50-200 fields)
|
|
309
|
+
* Scales: O(log N) - field count grows logarithmically with entity count
|
|
310
|
+
*/
|
|
311
|
+
private saveFieldRegistry;
|
|
312
|
+
/**
|
|
313
|
+
* Load field registry from storage to populate fieldIndexes directory
|
|
314
|
+
* v4.2.1: Enables O(1) discovery of persisted sparse indices
|
|
315
|
+
*
|
|
316
|
+
* Called during init() to discover which fields have persisted indices.
|
|
317
|
+
* Populates fieldIndexes Map with skeleton entries - actual sparse indices
|
|
318
|
+
* are lazy-loaded via UnifiedCache when first accessed.
|
|
319
|
+
*
|
|
320
|
+
* Gracefully handles missing registry (first run or corrupted data).
|
|
321
|
+
*/
|
|
322
|
+
private loadFieldRegistry;
|
|
301
323
|
/**
|
|
302
324
|
* Get count of entities by type - O(1) operation using existing tracking
|
|
303
325
|
* This exposes the production-ready counting that's already maintained
|
|
@@ -92,6 +92,9 @@ export class MetadataIndexManager {
|
|
|
92
92
|
* This must be called after construction and before any queries
|
|
93
93
|
*/
|
|
94
94
|
async init() {
|
|
95
|
+
// Load field registry to discover persisted indices (v4.2.1)
|
|
96
|
+
// Must run first to populate fieldIndexes directory before warming cache
|
|
97
|
+
await this.loadFieldRegistry();
|
|
95
98
|
// Initialize EntityIdMapper (loads UUID ↔ integer mappings from storage)
|
|
96
99
|
await this.idMapper.init();
|
|
97
100
|
// Phase 1b: Sync loaded counts to fixed-size arrays
|
|
@@ -1399,6 +1402,8 @@ export class MetadataIndexManager {
|
|
|
1399
1402
|
await Promise.all(allPromises);
|
|
1400
1403
|
// Flush EntityIdMapper (UUID ↔ integer mappings) (v3.43.0)
|
|
1401
1404
|
await this.idMapper.flush();
|
|
1405
|
+
// Save field registry for fast cold-start discovery (v4.2.1)
|
|
1406
|
+
await this.saveFieldRegistry();
|
|
1402
1407
|
this.dirtyFields.clear();
|
|
1403
1408
|
this.lastFlushTime = Date.now();
|
|
1404
1409
|
}
|
|
@@ -1480,6 +1485,77 @@ export class MetadataIndexManager {
|
|
|
1480
1485
|
}
|
|
1481
1486
|
}
|
|
1482
1487
|
}
|
|
1488
|
+
/**
|
|
1489
|
+
* Save field registry to storage for fast cold-start discovery
|
|
1490
|
+
* v4.2.1: Solves 100x performance regression by persisting field directory
|
|
1491
|
+
*
|
|
1492
|
+
* This enables instant cold starts by discovering which fields have persisted indices
|
|
1493
|
+
* without needing to rebuild from scratch. Similar to how HNSW persists system metadata.
|
|
1494
|
+
*
|
|
1495
|
+
* Registry size: ~4-8KB for typical deployments (50-200 fields)
|
|
1496
|
+
* Scales: O(log N) - field count grows logarithmically with entity count
|
|
1497
|
+
*/
|
|
1498
|
+
async saveFieldRegistry() {
|
|
1499
|
+
// Nothing to save if no fields indexed yet
|
|
1500
|
+
if (this.fieldIndexes.size === 0) {
|
|
1501
|
+
return;
|
|
1502
|
+
}
|
|
1503
|
+
try {
|
|
1504
|
+
const registry = {
|
|
1505
|
+
noun: 'FieldRegistry',
|
|
1506
|
+
fields: Array.from(this.fieldIndexes.keys()),
|
|
1507
|
+
version: 1,
|
|
1508
|
+
lastUpdated: Date.now(),
|
|
1509
|
+
totalFields: this.fieldIndexes.size
|
|
1510
|
+
};
|
|
1511
|
+
await this.storage.saveMetadata('__metadata_field_registry__', registry);
|
|
1512
|
+
prodLog.debug(`📝 Saved field registry: ${registry.totalFields} fields`);
|
|
1513
|
+
}
|
|
1514
|
+
catch (error) {
|
|
1515
|
+
// Non-critical: Log warning but don't throw
|
|
1516
|
+
// System will rebuild registry on next cold start if needed
|
|
1517
|
+
prodLog.warn('Failed to save field registry:', error);
|
|
1518
|
+
}
|
|
1519
|
+
}
|
|
1520
|
+
/**
|
|
1521
|
+
* Load field registry from storage to populate fieldIndexes directory
|
|
1522
|
+
* v4.2.1: Enables O(1) discovery of persisted sparse indices
|
|
1523
|
+
*
|
|
1524
|
+
* Called during init() to discover which fields have persisted indices.
|
|
1525
|
+
* Populates fieldIndexes Map with skeleton entries - actual sparse indices
|
|
1526
|
+
* are lazy-loaded via UnifiedCache when first accessed.
|
|
1527
|
+
*
|
|
1528
|
+
* Gracefully handles missing registry (first run or corrupted data).
|
|
1529
|
+
*/
|
|
1530
|
+
async loadFieldRegistry() {
|
|
1531
|
+
try {
|
|
1532
|
+
const registry = await this.storage.getMetadata('__metadata_field_registry__');
|
|
1533
|
+
if (!registry?.fields || !Array.isArray(registry.fields)) {
|
|
1534
|
+
// Registry doesn't exist or is invalid - not an error, just first run
|
|
1535
|
+
prodLog.debug('📂 No field registry found - will build on first flush');
|
|
1536
|
+
return;
|
|
1537
|
+
}
|
|
1538
|
+
// Populate fieldIndexes Map from discovered fields
|
|
1539
|
+
// Skeleton entries with empty values - sparse indices loaded lazily
|
|
1540
|
+
const lastUpdated = typeof registry.lastUpdated === 'number'
|
|
1541
|
+
? registry.lastUpdated
|
|
1542
|
+
: Date.now();
|
|
1543
|
+
for (const field of registry.fields) {
|
|
1544
|
+
if (typeof field === 'string' && field.length > 0) {
|
|
1545
|
+
this.fieldIndexes.set(field, {
|
|
1546
|
+
values: {},
|
|
1547
|
+
lastUpdated
|
|
1548
|
+
});
|
|
1549
|
+
}
|
|
1550
|
+
}
|
|
1551
|
+
prodLog.info(`✅ Loaded field registry: ${registry.fields.length} persisted fields discovered\n` +
|
|
1552
|
+
` Fields: ${registry.fields.slice(0, 5).join(', ')}${registry.fields.length > 5 ? '...' : ''}`);
|
|
1553
|
+
}
|
|
1554
|
+
catch (error) {
|
|
1555
|
+
// Silent failure - registry not critical, will rebuild if needed
|
|
1556
|
+
prodLog.debug('Could not load field registry:', error);
|
|
1557
|
+
}
|
|
1558
|
+
}
|
|
1483
1559
|
/**
|
|
1484
1560
|
* Get count of entities by type - O(1) operation using existing tracking
|
|
1485
1561
|
* This exposes the production-ready counting that's already maintained
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "4.1.4",
|
|
3
|
+
"version": "4.2.1",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -168,6 +168,7 @@
|
|
|
168
168
|
"@google-cloud/storage": "^7.14.0",
|
|
169
169
|
"@huggingface/transformers": "^3.7.2",
|
|
170
170
|
"@msgpack/msgpack": "^3.1.2",
|
|
171
|
+
"@types/js-yaml": "^4.0.9",
|
|
171
172
|
"boxen": "^8.0.1",
|
|
172
173
|
"chalk": "^5.3.0",
|
|
173
174
|
"chardet": "^2.0.0",
|
|
@@ -175,6 +176,8 @@
|
|
|
175
176
|
"commander": "^11.1.0",
|
|
176
177
|
"csv-parse": "^6.1.0",
|
|
177
178
|
"inquirer": "^12.9.3",
|
|
179
|
+
"js-yaml": "^4.1.0",
|
|
180
|
+
"mammoth": "^1.11.0",
|
|
178
181
|
"ora": "^8.2.0",
|
|
179
182
|
"pdfjs-dist": "^4.0.379",
|
|
180
183
|
"prompts": "^2.4.2",
|