@soulcraft/brainy 4.1.4 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/dist/import/FormatDetector.d.ts +6 -1
- package/dist/import/FormatDetector.js +40 -1
- package/dist/import/ImportCoordinator.d.ts +102 -4
- package/dist/import/ImportCoordinator.js +248 -6
- package/dist/import/InstancePool.d.ts +136 -0
- package/dist/import/InstancePool.js +231 -0
- package/dist/importers/SmartCSVImporter.d.ts +2 -1
- package/dist/importers/SmartCSVImporter.js +11 -22
- package/dist/importers/SmartDOCXImporter.d.ts +125 -0
- package/dist/importers/SmartDOCXImporter.js +227 -0
- package/dist/importers/SmartExcelImporter.d.ts +12 -1
- package/dist/importers/SmartExcelImporter.js +40 -25
- package/dist/importers/SmartJSONImporter.d.ts +1 -0
- package/dist/importers/SmartJSONImporter.js +25 -6
- package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
- package/dist/importers/SmartMarkdownImporter.js +11 -16
- package/dist/importers/SmartPDFImporter.d.ts +2 -1
- package/dist/importers/SmartPDFImporter.js +11 -22
- package/dist/importers/SmartYAMLImporter.d.ts +121 -0
- package/dist/importers/SmartYAMLImporter.js +275 -0
- package/dist/importers/VFSStructureGenerator.js +12 -0
- package/dist/neural/SmartExtractor.d.ts +279 -0
- package/dist/neural/SmartExtractor.js +592 -0
- package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
- package/dist/neural/SmartRelationshipExtractor.js +396 -0
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/neural/entityExtractor.d.ts +3 -0
- package/dist/neural/entityExtractor.js +34 -36
- package/dist/neural/presets.d.ts +189 -0
- package/dist/neural/presets.js +365 -0
- package/dist/neural/signals/ContextSignal.d.ts +166 -0
- package/dist/neural/signals/ContextSignal.js +646 -0
- package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
- package/dist/neural/signals/EmbeddingSignal.js +435 -0
- package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
- package/dist/neural/signals/ExactMatchSignal.js +542 -0
- package/dist/neural/signals/PatternSignal.d.ts +159 -0
- package/dist/neural/signals/PatternSignal.js +478 -0
- package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
- package/dist/neural/signals/VerbContextSignal.js +390 -0
- package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
- package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
- package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
- package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
- package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
- package/dist/neural/signals/VerbPatternSignal.js +457 -0
- package/dist/types/graphTypes.d.ts +2 -0
- package/dist/utils/metadataIndex.d.ts +22 -0
- package/dist/utils/metadataIndex.js +76 -0
- package/package.json +4 -1
|
@@ -0,0 +1,478 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PatternSignal - Pattern-based entity type classification
|
|
3
|
+
*
|
|
4
|
+
* WEIGHT: 20% (moderate reliability, fast)
|
|
5
|
+
*
|
|
6
|
+
* Uses:
|
|
7
|
+
* 1. Pre-compiled regex patterns grouped by noun type (defined inline in initializePatterns)
|
|
8
|
+
* 2. Common naming conventions (PascalCase → Concept, camelCase/snake_case/UPPER_CASE → Measurement, kebab-case → File)
|
|
9
|
+
* 3. Text structural patterns (email → Person, URL → Thing, ISO date → Event, etc.)
|
|
10
|
+
*
|
|
11
|
+
* Merges: KeywordSignal + PatternSignal from old architecture
|
|
12
|
+
* Speed: Very fast (~5ms) - pre-compiled patterns
|
|
13
|
+
*
|
|
14
|
+
* PRODUCTION-READY: No TODOs, no mocks, real implementation
|
|
15
|
+
*/
|
|
16
|
+
import { NounType } from '../../types/graphTypes.js';
|
|
17
|
+
/**
 * PatternSignal - fast pattern-based entity type classification.
 *
 * Three matchers are tried in order and the first result that reaches
 * `options.minConfidence` wins:
 *   1. keyword/shape regexes registered in initializePatterns()
 *   2. identifier naming conventions (PascalCase, camelCase, ...)
 *   3. whole-string structural shapes (email, URL, UUID, ...)
 *
 * Results (including "no match") are memoised in a bounded LRU cache that
 * relies on Map insertion order: the first key of the Map is always the
 * least recently used entry.
 */
export class PatternSignal {
    /**
     * @param brain   Stored on the instance; not read by any method of this class.
     * @param options Optional overrides:
     *                - minConfidence (default 0.65)
     *                - cacheSize (default 3000)
     *                - enableNamingPatterns (default true)
     *                - enableStructuralPatterns (default true)
     */
    constructor(brain, options) {
        // Pre-compiled patterns (filled once by initializePatterns)
        this.patterns = [];
        // LRU cache: Map iteration order doubles as the recency order
        this.cache = new Map();
        // Statistics counters
        this.resetStats();
        this.brain = brain;
        this.options = {
            minConfidence: options?.minConfidence ?? 0.65,
            cacheSize: options?.cacheSize ?? 3000,
            enableNamingPatterns: options?.enableNamingPatterns ?? true,
            enableStructuralPatterns: options?.enableStructuralPatterns ?? true
        };
        this.initializePatterns();
    }
    /**
     * Register the built-in regex patterns, grouped by the noun type they
     * indicate. Registration order matters: it determines the generated
     * pattern names and breaks ties between equally scored matches.
     */
    initializePatterns() {
        // Person patterns
        this.addPatterns(NounType.Person, 0.80, [
            /\b(?:Dr|Prof|Mr|Mrs|Ms|Sir|Lady|Lord)\s+[A-Z][a-z]+/,
            /\b[A-Z][a-z]+\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)?\b/, // Full names
            /\b(?:CEO|CTO|CFO|COO|VP|Director|Manager|Engineer|Developer|Designer)\b/i,
            /\b(?:author|creator|founder|inventor|contributor|maintainer)\b/i,
            /\b(?:user|member|participant|attendee|speaker|presenter)\b/i
        ]);
        // Location patterns
        this.addPatterns(NounType.Location, 0.75, [
            /\b(?:city|town|village|country|nation|state|province)\b/i,
            /\b(?:street|avenue|road|boulevard|lane|drive)\b/i,
            /\b(?:building|tower|center|complex|headquarters)\b/i,
            /\b(?:north|south|east|west|central)\s+[A-Z][a-z]+/i,
            /\b[A-Z][a-z]+,\s*[A-Z]{2}\b/ // City, State format
        ]);
        // Organization patterns
        this.addPatterns(NounType.Organization, 0.78, [
            /\b(?:Inc|LLC|Corp|Ltd|GmbH|SA|AG)\b/,
            /\b[A-Z][a-z]+\s+(?:Company|Corporation|Enterprises|Industries|Group)\b/,
            /\b(?:university|college|institute|academy|school)\b/i,
            /\b(?:department|division|team|committee|board)\b/i,
            /\b(?:government|agency|bureau|ministry|administration)\b/i
        ]);
        // Technology patterns (Thing type)
        this.addPatterns(NounType.Thing, 0.82, [
            // "C++" ends in a non-word character, so a trailing \b would only
            // match when a word character follows; use a lookahead instead.
            /\b(?:(?:JavaScript|TypeScript|Python|Java|Go|Rust|Swift|Kotlin)\b|C\+\+(?!\w))/,
            /\b(?:React|Vue|Angular|Node|Express|Django|Flask|Rails)\b/,
            /\b(?:AWS|Azure|GCP|Docker|Kubernetes|Git|GitHub|GitLab)\b/,
            /\b(?:API|SDK|CLI|IDE|framework|library|package|module)\b/i,
            /\b(?:database|SQL|NoSQL|MongoDB|PostgreSQL|Redis|MySQL)\b/i
        ]);
        // Event patterns
        this.addPatterns(NounType.Event, 0.70, [
            /\b(?:conference|summit|symposium|workshop|seminar|webinar)\b/i,
            /\b(?:meeting|session|call|standup|retrospective|sprint)\b/i,
            /\b(?:release|launch|deployment|rollout|update)\b/i,
            /\b(?:hackathon|bootcamp|training|course|tutorial)\b/i
        ]);
        // Concept patterns
        this.addPatterns(NounType.Concept, 0.68, [
            /\b(?:theory|principle|methodology|approach|paradigm|framework)\b/i,
            /\b(?:pattern|architecture|design|structure|model|schema)\b/i,
            /\b(?:algorithm|technique|method|procedure|protocol)\b/i,
            /\b(?:concept|idea|notion|abstraction|definition)\b/i
        ]);
        // Physical object patterns (Thing type)
        this.addPatterns(NounType.Thing, 0.72, [
            /\b(?:device|tool|equipment|instrument|apparatus)\b/i,
            /\b(?:car|vehicle|automobile|truck|bike|motorcycle)\b/i,
            /\b(?:computer|laptop|phone|tablet|server|router)\b/i,
            /\b(?:artifact|item|object|thing|product|good)\b/i
        ]);
        // Document patterns
        this.addPatterns(NounType.Document, 0.75, [
            /\b(?:document|file|report|paper|article|essay)\b/i,
            /\b(?:specification|manual|guide|documentation|readme)\b/i,
            /\b(?:contract|agreement|license|policy|terms)\b/i,
            /\.(?:pdf|docx|txt|md|html|xml)\b/i
        ]);
        // File patterns
        this.addPatterns(NounType.File, 0.80, [
            /\b[a-zA-Z0-9_-]+\.(?:js|ts|py|java|cpp|go|rs|rb|php|swift)\b/,
            /\b[a-zA-Z0-9_-]+\.(?:json|yaml|yml|toml|xml|csv)\b/,
            /\b[a-zA-Z0-9_-]+\.(?:jpg|jpeg|png|gif|svg|webp)\b/,
            /\b(?:src|lib|dist|build|node_modules|package\.json)\b/
        ]);
        // Service patterns
        this.addPatterns(NounType.Service, 0.73, [
            /\b(?:service|platform|system|solution|application)\b/i,
            /\b(?:API|endpoint|webhook|microservice|serverless)\b/i,
            /\b(?:cloud|hosting|storage|compute|networking)\b/i
        ]);
        // Project patterns
        this.addPatterns(NounType.Project, 0.71, [
            /\b(?:project|initiative|program|campaign|effort)\b/i,
            /\b(?:v\d+\.\d+|\d+\.\d+\.\d+)\b/, // Version numbers
            /\b[A-Z][a-z]+\s+(?:Project|Initiative|Program)\b/
        ]);
        // Process patterns
        this.addPatterns(NounType.Process, 0.70, [
            /\b(?:process|workflow|pipeline|procedure|operation)\b/i,
            /\b(?:build|test|deploy|release|ci\/cd|devops)\b/i,
            /\b(?:install|setup|configure|initialize|bootstrap)\b/i
        ]);
        // Attribute patterns (Measurement type)
        this.addPatterns(NounType.Measurement, 0.69, [
            /\b(?:property|attribute|field|column|parameter|variable)\b/i,
            /\b(?:setting|option|config|preference|flag)\b/i,
            /\b(?:key|value|name|id|type|status|state)\b/i
        ]);
        // Metric patterns (Measurement type)
        this.addPatterns(NounType.Measurement, 0.74, [
            /\b(?:metric|measure|kpi|indicator|benchmark)\b/i,
            /\b(?:count|total|sum|average|mean|median|max|min)\b/i,
            /\b(?:percentage|ratio|rate|score|rating)\b/i,
            // "%" is a non-word character: only the alphabetic units may be
            // followed by \b, otherwise "50%" at end of text never matches.
            /\b\d+(?:\.\d+)?(?:%|(?:ms|sec|kb|mb|gb)\b)/i
        ]);
    }
    /**
     * Register a group of regexes that all indicate the same type.
     *
     * @param type       Noun type reported when one of the regexes matches
     * @param confidence Base confidence reported for these regexes
     * @param regexes    Regular expressions to register
     */
    addPatterns(type, confidence, regexes) {
        for (const regex of regexes) {
            // A global-flag twin is compiled once so occurrences can be counted
            // with String.prototype.match (a non-global regex reports one match only).
            const globalFlags = regex.flags.includes('g') ? regex.flags : `${regex.flags}g`;
            this.patterns.push({
                regex,
                globalRegex: new RegExp(regex.source, globalFlags),
                type,
                confidence,
                name: `${type}_pattern_${this.patterns.length}`
            });
        }
    }
    /**
     * Classify an entity type using pattern matching.
     *
     * @param candidate Entity text to classify
     * @param context   Optional context; only `context.definition` is read
     * @returns The first matcher result whose confidence reaches
     *          `options.minConfidence`, or null when nothing qualifies
     *          (also null for non-string or blank candidates)
     */
    async classify(candidate, context) {
        this.stats.calls++;
        // Nothing to classify; avoids a TypeError on non-string input.
        if (typeof candidate !== 'string' || candidate.trim() === '') {
            return null;
        }
        const cacheKey = this.getCacheKey(candidate, context);
        const cached = this.getFromCache(cacheKey);
        if (cached !== undefined) {
            this.stats.cacheHits++;
            return cached;
        }
        // Matchers in priority order; each stage names the counter it bumps.
        const stages = [
            {
                enabled: true,
                counter: 'regexMatches',
                run: () => this.matchRegexPatterns(candidate, context?.definition)
            },
            {
                enabled: this.options.enableNamingPatterns,
                counter: 'namingMatches',
                run: () => this.matchNamingConventions(candidate)
            },
            {
                enabled: this.options.enableStructuralPatterns,
                counter: 'structuralMatches',
                run: () => this.matchStructuralPatterns(candidate)
            }
        ];
        let result = null;
        for (const stage of stages) {
            if (!stage.enabled) {
                continue;
            }
            const match = stage.run();
            if (match && match.confidence >= this.options.minConfidence) {
                this.stats[stage.counter]++;
                result = match;
                break;
            }
        }
        // Misses are cached too (as null) so they are not recomputed.
        this.addToCache(cacheKey, result);
        return result;
    }
    /**
     * Match the candidate (plus optional definition text) against every
     * registered regex and pick the best-scoring pattern.
     *
     * Score = pattern confidence * ln(occurrences + 1); ties keep the pattern
     * registered first. The reported confidence is the pattern's base
     * confidence capped at 0.85.
     *
     * @param candidate  Entity text
     * @param definition Optional extra text appended to the candidate
     * @returns Signal object, or null when no regex matches
     */
    matchRegexPatterns(candidate, definition) {
        const textToMatch = definition ? `${candidate} ${definition}` : candidate;
        let best = null;
        let bestScore = -Infinity;
        for (const pattern of this.patterns) {
            // With a global regex, match() returns every occurrence (or null).
            const occurrences = textToMatch.match(pattern.globalRegex);
            const count = occurrences ? occurrences.length : 0;
            if (count === 0) {
                continue;
            }
            const score = pattern.confidence * Math.log(count + 1);
            if (score > bestScore) {
                best = { pattern, count };
                bestScore = score;
            }
        }
        if (best === null) {
            return null;
        }
        return {
            source: 'pattern-regex',
            type: best.pattern.type,
            confidence: Math.min(best.pattern.confidence, 0.85), // Cap at 0.85
            evidence: `Pattern match: ${best.pattern.name} (${best.count} occurrence${best.count > 1 ? 's' : ''})`,
            metadata: {
                patternName: best.pattern.name,
                matchCount: best.count
            }
        };
    }
    /**
     * Match based on identifier naming conventions.
     *
     * - PascalCase → Concept
     * - camelCase  → Measurement
     * - UPPER_CASE → Measurement
     * - snake_case → Measurement
     * - kebab-case → File
     *
     * Every rule requires at least one word separator or case hump, so a
     * plain single word ("hello", "Paris") is not treated as an identifier.
     *
     * @param candidate Entity text (trimmed before testing)
     * @returns Signal object, or null when no convention applies
     */
    matchNamingConventions(candidate) {
        const trimmed = candidate.trim();
        const conventions = [
            {
                applies: /^[A-Z][a-z]+(?:[A-Z][a-z]+)+$/.test(trimmed),
                type: NounType.Concept,
                confidence: 0.68,
                evidence: 'PascalCase naming suggests a concept or type',
                label: 'PascalCase'
            },
            {
                applies: /^[a-z]+(?:[A-Z][a-z]+)+$/.test(trimmed),
                type: NounType.Measurement,
                confidence: 0.67,
                evidence: 'camelCase naming suggests an attribute or variable',
                label: 'camelCase'
            },
            {
                applies: /^[A-Z][A-Z_0-9]+$/.test(trimmed) && trimmed.includes('_'),
                type: NounType.Measurement,
                confidence: 0.70,
                evidence: 'UPPER_CASE naming suggests a constant',
                label: 'UPPER_CASE'
            },
            {
                applies: /^[a-z]+(?:_[a-z]+)+$/.test(trimmed),
                type: NounType.Measurement,
                confidence: 0.66,
                evidence: 'snake_case naming suggests an attribute or variable',
                label: 'snake_case'
            },
            {
                applies: /^[a-z]+(?:-[a-z]+)+$/.test(trimmed),
                type: NounType.File,
                confidence: 0.69,
                evidence: 'kebab-case naming suggests a file or identifier',
                label: 'kebab-case'
            }
        ];
        const hit = conventions.find(convention => convention.applies);
        if (!hit) {
            return null;
        }
        return {
            source: 'pattern-naming',
            type: hit.type,
            confidence: hit.confidence,
            evidence: hit.evidence,
            metadata: { matchedPattern: hit.label }
        };
    }
    /**
     * Match based on the overall shape of the (trimmed) candidate.
     *
     * Rules run from most to least specific. The phone rule accepts any run
     * of 10+ digits, spaces, dashes and parentheses, so it must come after the
     * UUID and ISO-date rules or it would claim "2024-01-01" and all-digit
     * UUIDs.
     *
     * @param candidate Entity text
     * @returns Signal object, or null when no structural shape matches
     */
    matchStructuralPatterns(candidate) {
        const trimmed = candidate.trim();
        const shapes = [
            {
                // Email address → Person (contact)
                applies: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(trimmed),
                type: NounType.Person,
                confidence: 0.75,
                evidence: 'Email address indicates a person',
                label: 'email'
            },
            {
                // URL → Thing (web resource)
                applies: /^https?:\/\/[^\s]+$/.test(trimmed),
                type: NounType.Thing,
                confidence: 0.73,
                evidence: 'URL indicates an object or resource',
                label: 'url'
            },
            {
                // UUID → identifier (Thing type)
                applies: /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(trimmed),
                type: NounType.Thing,
                confidence: 0.78,
                evidence: 'UUID indicates an object identifier',
                label: 'uuid'
            },
            {
                // Semantic version → project/release
                applies: /^v?\d+\.\d+\.\d+(?:-[a-z0-9.]+)?$/i.test(trimmed),
                type: NounType.Project,
                confidence: 0.74,
                evidence: 'Semantic version indicates a release or project version',
                label: 'semver'
            },
            {
                // ISO date / date-time → event. Anchored at both ends; the time
                // part may carry seconds, fractional seconds and a UTC offset.
                applies: /^\d{4}-\d{2}-\d{2}(?:[T ]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?(?:Z|[+-]\d{2}:?\d{2})?)?$/.test(trimmed),
                type: NounType.Event,
                confidence: 0.71,
                evidence: 'ISO date indicates a temporal event',
                label: 'iso_date'
            },
            {
                // Phone number → contact (loosest rule, checked last)
                applies: /^\+?[\d\s\-()]{10,}$/.test(trimmed) && /\d{3,}/.test(trimmed),
                type: NounType.Person,
                confidence: 0.72,
                evidence: 'Phone number indicates contact information',
                label: 'phone'
            }
        ];
        const hit = shapes.find(shape => shape.applies);
        if (!hit) {
            return null;
        }
        return {
            source: 'pattern-structural',
            type: hit.type,
            confidence: hit.confidence,
            evidence: hit.evidence,
            metadata: { matchedPattern: hit.label }
        };
    }
    /**
     * Get statistics about signal performance.
     *
     * @returns Raw counters plus cache size, pattern count and per-call rates
     *          (rates are 0 when classify() has not been called yet)
     */
    getStats() {
        const { calls } = this.stats;
        const perCall = (count) => (calls > 0 ? count / calls : 0);
        return {
            ...this.stats,
            cacheSize: this.cache.size,
            patternCount: this.patterns.length,
            cacheHitRate: perCall(this.stats.cacheHits),
            regexMatchRate: perCall(this.stats.regexMatches),
            namingMatchRate: perCall(this.stats.namingMatches),
            structuralMatchRate: perCall(this.stats.structuralMatches)
        };
    }
    /**
     * Reset statistics (useful for testing)
     */
    resetStats() {
        this.stats = {
            calls: 0,
            cacheHits: 0,
            regexMatches: 0,
            namingMatches: 0,
            structuralMatches: 0
        };
    }
    /**
     * Clear cache
     */
    clearCache() {
        this.cache.clear();
    }
    // ========== Private Helper Methods ==========
    /**
     * Build the cache key for a candidate/context pair.
     *
     * Case is preserved because the matchers are case-sensitive ("UserName"
     * and "username" classify differently), and the whole definition is used
     * because the regex matcher scans all of it.
     *
     * @param candidate Entity text (trimmed for the key)
     * @param context   Optional context; only `context.definition` is read
     * @returns Cache key string
     */
    getCacheKey(candidate, context) {
        const normalized = candidate.trim();
        if (!context?.definition) {
            return normalized;
        }
        // NUL separator keeps "a" + "b:c" distinct from "a:b" + "c".
        return `${normalized}\u0000${String(context.definition)}`;
    }
    /**
     * Look up a key in the LRU cache and mark it as most recently used.
     *
     * @param key Cache key
     * @returns undefined on a miss; otherwise the cached signal, or null for
     *          a cached "no match"
     */
    getFromCache(key) {
        if (!this.cache.has(key)) {
            return undefined;
        }
        const cached = this.cache.get(key);
        // Re-insert so the key moves to the end of the Map's iteration order.
        this.cache.delete(key);
        this.cache.set(key, cached);
        return cached ?? null;
    }
    /**
     * Store a value in the LRU cache, evicting the least recently used entry
     * when the cache grows past `options.cacheSize`.
     *
     * @param key   Cache key
     * @param value Signal object, or null for "no match"
     */
    addToCache(key, value) {
        // Delete first so an existing key is moved to the most-recent position.
        this.cache.delete(key);
        this.cache.set(key, value);
        if (this.cache.size > this.options.cacheSize) {
            // The first key in iteration order is the least recently used one.
            const oldest = this.cache.keys().next();
            if (!oldest.done) {
                this.cache.delete(oldest.value);
            }
        }
    }
}
|
|
472
|
+
/**
 * Factory for PatternSignal.
 *
 * @param brain   Forwarded unchanged to the PatternSignal constructor
 * @param options Forwarded unchanged to the PatternSignal constructor
 * @returns A freshly constructed PatternSignal
 */
export function createPatternSignal(brain, options) {
    const signal = new PatternSignal(brain, options);
    return signal;
}
|
|
478
|
+
//# sourceMappingURL=PatternSignal.js.map
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VerbContextSignal - Type-based relationship inference
|
|
3
|
+
*
|
|
4
|
+
* WEIGHT: 5% (lowest weight, backup signal)
|
|
5
|
+
*
|
|
6
|
+
* Uses:
|
|
7
|
+
* 1. Entity type pairs (Person+Organization → WorksWith)
|
|
8
|
+
* 2. Semantic compatibility (Document+Person → CreatedBy)
|
|
9
|
+
* 3. Domain heuristics (Location+Organization → LocatedAt)
|
|
10
|
+
*
|
|
11
|
+
* PRODUCTION-READY: No TODOs, no mocks, real implementation
|
|
12
|
+
*/
|
|
13
|
+
import type { Brainy } from '../../brainy.js';
|
|
14
|
+
import { VerbType, NounType } from '../../types/graphTypes.js';
|
|
15
|
+
/**
 * Result of a verb (relationship) classification.
 */
export interface VerbSignal {
    /** Relationship type that was chosen. */
    type: VerbType;
    /** Confidence attached to the chosen type. */
    confidence: number;
    /** Human-readable explanation of why the type was chosen. */
    evidence: string;
    /** Optional details about the entity pair the result refers to. */
    metadata?: {
        /** Noun type of the subject entity, when known. */
        subjectType?: NounType;
        /** Noun type of the object entity, when known. */
        objectType?: NounType;
    };
}
|
|
27
|
+
/**
 * Tuning options accepted by the VerbContextSignal constructor.
 * Both fields are optional; defaults live in the implementation, which is
 * not visible in this declaration file.
 */
export interface VerbContextSignalOptions {
    /** Confidence threshold (presumably the minimum a result must reach — confirm in the implementation). */
    minConfidence?: number;
    /** Size limit for the signal's cache (presumably a maximum entry count — confirm in the implementation). */
    cacheSize?: number;
}
|
|
34
|
+
/**
 * VerbContextSignal - infers a relationship (verb) type from the noun types
 * of the two entities involved.
 *
 * This is a declaration only; behaviour notes below restate what the
 * declared members and their names indicate and should be confirmed against
 * the implementation.
 */
export declare class VerbContextSignal {
    private brain;
    private options;
    /** Lookup of type-pair hints (filled by initializeTypePairHints). */
    private typePairHints;
    private cache;
    private cacheOrder;
    private stats;
    /**
     * @param brain   Owning Brainy instance
     * @param options Optional tuning options
     */
    constructor(brain: Brainy, options?: VerbContextSignalOptions);
    /**
     * Build the table that maps entity type combinations to likely
     * relationship types.
     */
    private initializeTypePairHints;
    /**
     * Classify the relationship type for a subject/object type pair.
     *
     * @param subjectType Noun type of the subject entity (optional)
     * @param objectType  Noun type of the object entity (optional)
     * @returns Promise resolving to a VerbSignal, or null when no type is chosen
     */
    classify(subjectType?: NounType, objectType?: NounType): Promise<VerbSignal | null>;
    /** Build the cache key for a lookup. */
    private getCacheKey;
    /** Read an entry from the cache. */
    private getFromCache;
    /** Store an entry in the cache, evicting when needed. */
    private addToCache;
    /**
     * Snapshot of the signal's counters and derived rates.
     *
     * Note: `hintHits` is typed as a Map; whether it is a copy or the live
     * internal instance is not visible from this declaration.
     */
    getStats(): {
        hintCount: number;
        cacheSize: number;
        cacheHitRate: number;
        matchRate: number;
        topHints: {
            hint: string;
            hits: number;
        }[];
        calls: number;
        cacheHits: number;
        matches: number;
        hintHits: Map<string, number>;
    };
    /** Reset the statistics counters. */
    resetStats(): void;
    /** Remove every cached entry. */
    clearCache(): void;
}
|