@soulcraft/brainy 4.1.3 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +100 -7
- package/dist/brainy.d.ts +74 -16
- package/dist/brainy.js +74 -16
- package/dist/import/FormatDetector.d.ts +6 -1
- package/dist/import/FormatDetector.js +40 -1
- package/dist/import/ImportCoordinator.d.ts +155 -5
- package/dist/import/ImportCoordinator.js +346 -6
- package/dist/import/InstancePool.d.ts +136 -0
- package/dist/import/InstancePool.js +231 -0
- package/dist/importers/SmartCSVImporter.d.ts +2 -1
- package/dist/importers/SmartCSVImporter.js +11 -22
- package/dist/importers/SmartDOCXImporter.d.ts +125 -0
- package/dist/importers/SmartDOCXImporter.js +227 -0
- package/dist/importers/SmartExcelImporter.d.ts +12 -1
- package/dist/importers/SmartExcelImporter.js +40 -25
- package/dist/importers/SmartJSONImporter.d.ts +1 -0
- package/dist/importers/SmartJSONImporter.js +25 -6
- package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
- package/dist/importers/SmartMarkdownImporter.js +11 -16
- package/dist/importers/SmartPDFImporter.d.ts +2 -1
- package/dist/importers/SmartPDFImporter.js +11 -22
- package/dist/importers/SmartYAMLImporter.d.ts +121 -0
- package/dist/importers/SmartYAMLImporter.js +275 -0
- package/dist/importers/VFSStructureGenerator.js +12 -0
- package/dist/neural/SmartExtractor.d.ts +279 -0
- package/dist/neural/SmartExtractor.js +592 -0
- package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
- package/dist/neural/SmartRelationshipExtractor.js +396 -0
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/neural/entityExtractor.d.ts +3 -0
- package/dist/neural/entityExtractor.js +34 -36
- package/dist/neural/presets.d.ts +189 -0
- package/dist/neural/presets.js +365 -0
- package/dist/neural/signals/ContextSignal.d.ts +166 -0
- package/dist/neural/signals/ContextSignal.js +646 -0
- package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
- package/dist/neural/signals/EmbeddingSignal.js +435 -0
- package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
- package/dist/neural/signals/ExactMatchSignal.js +542 -0
- package/dist/neural/signals/PatternSignal.d.ts +159 -0
- package/dist/neural/signals/PatternSignal.js +478 -0
- package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
- package/dist/neural/signals/VerbContextSignal.js +390 -0
- package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
- package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
- package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
- package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
- package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
- package/dist/neural/signals/VerbPatternSignal.js +457 -0
- package/dist/types/graphTypes.d.ts +2 -0
- package/package.json +4 -1
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ExactMatchSignal - O(1) exact match entity type classification
|
|
3
|
+
*
|
|
4
|
+
* HIGHEST WEIGHT: 40% (most reliable signal)
|
|
5
|
+
*
|
|
6
|
+
* Uses:
|
|
7
|
+
* 1. O(1) term index lookup (exact string match)
|
|
8
|
+
* 2. O(1) metadata hints (column names, file structure)
|
|
9
|
+
* 3. Format-specific intelligence (Excel, CSV, PDF, YAML, DOCX)
|
|
10
|
+
*
|
|
11
|
+
* This is the WORKSHOP BUG FIX - finds explicit relationships via exact matching
|
|
12
|
+
*
|
|
13
|
+
* PRODUCTION-READY: No TODOs, no mocks, real implementation
|
|
14
|
+
*/
|
|
15
|
+
import type { Brainy } from '../../brainy.js';
|
|
16
|
+
import { NounType } from '../../types/graphTypes.js';
|
|
17
|
+
/**
|
|
18
|
+
* Signal result with classification details
|
|
19
|
+
*/
|
|
20
|
+
export interface TypeSignal {
|
|
21
|
+
source: 'exact-term' | 'exact-metadata' | 'exact-format';
|
|
22
|
+
type: NounType;
|
|
23
|
+
confidence: number;
|
|
24
|
+
evidence: string;
|
|
25
|
+
metadata?: {
|
|
26
|
+
matchedTerm?: string;
|
|
27
|
+
columnHint?: string;
|
|
28
|
+
formatHint?: string;
|
|
29
|
+
};
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Options for exact match signal
|
|
33
|
+
*/
|
|
34
|
+
export interface ExactMatchSignalOptions {
|
|
35
|
+
minConfidence?: number;
|
|
36
|
+
cacheSize?: number;
|
|
37
|
+
enableFormatHints?: boolean;
|
|
38
|
+
columnPatterns?: {
|
|
39
|
+
term?: string[];
|
|
40
|
+
type?: string[];
|
|
41
|
+
definition?: string[];
|
|
42
|
+
related?: string[];
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* ExactMatchSignal - Instant O(1) type classification via exact matching
|
|
47
|
+
*
|
|
48
|
+
* Production features:
|
|
49
|
+
* - O(1) hash table lookups (fastest possible)
|
|
50
|
+
* - Format-specific intelligence (Excel columns, CSV headers, etc.)
|
|
51
|
+
* - Metadata hints (column names reveal entity types)
|
|
52
|
+
* - LRU cache for hot paths
|
|
53
|
+
* - Highest confidence (0.95-0.99) - most reliable signal
|
|
54
|
+
*/
|
|
55
|
+
export declare class ExactMatchSignal {
|
|
56
|
+
private brain;
|
|
57
|
+
private options;
|
|
58
|
+
private termIndex;
|
|
59
|
+
private cache;
|
|
60
|
+
private cacheOrder;
|
|
61
|
+
private stats;
|
|
62
|
+
constructor(brain: Brainy, options?: ExactMatchSignalOptions);
|
|
63
|
+
/**
|
|
64
|
+
* Build term index from import data (call once per import)
|
|
65
|
+
*
|
|
66
|
+
* Building the index is a one-time O(n) cost; every lookup afterwards is O(1)
|
|
67
|
+
*
|
|
68
|
+
* @param terms Array of terms, each with its text, its NounType, and an optional confidence
|
|
69
|
+
*/
|
|
70
|
+
buildIndex(terms: Array<{
|
|
71
|
+
text: string;
|
|
72
|
+
type: NounType;
|
|
73
|
+
confidence?: number;
|
|
74
|
+
}>): void;
|
|
75
|
+
/**
|
|
76
|
+
* Classify entity type using exact matching
|
|
77
|
+
*
|
|
78
|
+
* Main entry point - checks term index, metadata, and format hints
|
|
79
|
+
*
|
|
80
|
+
* @param candidate Entity text to classify
|
|
81
|
+
* @param context Optional context for better matching (definition, metadata, columnName, fileFormat, rowData)
|
|
82
|
+
* @returns Promise resolving to a TypeSignal with the classification result, or to null
|
|
83
|
+
*/
|
|
84
|
+
classify(candidate: string, context?: {
|
|
85
|
+
definition?: string;
|
|
86
|
+
metadata?: Record<string, any>;
|
|
87
|
+
columnName?: string;
|
|
88
|
+
fileFormat?: 'excel' | 'csv' | 'pdf' | 'json' | 'markdown' | 'yaml' | 'docx';
|
|
89
|
+
rowData?: Record<string, any>;
|
|
90
|
+
}): Promise<TypeSignal | null>;
|
|
91
|
+
/**
|
|
92
|
+
* Match against term index (O(1))
|
|
93
|
+
*
|
|
94
|
+
* Highest confidence - exact string match
|
|
95
|
+
*/
|
|
96
|
+
private matchTerm;
|
|
97
|
+
/**
|
|
98
|
+
* Match using metadata hints (column names, file structure)
|
|
99
|
+
*
|
|
100
|
+
* High confidence - structural clues reveal entity types
|
|
101
|
+
*/
|
|
102
|
+
private matchMetadata;
|
|
103
|
+
/**
|
|
104
|
+
* Match using format-specific intelligence
|
|
105
|
+
*
|
|
106
|
+
* Excel, CSV, PDF, YAML, and DOCX each have unique structural patterns
|
|
107
|
+
*/
|
|
108
|
+
private matchFormat;
|
|
109
|
+
/**
|
|
110
|
+
* Detect Excel-specific patterns
|
|
111
|
+
*
|
|
112
|
+
* - Cell formats (dates, currencies)
|
|
113
|
+
* - Named ranges
|
|
114
|
+
* - Column headers reveal entity types
|
|
115
|
+
* - Sheet names as categories
|
|
116
|
+
*/
|
|
117
|
+
private detectExcelPatterns;
|
|
118
|
+
/**
|
|
119
|
+
* Detect CSV-specific patterns
|
|
120
|
+
*
|
|
121
|
+
* - Relationship columns (parent_id, created_by)
|
|
122
|
+
* - Nested delimiters (semicolons, pipes)
|
|
123
|
+
* - URL columns indicate external references
|
|
124
|
+
*/
|
|
125
|
+
private detectCSVPatterns;
|
|
126
|
+
/**
|
|
127
|
+
* Detect PDF-specific patterns
|
|
128
|
+
*
|
|
129
|
+
* - Table of contents entries
|
|
130
|
+
* - Section headings
|
|
131
|
+
* - Citation references
|
|
132
|
+
* - Figure captions
|
|
133
|
+
*/
|
|
134
|
+
private detectPDFPatterns;
|
|
135
|
+
/**
|
|
136
|
+
* Detect YAML-specific patterns
|
|
137
|
+
*
|
|
138
|
+
* - Key names reveal entity types
|
|
139
|
+
* - Nested structure indicates relationships
|
|
140
|
+
* - Lists indicate collections
|
|
141
|
+
*/
|
|
142
|
+
private detectYAMLPatterns;
|
|
143
|
+
/**
|
|
144
|
+
* Detect DOCX-specific patterns
|
|
145
|
+
*
|
|
146
|
+
* - Heading levels indicate hierarchy
|
|
147
|
+
* - List items indicate collections
|
|
148
|
+
* - Comments indicate relationships
|
|
149
|
+
* - Track changes reveal authorship
|
|
150
|
+
*/
|
|
151
|
+
private detectDOCXPatterns;
|
|
152
|
+
/**
|
|
153
|
+
* Detect entity type from column name patterns
|
|
154
|
+
*/
|
|
155
|
+
private detectColumnType;
|
|
156
|
+
/**
|
|
157
|
+
* Infer type from explicit type metadata
|
|
158
|
+
*/
|
|
159
|
+
private inferTypeFromMetadata;
|
|
160
|
+
/**
|
|
161
|
+
* Infer type from Excel sheet name
|
|
162
|
+
*/
|
|
163
|
+
private inferTypeFromSheetName;
|
|
164
|
+
/**
|
|
165
|
+
* Get index size
|
|
166
|
+
*/
|
|
167
|
+
getIndexSize(): number;
|
|
168
|
+
/**
|
|
169
|
+
* Get statistics: index and cache sizes, call and match counters, and the cache-hit and per-matcher match rates
|
|
170
|
+
*/
|
|
171
|
+
getStats(): {
|
|
172
|
+
indexSize: number;
|
|
173
|
+
cacheSize: number;
|
|
174
|
+
cacheHitRate: number;
|
|
175
|
+
termMatchRate: number;
|
|
176
|
+
metadataMatchRate: number;
|
|
177
|
+
formatMatchRate: number;
|
|
178
|
+
calls: number;
|
|
179
|
+
cacheHits: number;
|
|
180
|
+
termMatches: number;
|
|
181
|
+
metadataMatches: number;
|
|
182
|
+
formatMatches: number;
|
|
183
|
+
};
|
|
184
|
+
/**
|
|
185
|
+
* Reset statistics
|
|
186
|
+
*/
|
|
187
|
+
resetStats(): void;
|
|
188
|
+
/**
|
|
189
|
+
* Clear cache
|
|
190
|
+
*/
|
|
191
|
+
clearCache(): void;
|
|
192
|
+
/**
|
|
193
|
+
* Clear index
|
|
194
|
+
*/
|
|
195
|
+
clearIndex(): void;
|
|
196
|
+
/**
|
|
197
|
+
* Normalize text for matching
|
|
198
|
+
*/
|
|
199
|
+
private normalize;
|
|
200
|
+
/**
|
|
201
|
+
* Tokenize text into words
|
|
202
|
+
*/
|
|
203
|
+
private tokenize;
|
|
204
|
+
/**
|
|
205
|
+
* Generate cache key
|
|
206
|
+
*/
|
|
207
|
+
private getCacheKey;
|
|
208
|
+
/**
|
|
209
|
+
* Get from LRU cache
|
|
210
|
+
*/
|
|
211
|
+
private getFromCache;
|
|
212
|
+
/**
|
|
213
|
+
* Add to LRU cache with eviction
|
|
214
|
+
*/
|
|
215
|
+
private addToCache;
|
|
216
|
+
}
|
|
217
|
+
/**
|
|
218
|
+
* Create a new ExactMatchSignal instance
|
|
219
|
+
*/
|
|
220
|
+
export declare function createExactMatchSignal(brain: Brainy, options?: ExactMatchSignalOptions): ExactMatchSignal;
|