@soulcraft/brainy 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +141 -28
- package/dist/augmentations/neuralImport.d.ts +12 -3
- package/dist/augmentations/neuralImport.js +195 -55
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.d.ts +83 -0
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.js +425 -0
- package/dist/brainyData.d.ts +4 -4
- package/dist/brainyData.js +34 -28
- package/dist/importManager.d.ts +78 -0
- package/dist/importManager.js +258 -0
- package/dist/neural/embeddedPatterns.d.ts +1 -1
- package/dist/neural/embeddedPatterns.js +1 -1
- package/package.json +5 -3
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intelligent Type Matcher - Uses embeddings for semantic type detection
|
|
3
|
+
*
|
|
4
|
+
* This module uses our existing TransformerEmbedding and similarity functions
|
|
5
|
+
* to intelligently match data to our 31 noun types and 40 verb types.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Semantic similarity matching using embeddings
|
|
9
|
+
* - Context-aware type detection
|
|
10
|
+
* - Confidence scoring
|
|
11
|
+
* - Caching for performance
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Outcome of matching a value against the known noun or verb types.
 */
export interface TypeMatchResult {
    /** Identifier of the best-scoring type. */
    type: string;
    /** Score assigned to the best-scoring type. */
    confidence: number;
    /** Human-readable explanation of why the type was selected. */
    reasoning: string;
    /** The next-best candidate types together with their scores. */
    alternatives: {
        type: string;
        confidence: number;
    }[];
}
|
|
25
|
+
/**
 * Embedding-based matcher that maps arbitrary data to a noun type and
 * relationships to a verb type.
 */
export declare class IntelligentTypeMatcher {
    private embedder;
    private nounEmbeddings;
    private verbEmbeddings;
    private initialized;
    private cache;
    constructor();
    /** Prepares the matcher by embedding the description of every known type. */
    init(): Promise<void>;
    /** Picks the noun type that best fits the given object. */
    matchNounType(obj: any): Promise<TypeMatchResult>;
    /**
     * Picks the verb type that best fits the relationship between two objects,
     * optionally guided by a free-text hint.
     */
    matchVerbType(sourceObj: any, targetObj: any, relationshipHint?: string): Promise<TypeMatchResult>;
    /** Builds the text that is embedded for an object. */
    private createTextRepresentation;
    /** Builds the text that is embedded for a relationship. */
    private createRelationshipText;
    /** Produces a short textual summary of an object. */
    private getObjectSummary;
    /** Rule-based noun type suggestion derived from the object's fields. */
    private applyNounHeuristics;
    /** Rule-based verb type suggestion derived from the relationship hint. */
    private applyVerbHeuristics;
    /** Composes the human-readable explanation attached to a match result. */
    private generateReasoning;
    /** Empties the result cache. */
    clearCache(): void;
    /** Releases the embedder and clears all cached state. */
    dispose(): Promise<void>;
}
|
|
80
|
+
/**
 * Returns the shared, module-wide type matcher, creating it on first use.
 */
export declare function getTypeMatcher(): Promise<IntelligentTypeMatcher>;
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intelligent Type Matcher - Uses embeddings for semantic type detection
|
|
3
|
+
*
|
|
4
|
+
* This module uses our existing TransformerEmbedding and similarity functions
|
|
5
|
+
* to intelligently match data to our 31 noun types and 40 verb types.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Semantic similarity matching using embeddings
|
|
9
|
+
* - Context-aware type detection
|
|
10
|
+
* - Confidence scoring
|
|
11
|
+
* - Caching for performance
|
|
12
|
+
*/
|
|
13
|
+
import { NounType, VerbType } from '../../types/graphTypes.js';
|
|
14
|
+
import { TransformerEmbedding } from '../../utils/embedding.js';
|
|
15
|
+
import { cosineDistance } from '../../utils/distance.js';
|
|
16
|
+
/**
 * Keyword descriptions for every noun type.
 * Each description is embedded once and compared against the embedding of
 * incoming data to find the closest noun type.
 */
const NOUN_TYPE_DESCRIPTIONS = Object.fromEntries([
    // --- Core entities ---
    [NounType.Person, 'person human individual user employee customer citizen member author creator agent actor participant'],
    [NounType.Organization, 'organization company business corporation institution agency department team group committee board'],
    [NounType.Location, 'location place address city country region area zone coordinate position site venue building'],
    [NounType.Thing, 'thing object item product device equipment tool instrument asset artifact material physical tangible'],
    [NounType.Concept, 'concept idea theory principle philosophy belief value abstract intangible notion thought'],
    [NounType.Event, 'event occurrence incident activity happening meeting conference celebration milestone timestamp date'],
    // --- Digital / content ---
    [NounType.Document, 'document file report article paper text pdf word contract agreement record documentation'],
    [NounType.Media, 'media image photo video audio music podcast multimedia graphic visualization animation'],
    [NounType.File, 'file digital data binary code script program software archive package bundle'],
    [NounType.Message, 'message email chat communication notification alert announcement broadcast transmission'],
    [NounType.Content, 'content information data text material resource publication post blog webpage'],
    // --- Collections ---
    [NounType.Collection, 'collection group set list array category folder directory catalog inventory database'],
    [NounType.Dataset, 'dataset data table spreadsheet database records statistics metrics measurements analysis'],
    // --- Business / application ---
    [NounType.Product, 'product item merchandise offering service feature application software solution package'],
    [NounType.Service, 'service offering subscription support maintenance utility function capability'],
    [NounType.User, 'user account profile member subscriber customer client participant identity credentials'],
    [NounType.Task, 'task action todo item job assignment duty responsibility activity step procedure'],
    [NounType.Project, 'project initiative program campaign effort endeavor plan scheme venture undertaking'],
    // --- Descriptive ---
    [NounType.Process, 'process workflow procedure method algorithm sequence pipeline operation routine protocol'],
    [NounType.State, 'state status condition phase stage mode situation circumstance configuration setting'],
    [NounType.Role, 'role position title function responsibility duty job capacity designation authority'],
    [NounType.Topic, 'topic subject theme category tag keyword area domain field discipline specialty'],
    [NounType.Language, 'language dialect locale tongue vernacular communication speech linguistics vocabulary'],
    [NounType.Currency, 'currency money dollar euro pound yen bitcoin payment financial monetary unit'],
    [NounType.Measurement, 'measurement metric quantity value amount size dimension weight height volume distance'],
    // --- Scientific / research ---
    [NounType.Hypothesis, 'hypothesis theory proposition thesis assumption premise conjecture speculation prediction'],
    [NounType.Experiment, 'experiment test trial study research investigation analysis observation examination'],
    // --- Legal / regulatory ---
    [NounType.Contract, 'contract agreement deal treaty pact covenant license terms conditions policy'],
    [NounType.Regulation, 'regulation law rule policy standard compliance requirement guideline ordinance statute'],
    // --- Technical infrastructure ---
    [NounType.Interface, 'interface API endpoint protocol specification contract schema definition connection'],
    [NounType.Resource, 'resource infrastructure server database storage compute memory bandwidth capacity asset']
]);
|
|
61
|
+
/**
 * Keyword descriptions for every verb (relationship) type.
 * Each description is embedded once and compared against the embedding of
 * an incoming relationship to find the closest verb type.
 */
const VERB_TYPE_DESCRIPTIONS = Object.fromEntries([
    // --- Core relationships ---
    [VerbType.RelatedTo, 'related connected associated linked correlated relevant pertinent applicable'],
    [VerbType.Contains, 'contains includes holds stores encompasses comprises consists incorporates'],
    [VerbType.PartOf, 'part component element member piece portion section segment constituent'],
    [VerbType.LocatedAt, 'located situated positioned placed found exists resides occupies'],
    [VerbType.References, 'references cites mentions points links refers quotes sources'],
    // --- Temporal / causal ---
    [VerbType.Precedes, 'precedes before earlier prior previous antecedent preliminary foregoing'],
    [VerbType.Succeeds, 'succeeds follows after later subsequent next ensuing succeeding'],
    [VerbType.Causes, 'causes triggers induces produces generates results influences affects'],
    [VerbType.DependsOn, 'depends requires needs relies necessitates contingent prerequisite'],
    [VerbType.Requires, 'requires needs demands necessitates mandates obliges compels entails'],
    // --- Creation / transformation ---
    [VerbType.Creates, 'creates makes produces generates builds constructs forms establishes'],
    [VerbType.Transforms, 'transforms converts changes modifies alters transitions morphs evolves'],
    [VerbType.Becomes, 'becomes turns evolves transforms changes transitions develops grows'],
    [VerbType.Modifies, 'modifies changes updates alters edits revises adjusts adapts'],
    [VerbType.Consumes, 'consumes uses utilizes depletes expends absorbs takes processes'],
    // --- Ownership / attribution ---
    [VerbType.Owns, 'owns possesses holds controls manages administers governs maintains'],
    [VerbType.AttributedTo, 'attributed credited assigned ascribed authored written composed'],
    [VerbType.CreatedBy, 'created made produced generated built developed authored written'],
    [VerbType.BelongsTo, 'belongs property possession part member affiliate associated owned'],
    // --- Social / organizational ---
    [VerbType.MemberOf, 'member participant affiliate associate belongs joined enrolled registered'],
    [VerbType.WorksWith, 'works collaborates cooperates partners teams assists helps supports'],
    [VerbType.FriendOf, 'friend companion buddy pal acquaintance associate connection relationship'],
    [VerbType.Follows, 'follows subscribes tracks monitors watches observes trails pursues'],
    [VerbType.Likes, 'likes enjoys appreciates favors prefers admires values endorses'],
    [VerbType.ReportsTo, 'reports answers subordinate accountable responsible supervised managed'],
    [VerbType.Supervises, 'supervises manages oversees directs leads controls guides administers'],
    [VerbType.Mentors, 'mentors teaches guides coaches instructs trains advises counsels'],
    [VerbType.Communicates, 'communicates talks speaks messages contacts interacts corresponds exchanges'],
    // --- Descriptive / functional ---
    [VerbType.Describes, 'describes explains details documents specifies outlines depicts characterizes'],
    [VerbType.Defines, 'defines specifies establishes determines sets declares identifies designates'],
    [VerbType.Categorizes, 'categorizes classifies groups sorts organizes arranges labels tags'],
    [VerbType.Measures, 'measures quantifies gauges assesses evaluates calculates determines counts'],
    [VerbType.Evaluates, 'evaluates assesses analyzes reviews examines appraises judges rates'],
    [VerbType.Uses, 'uses utilizes employs applies operates handles manipulates exploits'],
    [VerbType.Implements, 'implements executes realizes performs accomplishes carries delivers completes'],
    [VerbType.Extends, 'extends expands enhances augments amplifies broadens enlarges develops'],
    // --- Enhanced relationships ---
    [VerbType.Inherits, 'inherits derives extends receives obtains acquires succeeds legacy'],
    [VerbType.Conflicts, 'conflicts contradicts opposes clashes disputes disagrees incompatible inconsistent'],
    [VerbType.Synchronizes, 'synchronizes coordinates aligns harmonizes matches corresponds parallels coincides'],
    [VerbType.Competes, 'competes rivals contends contests challenges opposes vies struggles']
]);
|
|
110
|
+
/** Multiplier applied to a heuristically suggested type that is not already ranked first. */
const HEURISTIC_BOOST = 1.2;
/** Maximum number of characters taken from a single string field when building embedding text. */
const MAX_FIELD_CHARS = 100;
/** Matches URLs that contain one of the recognised media file extensions. */
const MEDIA_URL_PATTERN = /\.(jpg|jpeg|png|gif|mp4|mp3|wav)/i;
/**
 * Convert a scalar field value to text suitable for embedding.
 *
 * @param value Any field value.
 * @returns The (length-limited) text for strings, numbers, booleans and
 *          bigints, or `null` for everything else. Objects and arrays are
 *          rejected so they never end up as "[object Object]" in the text.
 */
function scalarToText(value) {
    switch (typeof value) {
        case 'string':
            return value.slice(0, MAX_FIELD_CHARS);
        case 'number':
        case 'boolean':
        case 'bigint':
            return String(value);
        default:
            return null;
    }
}
/**
 * Intelligent Type Matcher using semantic embeddings.
 *
 * Every noun and verb type description is embedded once; incoming data is
 * embedded as well and ranked by cosine similarity against those type
 * embeddings. Field-based heuristics can boost a candidate type, and results
 * are cached by the text that was embedded.
 */
export class IntelligentTypeMatcher {
    constructor() {
        this.nounEmbeddings = new Map();
        this.verbEmbeddings = new Map();
        this.initialized = false;
        // In-flight initialization, shared by overlapping init() calls.
        this.initPromise = null;
        this.cache = new Map();
        this.embedder = new TransformerEmbedding({ verbose: false });
    }
    /**
     * Initialize the type matcher by generating embeddings for all types.
     *
     * Safe to call repeatedly: once initialized it returns immediately, and
     * overlapping calls share a single initialization run instead of each
     * regenerating every embedding. A failed run can be retried.
     *
     * @returns Resolves when all type embeddings are available.
     */
    async init() {
        if (this.initialized)
            return;
        if (!this.initPromise) {
            this.initPromise = this.loadTypeEmbeddings().finally(() => {
                // Drop the handle so a failed run can be retried later.
                this.initPromise = null;
            });
        }
        await this.initPromise;
    }
    /**
     * Initialize the embedder and embed every noun and verb type description.
     * Marks the matcher as initialized only after all embeddings were stored.
     */
    async loadTypeEmbeddings() {
        await this.embedder.init();
        // Generate embeddings for noun types
        for (const [type, description] of Object.entries(NOUN_TYPE_DESCRIPTIONS)) {
            this.nounEmbeddings.set(type, await this.embedder.embed(description));
        }
        // Generate embeddings for verb types
        for (const [type, description] of Object.entries(VERB_TYPE_DESCRIPTIONS)) {
            this.verbEmbeddings.set(type, await this.embedder.embed(description));
        }
        this.initialized = true;
    }
    /**
     * Match an object to the most appropriate noun type.
     *
     * @param obj Value to classify; objects, strings and other primitives
     *            (including null/undefined) are accepted.
     * @returns The best type with its confidence, a reasoning string and up
     *          to three alternatives. Results are cached per embedded text.
     */
    async matchNounType(obj) {
        await this.init();
        const textRepresentation = this.createTextRepresentation(obj);
        const cacheKey = `noun:${textRepresentation}`;
        if (this.cache.has(cacheKey)) {
            return this.cache.get(cacheKey);
        }
        const inputEmbedding = await this.embedder.embed(textRepresentation);
        const ranked = this.rankTypes(inputEmbedding, this.nounEmbeddings, this.applyNounHeuristics(obj));
        const result = {
            type: ranked[0].type,
            confidence: ranked[0].similarity,
            reasoning: this.generateReasoning(obj, ranked[0].type, 'noun'),
            alternatives: ranked.slice(1, 4).map(s => ({
                type: s.type,
                confidence: s.similarity
            }))
        };
        this.cache.set(cacheKey, result);
        return result;
    }
    /**
     * Match a relationship to the most appropriate verb type.
     *
     * @param sourceObj Source side of the relationship.
     * @param targetObj Target side of the relationship.
     * @param relationshipHint Optional free-text hint describing the relationship.
     * @returns The best type with its confidence, a reasoning string and up
     *          to three alternatives. Results are cached per embedded text.
     */
    async matchVerbType(sourceObj, targetObj, relationshipHint) {
        await this.init();
        const textRepresentation = this.createRelationshipText(sourceObj, targetObj, relationshipHint);
        const cacheKey = `verb:${textRepresentation}`;
        if (this.cache.has(cacheKey)) {
            return this.cache.get(cacheKey);
        }
        const inputEmbedding = await this.embedder.embed(textRepresentation);
        const ranked = this.rankTypes(inputEmbedding, this.verbEmbeddings, this.applyVerbHeuristics(sourceObj, targetObj, relationshipHint));
        const result = {
            type: ranked[0].type,
            confidence: ranked[0].similarity,
            reasoning: this.generateReasoning({ source: sourceObj, target: targetObj, hint: relationshipHint }, ranked[0].type, 'verb'),
            alternatives: ranked.slice(1, 4).map(s => ({
                type: s.type,
                confidence: s.similarity
            }))
        };
        this.cache.set(cacheKey, result);
        return result;
    }
    /**
     * Rank every known type by similarity to the input embedding.
     *
     * @param inputEmbedding Embedding of the value being classified.
     * @param typeEmbeddings Map from type to the embedding of its description.
     * @param heuristicType Type suggested by the heuristics, or null/undefined.
     * @returns Non-empty list of `{ type, similarity }`, highest similarity first.
     * @throws Error when no type embeddings are loaded.
     */
    rankTypes(inputEmbedding, typeEmbeddings, heuristicType) {
        const ranked = [];
        for (const [type, typeEmbedding] of typeEmbeddings.entries()) {
            // Convert cosine distance to similarity (1 - distance)
            ranked.push({ type, similarity: 1 - cosineDistance(inputEmbedding, typeEmbedding) });
        }
        if (ranked.length === 0) {
            throw new Error('IntelligentTypeMatcher: no type embeddings are loaded');
        }
        const bySimilarityDesc = (a, b) => b.similarity - a.similarity;
        ranked.sort(bySimilarityDesc);
        if (heuristicType) {
            const heuristicIndex = ranked.findIndex(s => s.type === heuristicType);
            // Only boost when the suggested type is present and not already on top.
            // Multiplying a non-positive score would lower it, so those are left alone;
            // the boosted score is capped at 1 so confidence never exceeds 1.
            if (heuristicIndex > 0 && ranked[heuristicIndex].similarity > 0) {
                ranked[heuristicIndex].similarity = Math.min(1, ranked[heuristicIndex].similarity * HEURISTIC_BOOST);
                ranked.sort(bySimilarityDesc);
            }
        }
        return ranked;
    }
    /**
     * Create text representation of an object for embedding.
     *
     * Strings are used as-is and other non-objects are stringified. For
     * objects, every key is included along with its scalar value; a fixed set
     * of descriptive fields is then appended twice to give it more weight.
     * Non-scalar values are skipped and strings are length-limited.
     */
    createTextRepresentation(obj) {
        if (typeof obj === 'string')
            return obj;
        if (typeof obj !== 'object' || obj === null)
            return String(obj);
        const parts = [];
        // Add field names and scalar values
        for (const [key, value] of Object.entries(obj)) {
            parts.push(key);
            const text = scalarToText(value);
            if (text !== null)
                parts.push(text);
        }
        // Add special fields with higher weight
        const importantFields = ['type', 'kind', 'category', 'class', 'name', 'title', 'description'];
        for (const field of importantFields) {
            const text = obj[field] ? scalarToText(obj[field]) : null;
            if (text !== null) {
                parts.push(text, text); // Double weight for important fields
            }
        }
        return parts.join(' ');
    }
    /**
     * Create text representation of a relationship: the hint (twice, for
     * weight) followed by short summaries of the source and target.
     */
    createRelationshipText(sourceObj, targetObj, relationshipHint) {
        const parts = [];
        if (relationshipHint) {
            parts.push(relationshipHint, relationshipHint); // Double weight for explicit hint
        }
        if (sourceObj) {
            parts.push('source:', this.getObjectSummary(sourceObj));
        }
        if (targetObj) {
            parts.push('target:', this.getObjectSummary(targetObj));
        }
        return parts.join(' ');
    }
    /**
     * Get a brief summary of an object: at most 50 characters of a string, or
     * at most 100 characters built from its identifying scalar fields.
     */
    getObjectSummary(obj) {
        if (typeof obj === 'string')
            return obj.slice(0, 50);
        if (typeof obj !== 'object' || obj === null)
            return String(obj);
        const summary = [];
        const fields = ['type', 'name', 'title', 'id', 'category', 'kind'];
        for (const field of fields) {
            const text = obj[field] ? scalarToText(obj[field]) : null;
            if (text !== null) {
                summary.push(text);
            }
        }
        return summary.join(' ').slice(0, 100);
    }
    /**
     * Apply heuristic rules for noun type detection.
     *
     * Rules are checked in order and the first match wins.
     *
     * @returns The suggested noun type, or null when no rule applies or the
     *          input is not an object.
     */
    applyNounHeuristics(obj) {
        if (typeof obj !== 'object' || obj === null)
            return null;
        // Person heuristics
        if (obj.email || obj.firstName || obj.lastName || obj.username || obj.age || obj.gender) {
            return NounType.Person;
        }
        // Organization heuristics
        if (obj.companyName || obj.organizationId || obj.employees || obj.industry) {
            return NounType.Organization;
        }
        // Location heuristics
        if (obj.latitude || obj.longitude || obj.address || obj.city || obj.country || obj.coordinates) {
            return NounType.Location;
        }
        // Document heuristics
        if ((obj.content && (obj.title || obj.author)) || obj.documentType || obj.pages) {
            return NounType.Document;
        }
        // Event heuristics
        if (obj.startTime || obj.endTime || obj.date || obj.eventType || obj.attendees) {
            return NounType.Event;
        }
        // Product heuristics
        if (obj.price || obj.sku || obj.inventory || obj.productId) {
            return NounType.Product;
        }
        // Task heuristics
        if ((obj.status && (obj.assignee || obj.dueDate)) || obj.priority || obj.completed !== undefined) {
            return NounType.Task;
        }
        // Media heuristics (a non-string url has no extension to inspect)
        if (typeof obj.url === 'string' && MEDIA_URL_PATTERN.test(obj.url)) {
            return NounType.Media;
        }
        // Dataset heuristics
        if (Array.isArray(obj.data) || obj.rows || obj.columns || obj.schema) {
            return NounType.Dataset;
        }
        return null;
    }
    /**
     * Apply heuristic rules for verb type detection.
     *
     * Only the hint is inspected, using case-insensitive substring checks;
     * rules are checked in order and the first match wins.
     *
     * @returns The suggested verb type, or null when there is no hint or no
     *          rule applies.
     */
    applyVerbHeuristics(sourceObj, targetObj, relationshipHint) {
        if (!relationshipHint)
            return null;
        const hint = relationshipHint.toLowerCase();
        // Ownership patterns
        if (hint.includes('own') || hint.includes('possess') || hint.includes('has')) {
            return VerbType.Owns;
        }
        // Creation patterns
        if (hint.includes('create') || hint.includes('made') || hint.includes('authored')) {
            return VerbType.Creates;
        }
        // Containment patterns ('has' is already claimed by the ownership rule above)
        if (hint.includes('contain') || hint.includes('include')) {
            return VerbType.Contains;
        }
        // Membership patterns
        if (hint.includes('member') || hint.includes('belong') || hint.includes('part')) {
            return VerbType.MemberOf;
        }
        // Reference patterns
        if (hint.includes('refer') || hint.includes('cite') || hint.includes('link')) {
            return VerbType.References;
        }
        // Dependency patterns
        if (hint.includes('depend') || hint.includes('require') || hint.includes('need')) {
            return VerbType.DependsOn;
        }
        return null;
    }
    /**
     * Generate human-readable reasoning for the type selection.
     *
     * @param obj The classified value (noun) or relationship context (verb).
     * @param selectedType The type that was chosen.
     * @param typeKind Either 'noun' or 'verb'.
     * @returns A sentence quoting the first five description keywords of the
     *          selected type.
     */
    generateReasoning(obj, selectedType, typeKind) {
        const descriptions = typeKind === 'noun' ? NOUN_TYPE_DESCRIPTIONS : VERB_TYPE_DESCRIPTIONS;
        const typeDesc = descriptions[selectedType] ?? '';
        const keywords = typeDesc.split(' ').slice(0, 5).join(' ');
        const prefix = `Matched to ${selectedType} based on semantic similarity to "${keywords}..."`;
        if (typeKind !== 'noun') {
            return `${prefix} and relationship context`;
        }
        // Object.keys throws on null/undefined and lists character indices for
        // strings, so field names are only reported for real objects.
        if (typeof obj !== 'object' || obj === null) {
            return `${prefix} and the raw input value`;
        }
        const fields = Object.keys(obj).slice(0, 3).join(', ');
        return `${prefix} and object fields: ${fields}`;
    }
    /**
     * Clear the cache
     */
    clearCache() {
        this.cache.clear();
    }
    /**
     * Dispose of resources.
     *
     * Also resets the initialized flag so that a later match re-runs init()
     * instead of ranking against the now-empty embedding tables.
     * NOTE(review): whether the embedder can be initialized again after
     * dispose() is not visible from this file; confirm.
     */
    async dispose() {
        await this.embedder.dispose();
        this.cache.clear();
        this.nounEmbeddings.clear();
        this.verbEmbeddings.clear();
        this.initialized = false;
    }
}
|
|
411
|
+
/**
 * Singleton instance for efficient reuse.
 * Set only after the instance has finished initializing.
 */
let globalMatcher = null;
/**
 * Pending or completed creation of the singleton. Caching the promise lets
 * overlapping callers wait for the same initialization run.
 */
let globalMatcherReady = null;
/**
 * Get or create the global type matcher instance.
 *
 * The returned matcher has always completed init(). If initialization fails
 * the error is propagated and the next call starts over with a new instance.
 *
 * @returns The shared, initialized IntelligentTypeMatcher.
 */
export async function getTypeMatcher() {
    if (!globalMatcherReady) {
        const matcher = new IntelligentTypeMatcher();
        globalMatcherReady = matcher.init().then(() => {
            globalMatcher = matcher;
            return matcher;
        }, (error) => {
            // Forget the failed attempt so a later call can retry.
            globalMatcherReady = null;
            throw error;
        });
    }
    return globalMatcherReady;
}
|
|
425
|
+
//# sourceMappingURL=intelligentTypeMatcher.js.map
|
package/dist/brainyData.d.ts
CHANGED
|
@@ -424,6 +424,7 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
|
|
|
424
424
|
private _neural?;
|
|
425
425
|
private _tripleEngine?;
|
|
426
426
|
private _nlpProcessor?;
|
|
427
|
+
private _importManager?;
|
|
427
428
|
private cacheAutoConfigurator;
|
|
428
429
|
private timeoutConfig;
|
|
429
430
|
private retryConfig;
|
|
@@ -1490,11 +1491,10 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
|
|
|
1490
1491
|
* @param options Import options including type hints and processing mode
|
|
1491
1492
|
* @returns Array of created IDs
|
|
1492
1493
|
*/
|
|
1493
|
-
import(
|
|
1494
|
-
|
|
1495
|
-
autoDetect?: boolean;
|
|
1494
|
+
import(source: any[] | any | string | Buffer, options?: {
    /** Input format; 'auto' is used when omitted. */
    format?: 'auto' | 'json' | 'csv' | 'yaml' | 'text';
    /** Batch size handed to the import manager; 50 is used when omitted. */
    batchSize?: number;
    /** Relationship extraction stays enabled unless this is explicitly false. */
    relationships?: boolean;
}): Promise<string[]>;
|
|
1499
1499
|
/**
|
|
1500
1500
|
* Add Noun - Explicit noun creation with strongly-typed NounType
|
package/dist/brainyData.js
CHANGED
|
@@ -5008,37 +5008,43 @@ export class BrainyData {
|
|
|
5008
5008
|
* @param options Import options including type hints and processing mode
|
|
5009
5009
|
* @returns Array of created IDs
|
|
5010
5010
|
*/
|
|
5011
|
-
async import(
|
|
5012
|
-
|
|
5013
|
-
|
|
5014
|
-
|
|
5015
|
-
|
|
5016
|
-
|
|
5017
|
-
|
|
5018
|
-
|
|
5019
|
-
|
|
5020
|
-
|
|
5021
|
-
|
|
5022
|
-
|
|
5023
|
-
|
|
5024
|
-
|
|
5025
|
-
|
|
5026
|
-
|
|
5027
|
-
|
|
5028
|
-
|
|
5029
|
-
}
|
|
5030
|
-
|
|
5031
|
-
const id = await this.addNoun(item, metadata);
|
|
5032
|
-
results.push(id);
|
|
5033
|
-
}
|
|
5034
|
-
catch (error) {
|
|
5035
|
-
prodLog.warn(`Failed to import item:`, error);
|
|
5036
|
-
// Continue with next item rather than failing entire batch
|
|
5011
|
+
async import(source, options) {
|
|
5012
|
+
// Lazy-load import manager for zero overhead when not used
|
|
5013
|
+
if (!this._importManager) {
|
|
5014
|
+
const { ImportManager } = await import('./importManager.js');
|
|
5015
|
+
this._importManager = new ImportManager(this);
|
|
5016
|
+
await this._importManager.init();
|
|
5017
|
+
}
|
|
5018
|
+
// AUTO-DETECT: Is it a URL or file path?
|
|
5019
|
+
if (typeof source === 'string') {
|
|
5020
|
+
// URL detection
|
|
5021
|
+
if (source.startsWith('http://') || source.startsWith('https://')) {
|
|
5022
|
+
const result = await this._importManager.importUrl(source, options || {});
|
|
5023
|
+
return result.nouns;
|
|
5024
|
+
}
|
|
5025
|
+
// File path detection
|
|
5026
|
+
try {
|
|
5027
|
+
const { exists } = await import('./universal/fs.js');
|
|
5028
|
+
if (await exists(source)) {
|
|
5029
|
+
const result = await this._importManager.importFile(source, options || {});
|
|
5030
|
+
return result.nouns;
|
|
5037
5031
|
}
|
|
5038
5032
|
}
|
|
5033
|
+
catch { }
|
|
5039
5034
|
}
|
|
5040
|
-
|
|
5041
|
-
|
|
5035
|
+
// Regular data import (objects, arrays, or raw text)
|
|
5036
|
+
const result = await this._importManager.import(source, {
|
|
5037
|
+
format: options?.format || 'auto',
|
|
5038
|
+
batchSize: options?.batchSize || 50,
|
|
5039
|
+
extractRelationships: options?.relationships !== false,
|
|
5040
|
+
autoDetect: true, // Always intelligent
|
|
5041
|
+
parallel: true // Always fast
|
|
5042
|
+
});
|
|
5043
|
+
if (result.errors.length > 0) {
|
|
5044
|
+
prodLog.warn(`Import had ${result.errors.length} errors:`, result.errors[0]);
|
|
5045
|
+
}
|
|
5046
|
+
prodLog.info(`✨ Imported ${result.stats.imported} items, ${result.stats.relationships} relationships`);
|
|
5047
|
+
return result.nouns;
|
|
5042
5048
|
}
|
|
5043
5049
|
/**
|
|
5044
5050
|
* Add Noun - Explicit noun creation with strongly-typed NounType
|