@soulcraft/brainy 3.27.1 → 3.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,325 @@
1
+ /**
2
+ * Smart JSON Importer
3
+ *
4
+ * Extracts entities and relationships from JSON files using:
5
+ * - Recursive traversal of nested structures
6
+ * - NeuralEntityExtractor for entity extraction from text values
7
+ * - NaturalLanguageProcessor for relationship inference
8
+ * - Hierarchical relationship creation (parent-child, contains, etc.)
9
+ *
10
+ * NO MOCKS - Production-ready implementation
11
+ */
12
+ import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
13
+ import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
14
+ import { NounType, VerbType } from '../types/graphTypes.js';
15
/**
 * SmartJSONImporter - Extracts structured knowledge from JSON files.
 *
 * Walks a parsed JSON value recursively. Objects that expose a name-like key
 * become entities, sufficiently long string values are handed to the neural
 * entity extractor, and parent/child links are recorded as relationships.
 */
export class SmartJSONImporter {
    /**
     * @param brain Brainy instance. It is passed to the entity extractor and
     *   the NLP helper, and `brain.extractConcepts` is called for concept tagging.
     */
    constructor(brain) {
        this.brain = brain;
        this.extractor = new NeuralEntityExtractor(brain);
        this.nlp = new NaturalLanguageProcessor(brain);
    }
    /**
     * Initialize the importer by initializing the NLP helper.
     */
    async init() {
        await this.nlp.init();
    }
    /**
     * Extract entities and relationships from JSON data.
     *
     * @param data A JSON string (parsed here) or an already-parsed value.
     * @param options Overrides for the defaults below. Entries whose value is
     *   `undefined` or `null` are ignored so they cannot erase a default.
     * @returns Counts, the entity and relationship lists, the path -> entity id
     *   map, the elapsed time in milliseconds and the collected statistics.
     * @throws Error with an `Invalid JSON: ...` message when `data` is a string
     *   that cannot be parsed.
     */
    async extract(data, options = {}) {
        const startTime = Date.now();
        const opts = {
            enableNeuralExtraction: true,
            enableHierarchicalRelationships: true,
            enableConceptExtraction: true,
            confidenceThreshold: 0.6,
            maxDepth: 10,
            minStringLength: 20,
            nameKeys: ['name', 'title', 'label', 'id', 'key'],
            descriptionKeys: ['description', 'desc', 'details', 'text', 'content', 'summary'],
            typeKeys: ['type', 'kind', 'category', 'class'],
            onProgress: (_progress) => { }
        };
        // Copy overrides one by one: a plain spread would let an explicit
        // `undefined` replace a default (breaking the depth check, the key
        // lists and the progress callback).
        for (const [key, value] of Object.entries(options || {})) {
            if (value !== undefined && value !== null) {
                opts[key] = value;
            }
        }
        // Parse JSON if string
        let jsonData;
        if (typeof data === 'string') {
            try {
                jsonData = JSON.parse(data);
            }
            catch (error) {
                throw new Error(`Invalid JSON: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
        else {
            jsonData = data;
        }
        const entities = [];
        const relationships = [];
        const entityMap = new Map();
        const stats = {
            byType: {},
            byDepth: {},
            byConfidence: { high: 0, medium: 0, low: 0 }
        };
        let nodesProcessed = 0;
        // Progress is reported once every 10 visited nodes.
        const onNode = () => {
            nodesProcessed++;
            if (nodesProcessed % 10 === 0) {
                opts.onProgress({
                    processed: nodesProcessed,
                    entities: entities.length,
                    relationships: relationships.length
                });
            }
        };
        // The root has no parent path and no owning entity.
        await this.traverseJSON(jsonData, '', null, 0, opts, entities, relationships, entityMap, stats, onNode, null);
        return {
            nodesProcessed,
            entitiesExtracted: entities.length,
            relationshipsInferred: relationships.length,
            entities,
            relationships,
            entityMap,
            processingTime: Date.now() - startTime,
            stats
        };
    }
    /**
     * Recursively traverse a JSON structure, appending to `entities`,
     * `relationships`, `entityMap` and `stats` in place.
     *
     * @param node Current JSON value.
     * @param path Path of `node` (`''` for the root, `a.b` for keys, `a[0]` for items).
     * @param parentPath Path of the structural parent, or `null` for the root.
     * @param depth Current nesting depth; nodes deeper than `options.maxDepth` are skipped.
     * @param onNode Called once for every visited node.
     * @param ancestorEntityId Id of the nearest enclosing entity, or `null` when
     *   there is none. When omitted (`undefined`) the owner is looked up in
     *   `entityMap` by `parentPath`, which is the previous behaviour.
     */
    async traverseJSON(node, path, parentPath, depth, options, entities, relationships, entityMap, stats, onNode, ancestorEntityId) {
        // Stop if max depth reached
        if (depth > options.maxDepth)
            return;
        onNode();
        stats.byDepth[depth] = (stats.byDepth[depth] || 0) + 1;
        if (node === null || node === undefined)
            return;
        // Resolve the entity that owns this node. The explicit null check
        // matters: the root path is the empty string, which is falsy.
        let ownerId = null;
        if (ancestorEntityId !== undefined) {
            ownerId = ancestorEntityId;
        }
        else if (parentPath !== null && parentPath !== undefined && entityMap.has(parentPath)) {
            ownerId = entityMap.get(parentPath);
        }
        // Arrays never become entities; their items inherit the array's owner
        // so that hierarchy is not lost at every list boundary.
        if (Array.isArray(node)) {
            for (let i = 0; i < node.length; i++) {
                await this.traverseJSON(node[i], `${path}[${i}]`, path, depth + 1, options, entities, relationships, entityMap, stats, onNode, ownerId);
            }
            return;
        }
        if (typeof node === 'object') {
            const entity = await this.extractEntityFromObject(node, path, parentPath, depth, options, stats);
            // Objects without a usable name pass their own owner down unchanged.
            let childOwnerId = ownerId;
            if (entity) {
                entities.push(entity);
                entityMap.set(path, entity.id);
                if (options.enableHierarchicalRelationships && ownerId !== null) {
                    relationships.push({
                        from: ownerId,
                        to: entity.id,
                        type: VerbType.Contains,
                        confidence: 0.95,
                        evidence: `Hierarchical relationship: ${parentPath || '(root)'} contains ${path}`
                    });
                }
                childOwnerId = entity.id;
            }
            for (const [key, value] of Object.entries(node)) {
                const childPath = path ? `${path}.${key}` : key;
                await this.traverseJSON(value, childPath, path, depth + 1, options, entities, relationships, entityMap, stats, onNode, childOwnerId);
            }
            return;
        }
        // Only strings of at least `minStringLength` characters are mined for entities.
        if (typeof node === 'string' && node.length >= options.minStringLength) {
            if (options.enableNeuralExtraction) {
                const extractedEntities = await this.extractor.extract(node, {
                    confidence: options.confidenceThreshold,
                    neuralMatching: true,
                    cache: { enabled: true }
                });
                for (const extracted of extractedEntities) {
                    const entity = {
                        id: this.generateEntityId(extracted.text, path),
                        name: extracted.text,
                        type: extracted.type,
                        description: node,
                        confidence: extracted.confidence,
                        path,
                        parentPath,
                        metadata: {
                            source: 'json',
                            depth,
                            extractedAt: Date.now()
                        }
                    };
                    entities.push(entity);
                    this.updateStats(stats, entity.type, entity.confidence, depth);
                    // Link to the owning entity; confidence is scaled down
                    // relative to the extraction confidence.
                    if (options.enableHierarchicalRelationships && ownerId !== null) {
                        relationships.push({
                            from: ownerId,
                            to: entity.id,
                            type: VerbType.RelatedTo,
                            confidence: extracted.confidence * 0.9,
                            evidence: `Found in: ${path}`
                        });
                    }
                }
            }
        }
    }
    /**
     * Build an entity from a JSON object.
     *
     * @returns The entity, or `null` when none of `options.nameKeys` yields a
     *   non-empty value. The description falls back to the name; the type comes
     *   from an explicit type key when present, otherwise from the key names.
     */
    async extractEntityFromObject(obj, path, parentPath, depth, options, stats) {
        const name = this.findValue(obj, options.nameKeys);
        if (!name)
            return null;
        const description = this.findValue(obj, options.descriptionKeys) || name;
        const typeString = this.findValue(obj, options.typeKeys);
        const type = typeString ? this.mapTypeString(typeString) : this.inferTypeFromStructure(obj);
        let concepts = [];
        if (options.enableConceptExtraction && description.length > 0) {
            try {
                concepts = await this.brain.extractConcepts(description, { limit: 10 });
            }
            catch (error) {
                // Best effort: a failed concept lookup must not abort the import.
                concepts = [];
            }
        }
        const entity = {
            id: this.generateEntityId(name, path),
            name,
            type,
            description,
            confidence: 0.9, // Objects with explicit structure have high confidence
            path,
            parentPath,
            metadata: {
                source: 'json',
                depth,
                originalObject: obj,
                concepts,
                extractedAt: Date.now()
            }
        };
        this.updateStats(stats, entity.type, entity.confidence, depth);
        return entity;
    }
    /**
     * Find the first usable value in `obj` for the candidate `keys`.
     *
     * Exact key matches are tried first, in the order of `keys`, then
     * case-insensitive matches. Only strings, numbers, booleans and bigints are
     * accepted; objects and arrays are skipped because stringifying them gives
     * text such as "[object Object]".
     *
     * @returns The trimmed string form of the value, or `null` if none is found.
     */
    findValue(obj, keys) {
        const toText = (raw) => {
            const kind = typeof raw;
            if (kind !== 'string' && kind !== 'number' && kind !== 'boolean' && kind !== 'bigint') {
                return null;
            }
            const text = String(raw).trim();
            return text.length > 0 ? text : null;
        };
        for (const key of keys) {
            const text = toText(obj[key]);
            if (text !== null) {
                return text;
            }
        }
        // Try case-insensitive match, considering every key that matches.
        const ownKeys = Object.keys(obj);
        for (const key of keys) {
            const wanted = key.toLowerCase();
            for (const candidate of ownKeys) {
                if (candidate.toLowerCase() !== wanted)
                    continue;
                const text = toText(obj[candidate]);
                if (text !== null) {
                    return text;
                }
            }
        }
        return null;
    }
    /**
     * Infer a NounType from the object's key names (substring match on the
     * lower-cased keys). Rules are checked in order; the first match wins and
     * `NounType.Thing` is the fallback.
     */
    inferTypeFromStructure(obj) {
        const keys = Object.keys(obj).map(k => k.toLowerCase());
        const rules = [
            [NounType.Person, ['person', 'user', 'author']],
            [NounType.Location, ['location', 'place', 'address']],
            [NounType.Organization, ['organization', 'company', 'org']],
            [NounType.Event, ['event', 'date', 'time']],
            [NounType.Project, ['project', 'task']],
            [NounType.Document, ['document', 'file', 'url']]
        ];
        for (const [type, needles] of rules) {
            if (keys.some(k => needles.some(needle => k.includes(needle)))) {
                return type;
            }
        }
        return NounType.Thing;
    }
    /**
     * Map a free-form type string to a NounType (case-insensitive, trimmed).
     * Unknown strings map to `NounType.Thing`.
     */
    mapTypeString(typeString) {
        const normalized = typeString.toLowerCase().trim();
        const mapping = {
            'person': NounType.Person,
            'user': NounType.Person,
            'character': NounType.Person,
            'place': NounType.Location,
            'location': NounType.Location,
            'organization': NounType.Organization,
            'company': NounType.Organization,
            'org': NounType.Organization,
            'concept': NounType.Concept,
            'idea': NounType.Concept,
            'event': NounType.Event,
            'product': NounType.Product,
            'item': NounType.Product,
            'document': NounType.Document,
            'file': NounType.File,
            'project': NounType.Project,
            'thing': NounType.Thing
        };
        // Own-property check: a bare lookup would return inherited members for
        // inputs such as "constructor" or "__proto__".
        const known = Object.prototype.hasOwnProperty.call(mapping, normalized)
            ? mapping[normalized]
            : undefined;
        return known || NounType.Thing;
    }
    /**
     * Generate an entity ID from the normalized name and path.
     *
     * The current timestamp is appended, so IDs differ between runs and two
     * calls with the same name and path in the same millisecond are equal.
     */
    generateEntityId(name, path) {
        const normalized = name.toLowerCase().trim().replace(/\s+/g, '_');
        const pathNorm = path.replace(/[^a-zA-Z0-9]/g, '_');
        return `ent_${normalized}_${pathNorm}_${Date.now()}`;
    }
    /**
     * Update statistics: the per-type count and the confidence bucket
     * (high: > 0.8, medium: >= 0.6, low: otherwise). `depth` is accepted but
     * not used here; depth counts are maintained by `traverseJSON`.
     */
    updateStats(stats, type, confidence, depth) {
        const typeName = String(type);
        stats.byType[typeName] = (stats.byType[typeName] || 0) + 1;
        if (confidence > 0.8) {
            stats.byConfidence.high++;
        }
        else if (confidence >= 0.6) {
            stats.byConfidence.medium++;
        }
        else {
            stats.byConfidence.low++;
        }
    }
}
325
+ //# sourceMappingURL=SmartJSONImporter.js.map
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Smart Markdown Importer
3
+ *
4
+ * Extracts entities and relationships from Markdown files using:
5
+ * - Heading structure for entity organization
6
+ * - Link relationships
7
+ * - NeuralEntityExtractor for entity extraction from text
8
+ * - Section-based grouping
9
+ *
10
+ * NO MOCKS - Production-ready implementation
11
+ */
12
+ import { Brainy } from '../brainy.js';
13
+ import { NounType, VerbType } from '../types/graphTypes.js';
14
/** Options accepted by `SmartMarkdownImporter.extract`; every field is optional. */
export interface SmartMarkdownOptions {
    /** Turn neural entity extraction from text on or off. */
    enableNeuralExtraction?: boolean;
    /** Turn relationship inference on or off. */
    enableRelationshipInference?: boolean;
    /** Turn concept extraction (used for tagging) on or off. */
    enableConceptExtraction?: boolean;
    /** Confidence threshold for entities, in the range 0-1. */
    confidenceThreshold?: number;
    /** Whether code blocks are extracted as entities. */
    extractCodeBlocks?: boolean;
    /** Minimum section text length to process. */
    minSectionLength?: number;
    /** Whether to group by heading level. */
    groupByHeading?: boolean;
    /** Progress callback, invoked with running counters. */
    onProgress?: (stats: {
        processed: number;
        total: number;
        entities: number;
        relationships: number;
    }) => void;
}
37
/** One section of a parsed Markdown document together with what was extracted from it. */
export interface MarkdownSection {
    /** Section ID */
    id: string;
    /** Heading text, or `null` when the section has no heading */
    heading: string | null;
    /** Heading level (1-6) */
    level: number;
    /** Section content */
    content: string;
    /** Entities extracted from this section */
    entities: {
        id: string;
        name: string;
        type: NounType;
        description: string;
        confidence: number;
        metadata: Record<string, any>;
    }[];
    /** Links found in this section, each tagged as internal or external */
    links: {
        text: string;
        url: string;
        type: 'internal' | 'external';
    }[];
    /** Code blocks in this section (optional) */
    codeBlocks?: {
        language: string;
        code: string;
    }[];
    /** Relationships recorded for this section */
    relationships: {
        from: string;
        to: string;
        type: VerbType;
        confidence: number;
        evidence: string;
    }[];
    /** Concepts (optional) */
    concepts?: string[];
}
77
/** Result returned by `SmartMarkdownImporter.extract`. */
export interface SmartMarkdownResult {
    /** Total sections processed */
    sectionsProcessed: number;
    /** Number of entities extracted */
    entitiesExtracted: number;
    /** Number of relationships inferred */
    relationshipsInferred: number;
    /** All extracted sections */
    sections: MarkdownSection[];
    /** Entity ID mapping (name -> ID) */
    entityMap: Map<string, string>;
    /** Processing time in ms */
    processingTime: number;
    /** Extraction statistics */
    stats: {
        /** Counts keyed by type name */
        byType: Record<string, number>;
        /** Counts keyed by heading level */
        byHeadingLevel: Record<number, number>;
        /** Counts per confidence bucket */
        byConfidence: {
            high: number;
            medium: number;
            low: number;
        };
        linksFound: number;
        codeBlocksFound: number;
    };
}
103
/**
 * SmartMarkdownImporter - Extracts structured knowledge from Markdown files.
 *
 * Declaration only: the implementation lives in the accompanying JavaScript
 * file, so the private members below are listed without signatures.
 */
export declare class SmartMarkdownImporter {
    private brain;
    private extractor;
    private nlp;
    /**
     * @param brain Brainy instance the importer works with.
     */
    constructor(brain: Brainy);
    /**
     * Initialize the importer. Resolves once initialization has finished.
     */
    init(): Promise<void>;
    /**
     * Extract entities and relationships from Markdown content.
     *
     * @param markdown Markdown source text.
     * @param options Optional extraction settings.
     * @returns The extraction result (sections, entity map, timing, statistics).
     */
    extract(markdown: string, options?: SmartMarkdownOptions): Promise<SmartMarkdownResult>;
    /** Parse markdown into sections. */
    private parseMarkdown;
    /** Process a single section. */
    private processSection;
    /** Extract markdown links. */
    private extractLinks;
    /** Extract code blocks. */
    private extractCodeBlocks;
    /** Remove code blocks from content. */
    private removeCodeBlocks;
    /** Infer type from heading. */
    private inferTypeFromHeading;
    /** Check if entities are related by proximity. */
    private entitiesAreRelated;
    /** Infer relationship type from context. */
    private inferRelationship;
    /** Generate consistent entity ID. */
    private generateEntityId;
    /** Update statistics. */
    private updateStats;
}