@soulcraft/brainy 4.1.3 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +100 -7
- package/dist/brainy.d.ts +74 -16
- package/dist/brainy.js +74 -16
- package/dist/import/FormatDetector.d.ts +6 -1
- package/dist/import/FormatDetector.js +40 -1
- package/dist/import/ImportCoordinator.d.ts +155 -5
- package/dist/import/ImportCoordinator.js +346 -6
- package/dist/import/InstancePool.d.ts +136 -0
- package/dist/import/InstancePool.js +231 -0
- package/dist/importers/SmartCSVImporter.d.ts +2 -1
- package/dist/importers/SmartCSVImporter.js +11 -22
- package/dist/importers/SmartDOCXImporter.d.ts +125 -0
- package/dist/importers/SmartDOCXImporter.js +227 -0
- package/dist/importers/SmartExcelImporter.d.ts +12 -1
- package/dist/importers/SmartExcelImporter.js +40 -25
- package/dist/importers/SmartJSONImporter.d.ts +1 -0
- package/dist/importers/SmartJSONImporter.js +25 -6
- package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
- package/dist/importers/SmartMarkdownImporter.js +11 -16
- package/dist/importers/SmartPDFImporter.d.ts +2 -1
- package/dist/importers/SmartPDFImporter.js +11 -22
- package/dist/importers/SmartYAMLImporter.d.ts +121 -0
- package/dist/importers/SmartYAMLImporter.js +275 -0
- package/dist/importers/VFSStructureGenerator.js +12 -0
- package/dist/neural/SmartExtractor.d.ts +279 -0
- package/dist/neural/SmartExtractor.js +592 -0
- package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
- package/dist/neural/SmartRelationshipExtractor.js +396 -0
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/neural/entityExtractor.d.ts +3 -0
- package/dist/neural/entityExtractor.js +34 -36
- package/dist/neural/presets.d.ts +189 -0
- package/dist/neural/presets.js +365 -0
- package/dist/neural/signals/ContextSignal.d.ts +166 -0
- package/dist/neural/signals/ContextSignal.js +646 -0
- package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
- package/dist/neural/signals/EmbeddingSignal.js +435 -0
- package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
- package/dist/neural/signals/ExactMatchSignal.js +542 -0
- package/dist/neural/signals/PatternSignal.d.ts +159 -0
- package/dist/neural/signals/PatternSignal.js +478 -0
- package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
- package/dist/neural/signals/VerbContextSignal.js +390 -0
- package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
- package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
- package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
- package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
- package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
- package/dist/neural/signals/VerbPatternSignal.js +457 -0
- package/dist/types/graphTypes.d.ts +2 -0
- package/package.json +4 -1
|
@@ -17,6 +17,8 @@ import { SmartPDFImporter } from '../importers/SmartPDFImporter.js';
|
|
|
17
17
|
import { SmartCSVImporter } from '../importers/SmartCSVImporter.js';
|
|
18
18
|
import { SmartJSONImporter } from '../importers/SmartJSONImporter.js';
|
|
19
19
|
import { SmartMarkdownImporter } from '../importers/SmartMarkdownImporter.js';
|
|
20
|
+
import { SmartYAMLImporter } from '../importers/SmartYAMLImporter.js';
|
|
21
|
+
import { SmartDOCXImporter } from '../importers/SmartDOCXImporter.js';
|
|
20
22
|
import { VFSStructureGenerator } from '../importers/VFSStructureGenerator.js';
|
|
21
23
|
import { NounType } from '../types/graphTypes.js';
|
|
22
24
|
import { v4 as uuidv4 } from '../universal/uuid.js';
|
|
@@ -36,6 +38,8 @@ export class ImportCoordinator {
|
|
|
36
38
|
this.csvImporter = new SmartCSVImporter(brain);
|
|
37
39
|
this.jsonImporter = new SmartJSONImporter(brain);
|
|
38
40
|
this.markdownImporter = new SmartMarkdownImporter(brain);
|
|
41
|
+
this.yamlImporter = new SmartYAMLImporter(brain);
|
|
42
|
+
this.docxImporter = new SmartDOCXImporter(brain);
|
|
39
43
|
this.vfsGenerator = new VFSStructureGenerator(brain);
|
|
40
44
|
}
|
|
41
45
|
/**
|
|
@@ -47,6 +51,8 @@ export class ImportCoordinator {
|
|
|
47
51
|
await this.csvImporter.init();
|
|
48
52
|
await this.jsonImporter.init();
|
|
49
53
|
await this.markdownImporter.init();
|
|
54
|
+
await this.yamlImporter.init();
|
|
55
|
+
await this.docxImporter.init();
|
|
50
56
|
await this.vfsGenerator.init();
|
|
51
57
|
await this.history.init();
|
|
52
58
|
}
|
|
@@ -58,12 +64,15 @@ export class ImportCoordinator {
|
|
|
58
64
|
}
|
|
59
65
|
/**
|
|
60
66
|
* Import from any source with auto-detection
|
|
67
|
+
* v4.2.0: Now supports URL imports with authentication
|
|
61
68
|
*/
|
|
62
69
|
async import(source, options = {}) {
|
|
63
70
|
const startTime = Date.now();
|
|
64
71
|
const importId = uuidv4();
|
|
65
|
-
//
|
|
66
|
-
|
|
72
|
+
// Validate options (v4.0.0+: Reject deprecated v3.x options)
|
|
73
|
+
this.validateOptions(options);
|
|
74
|
+
// Normalize source (v4.2.0: handles URL fetching)
|
|
75
|
+
const normalizedSource = await this.normalizeSource(source, options.format);
|
|
67
76
|
// Report detection stage
|
|
68
77
|
options.onProgress?.({
|
|
69
78
|
stage: 'detecting',
|
|
@@ -168,8 +177,16 @@ export class ImportCoordinator {
|
|
|
168
177
|
}
|
|
169
178
|
/**
|
|
170
179
|
* Normalize source to ImportSource
|
|
180
|
+
* v4.2.0: Now async to support URL fetching
|
|
171
181
|
*/
|
|
172
|
-
normalizeSource(source, formatHint) {
|
|
182
|
+
async normalizeSource(source, formatHint) {
|
|
183
|
+
// If already an ImportSource, handle URL fetching if needed
|
|
184
|
+
if (this.isImportSource(source)) {
|
|
185
|
+
if (source.type === 'url') {
|
|
186
|
+
return await this.fetchUrl(source);
|
|
187
|
+
}
|
|
188
|
+
return source;
|
|
189
|
+
}
|
|
173
190
|
// Buffer
|
|
174
191
|
if (Buffer.isBuffer(source)) {
|
|
175
192
|
return {
|
|
@@ -177,8 +194,15 @@ export class ImportCoordinator {
|
|
|
177
194
|
data: source
|
|
178
195
|
};
|
|
179
196
|
}
|
|
180
|
-
// String - could be path or content
|
|
197
|
+
// String - could be URL, path, or content
|
|
181
198
|
if (typeof source === 'string') {
|
|
199
|
+
// Check if it's a URL
|
|
200
|
+
if (this.isUrl(source)) {
|
|
201
|
+
return await this.fetchUrl({
|
|
202
|
+
type: 'url',
|
|
203
|
+
data: source
|
|
204
|
+
});
|
|
205
|
+
}
|
|
182
206
|
// Check if it's a file path
|
|
183
207
|
if (this.isFilePath(source)) {
|
|
184
208
|
const buffer = fs.readFileSync(source);
|
|
@@ -201,7 +225,73 @@ export class ImportCoordinator {
|
|
|
201
225
|
data: source
|
|
202
226
|
};
|
|
203
227
|
}
|
|
204
|
-
throw new Error('Invalid source type. Expected Buffer, string, or
|
|
228
|
+
throw new Error('Invalid source type. Expected Buffer, string, object, or ImportSource.');
|
|
229
|
+
}
|
|
230
|
+
/**
|
|
231
|
+
* Check if value is an ImportSource object
|
|
232
|
+
*/
|
|
233
|
+
isImportSource(value) {
|
|
234
|
+
return value && typeof value === 'object' && 'type' in value && 'data' in value;
|
|
235
|
+
}
|
|
236
|
+
/**
|
|
237
|
+
* Check if string is a URL
|
|
238
|
+
*/
|
|
239
|
+
isUrl(str) {
|
|
240
|
+
try {
|
|
241
|
+
const url = new URL(str);
|
|
242
|
+
return url.protocol === 'http:' || url.protocol === 'https:';
|
|
243
|
+
}
|
|
244
|
+
catch {
|
|
245
|
+
return false;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
/**
|
|
249
|
+
* Fetch content from URL
|
|
250
|
+
* v4.2.0: Supports authentication and custom headers
|
|
251
|
+
*/
|
|
252
|
+
async fetchUrl(source) {
|
|
253
|
+
const url = typeof source.data === 'string' ? source.data : String(source.data);
|
|
254
|
+
// Build headers
|
|
255
|
+
const headers = {
|
|
256
|
+
'User-Agent': 'Brainy/4.2.0',
|
|
257
|
+
...(source.headers || {})
|
|
258
|
+
};
|
|
259
|
+
// Add basic auth if provided
|
|
260
|
+
if (source.auth) {
|
|
261
|
+
const credentials = Buffer.from(`${source.auth.username}:${source.auth.password}`).toString('base64');
|
|
262
|
+
headers['Authorization'] = `Basic ${credentials}`;
|
|
263
|
+
}
|
|
264
|
+
try {
|
|
265
|
+
const response = await fetch(url, { headers });
|
|
266
|
+
if (!response.ok) {
|
|
267
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
268
|
+
}
|
|
269
|
+
// Get filename from URL or Content-Disposition header
|
|
270
|
+
const contentDisposition = response.headers.get('content-disposition');
|
|
271
|
+
let filename = source.filename;
|
|
272
|
+
if (contentDisposition) {
|
|
273
|
+
const match = contentDisposition.match(/filename=["']?([^"';]+)["']?/);
|
|
274
|
+
if (match)
|
|
275
|
+
filename = match[1];
|
|
276
|
+
}
|
|
277
|
+
if (!filename) {
|
|
278
|
+
filename = new URL(url).pathname.split('/').pop() || 'download';
|
|
279
|
+
}
|
|
280
|
+
// Get content type for format hint
|
|
281
|
+
const contentType = response.headers.get('content-type');
|
|
282
|
+
// Convert response to buffer
|
|
283
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
284
|
+
const buffer = Buffer.from(arrayBuffer);
|
|
285
|
+
return {
|
|
286
|
+
type: 'buffer',
|
|
287
|
+
data: buffer,
|
|
288
|
+
filename,
|
|
289
|
+
headers: { 'content-type': contentType || 'application/octet-stream' }
|
|
290
|
+
};
|
|
291
|
+
}
|
|
292
|
+
catch (error) {
|
|
293
|
+
throw new Error(`Failed to fetch URL ${url}: ${error.message}`);
|
|
294
|
+
}
|
|
205
295
|
}
|
|
206
296
|
/**
|
|
207
297
|
* Check if string is a file path
|
|
@@ -233,6 +323,12 @@ export class ImportCoordinator {
|
|
|
233
323
|
return this.detector.detectFromString(source.data);
|
|
234
324
|
case 'object':
|
|
235
325
|
return this.detector.detectFromObject(source.data);
|
|
326
|
+
case 'url':
|
|
327
|
+
// URL sources are converted to buffers in normalizeSource()
|
|
328
|
+
// This should never be reached, but included for type safety
|
|
329
|
+
return null;
|
|
330
|
+
default:
|
|
331
|
+
return null;
|
|
236
332
|
}
|
|
237
333
|
}
|
|
238
334
|
/**
|
|
@@ -288,6 +384,18 @@ export class ImportCoordinator {
|
|
|
288
384
|
? source.data
|
|
289
385
|
: source.data.toString('utf8');
|
|
290
386
|
return await this.markdownImporter.extract(mdContent, extractOptions);
|
|
387
|
+
case 'yaml':
|
|
388
|
+
const yamlContent = source.type === 'string'
|
|
389
|
+
? source.data
|
|
390
|
+
: source.type === 'buffer' || source.type === 'path'
|
|
391
|
+
? source.data.toString('utf8')
|
|
392
|
+
: JSON.stringify(source.data);
|
|
393
|
+
return await this.yamlImporter.extract(yamlContent, extractOptions);
|
|
394
|
+
case 'docx':
|
|
395
|
+
const docxBuffer = source.type === 'buffer' || source.type === 'path'
|
|
396
|
+
? source.data
|
|
397
|
+
: Buffer.from(JSON.stringify(source.data));
|
|
398
|
+
return await this.docxImporter.extract(docxBuffer, extractOptions);
|
|
291
399
|
default:
|
|
292
400
|
throw new Error(`Unsupported format: ${format}`);
|
|
293
401
|
}
|
|
@@ -305,6 +413,17 @@ export class ImportCoordinator {
|
|
|
305
413
|
}
|
|
306
414
|
// Extract rows/sections/entities from result (unified across formats)
|
|
307
415
|
const rows = extractionResult.rows || extractionResult.sections || extractionResult.entities || [];
|
|
416
|
+
// Progressive flush interval - adjusts based on current count (v4.2.0+)
|
|
417
|
+
// Starts at 100, increases to 1000 at 1K entities, then 5000 at 10K
|
|
418
|
+
// This works for both known totals (files) and unknown totals (streaming APIs)
|
|
419
|
+
let currentFlushInterval = 100; // Start with frequent updates for better UX
|
|
420
|
+
let entitiesSinceFlush = 0;
|
|
421
|
+
let totalFlushes = 0;
|
|
422
|
+
console.log(`📊 Streaming Import: Progressive flush intervals\n` +
|
|
423
|
+
` Starting interval: Every ${currentFlushInterval} entities\n` +
|
|
424
|
+
` Auto-adjusts: 100 → 1000 (at 1K entities) → 5000 (at 10K entities)\n` +
|
|
425
|
+
` Benefits: Live queries, crash resilience, frequent early updates\n` +
|
|
426
|
+
` Works with: Known totals (files) and unknown totals (streaming APIs)`);
|
|
308
427
|
// Smart deduplication auto-disable for large imports (prevents O(n²) performance)
|
|
309
428
|
const DEDUPLICATION_AUTO_DISABLE_THRESHOLD = 100;
|
|
310
429
|
let actuallyEnableDeduplication = options.enableDeduplication;
|
|
@@ -428,8 +547,9 @@ export class ImportCoordinator {
|
|
|
428
547
|
from: entityId,
|
|
429
548
|
to: targetEntityId,
|
|
430
549
|
type: rel.type,
|
|
550
|
+
confidence: rel.confidence, // v4.2.0: Top-level field
|
|
551
|
+
weight: rel.weight || 1.0, // v4.2.0: Top-level field
|
|
431
552
|
metadata: {
|
|
432
|
-
confidence: rel.confidence,
|
|
433
553
|
evidence: rel.evidence,
|
|
434
554
|
importedAt: Date.now()
|
|
435
555
|
}
|
|
@@ -441,12 +561,58 @@ export class ImportCoordinator {
|
|
|
441
561
|
}
|
|
442
562
|
}
|
|
443
563
|
}
|
|
564
|
+
// Streaming import: Progressive flush with dynamic interval adjustment (v4.2.0+)
|
|
565
|
+
entitiesSinceFlush++;
|
|
566
|
+
if (entitiesSinceFlush >= currentFlushInterval) {
|
|
567
|
+
const flushStart = Date.now();
|
|
568
|
+
await this.brain.flush();
|
|
569
|
+
const flushDuration = Date.now() - flushStart;
|
|
570
|
+
totalFlushes++;
|
|
571
|
+
// Reset counter
|
|
572
|
+
entitiesSinceFlush = 0;
|
|
573
|
+
// Recalculate flush interval based on current entity count
|
|
574
|
+
const newInterval = this.getProgressiveFlushInterval(entities.length);
|
|
575
|
+
if (newInterval !== currentFlushInterval) {
|
|
576
|
+
console.log(`📊 Flush interval adjusted: ${currentFlushInterval} → ${newInterval}\n` +
|
|
577
|
+
` Reason: Reached ${entities.length} entities (threshold for next tier)\n` +
|
|
578
|
+
` Impact: ${newInterval > currentFlushInterval ? 'Fewer' : 'More'} flushes = ${newInterval > currentFlushInterval ? 'Better performance' : 'More frequent updates'}`);
|
|
579
|
+
currentFlushInterval = newInterval;
|
|
580
|
+
}
|
|
581
|
+
// Notify progress callback that data is now queryable
|
|
582
|
+
await options.onProgress?.({
|
|
583
|
+
stage: 'storing-graph',
|
|
584
|
+
message: `Flushed indexes (${entities.length}/${rows.length} entities, ${flushDuration}ms)`,
|
|
585
|
+
processed: entities.length,
|
|
586
|
+
total: rows.length,
|
|
587
|
+
entities: entities.length,
|
|
588
|
+
queryable: true // ← Indexes are flushed, data is queryable!
|
|
589
|
+
});
|
|
590
|
+
}
|
|
444
591
|
}
|
|
445
592
|
catch (error) {
|
|
446
593
|
// Skip entity creation errors (might already exist, etc.)
|
|
447
594
|
continue;
|
|
448
595
|
}
|
|
449
596
|
}
|
|
597
|
+
// Final flush for any remaining entities
|
|
598
|
+
if (entitiesSinceFlush > 0) {
|
|
599
|
+
const flushStart = Date.now();
|
|
600
|
+
await this.brain.flush();
|
|
601
|
+
const flushDuration = Date.now() - flushStart;
|
|
602
|
+
totalFlushes++;
|
|
603
|
+
console.log(`✅ Import complete: ${entities.length} entities processed\n` +
|
|
604
|
+
` Total flushes: ${totalFlushes}\n` +
|
|
605
|
+
` Final flush: ${flushDuration}ms\n` +
|
|
606
|
+
` Average overhead: ~${((totalFlushes * 50) / (entities.length * 100) * 100).toFixed(2)}%`);
|
|
607
|
+
await options.onProgress?.({
|
|
608
|
+
stage: 'storing-graph',
|
|
609
|
+
message: `Final flush complete (${entities.length} entities)`,
|
|
610
|
+
processed: entities.length,
|
|
611
|
+
total: rows.length,
|
|
612
|
+
entities: entities.length,
|
|
613
|
+
queryable: true
|
|
614
|
+
});
|
|
615
|
+
}
|
|
450
616
|
// Batch create all relationships using brain.relateMany() for performance
|
|
451
617
|
if (options.createRelationships && relationships.length > 0) {
|
|
452
618
|
try {
|
|
@@ -555,8 +721,182 @@ export class ImportCoordinator {
|
|
|
555
721
|
stats: result.stats
|
|
556
722
|
};
|
|
557
723
|
}
|
|
724
|
+
// YAML: entities -> rows (v4.2.0)
|
|
725
|
+
if (format === 'yaml') {
|
|
726
|
+
const rows = result.entities.map((entity) => ({
|
|
727
|
+
entity,
|
|
728
|
+
relatedEntities: [],
|
|
729
|
+
relationships: result.relationships.filter((r) => r.from === entity.id),
|
|
730
|
+
concepts: entity.metadata?.concepts || []
|
|
731
|
+
}));
|
|
732
|
+
return {
|
|
733
|
+
rowsProcessed: result.nodesProcessed,
|
|
734
|
+
entitiesExtracted: result.entitiesExtracted,
|
|
735
|
+
relationshipsInferred: result.relationshipsInferred,
|
|
736
|
+
rows,
|
|
737
|
+
entityMap: result.entityMap,
|
|
738
|
+
processingTime: result.processingTime,
|
|
739
|
+
stats: result.stats
|
|
740
|
+
};
|
|
741
|
+
}
|
|
742
|
+
// DOCX: entities -> rows (v4.2.0)
|
|
743
|
+
if (format === 'docx') {
|
|
744
|
+
const rows = result.entities.map((entity) => ({
|
|
745
|
+
entity,
|
|
746
|
+
relatedEntities: [],
|
|
747
|
+
relationships: result.relationships.filter((r) => r.from === entity.id),
|
|
748
|
+
concepts: entity.metadata?.concepts || []
|
|
749
|
+
}));
|
|
750
|
+
return {
|
|
751
|
+
rowsProcessed: result.paragraphsProcessed,
|
|
752
|
+
entitiesExtracted: result.entitiesExtracted,
|
|
753
|
+
relationshipsInferred: result.relationshipsInferred,
|
|
754
|
+
rows,
|
|
755
|
+
entityMap: result.entityMap,
|
|
756
|
+
processingTime: result.processingTime,
|
|
757
|
+
stats: result.stats
|
|
758
|
+
};
|
|
759
|
+
}
|
|
558
760
|
// Fallback: return as-is
|
|
559
761
|
return result;
|
|
560
762
|
}
|
|
763
|
+
/**
|
|
764
|
+
* Validate options and reject deprecated v3.x options (v4.0.0+)
|
|
765
|
+
* Throws clear errors with migration guidance
|
|
766
|
+
*/
|
|
767
|
+
validateOptions(options) {
|
|
768
|
+
const invalidOptions = [];
|
|
769
|
+
// Check for v3.x deprecated options
|
|
770
|
+
if ('extractRelationships' in options) {
|
|
771
|
+
invalidOptions.push({
|
|
772
|
+
old: 'extractRelationships',
|
|
773
|
+
new: 'enableRelationshipInference',
|
|
774
|
+
message: 'Option renamed for clarity in v4.x - explicitly indicates AI-powered relationship inference'
|
|
775
|
+
});
|
|
776
|
+
}
|
|
777
|
+
if ('autoDetect' in options) {
|
|
778
|
+
invalidOptions.push({
|
|
779
|
+
old: 'autoDetect',
|
|
780
|
+
new: '(removed)',
|
|
781
|
+
message: 'Auto-detection is now always enabled - no need to specify this option'
|
|
782
|
+
});
|
|
783
|
+
}
|
|
784
|
+
if ('createFileStructure' in options) {
|
|
785
|
+
invalidOptions.push({
|
|
786
|
+
old: 'createFileStructure',
|
|
787
|
+
new: 'vfsPath',
|
|
788
|
+
message: 'Use vfsPath to explicitly specify the virtual filesystem directory path'
|
|
789
|
+
});
|
|
790
|
+
}
|
|
791
|
+
if ('excelSheets' in options) {
|
|
792
|
+
invalidOptions.push({
|
|
793
|
+
old: 'excelSheets',
|
|
794
|
+
new: '(removed)',
|
|
795
|
+
message: 'All sheets are now processed automatically - no configuration needed'
|
|
796
|
+
});
|
|
797
|
+
}
|
|
798
|
+
if ('pdfExtractTables' in options) {
|
|
799
|
+
invalidOptions.push({
|
|
800
|
+
old: 'pdfExtractTables',
|
|
801
|
+
new: '(removed)',
|
|
802
|
+
message: 'Table extraction is now automatic for PDF imports'
|
|
803
|
+
});
|
|
804
|
+
}
|
|
805
|
+
// If invalid options found, throw error with detailed message
|
|
806
|
+
if (invalidOptions.length > 0) {
|
|
807
|
+
const errorMessage = this.buildValidationErrorMessage(invalidOptions);
|
|
808
|
+
throw new Error(errorMessage);
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
/**
|
|
812
|
+
* Build detailed error message for invalid options
|
|
813
|
+
* Respects LOG_LEVEL for verbosity (detailed in dev, concise in prod)
|
|
814
|
+
*/
|
|
815
|
+
buildValidationErrorMessage(invalidOptions) {
|
|
816
|
+
// Check environment for verbosity level
|
|
817
|
+
const verbose = process.env.LOG_LEVEL === 'debug' ||
|
|
818
|
+
process.env.LOG_LEVEL === 'verbose' ||
|
|
819
|
+
process.env.NODE_ENV === 'development' ||
|
|
820
|
+
process.env.NODE_ENV === 'dev';
|
|
821
|
+
if (verbose) {
|
|
822
|
+
// DETAILED mode (development)
|
|
823
|
+
const optionDetails = invalidOptions
|
|
824
|
+
.map((opt) => `
|
|
825
|
+
❌ ${opt.old}
|
|
826
|
+
→ Use: ${opt.new}
|
|
827
|
+
→ Why: ${opt.message}`)
|
|
828
|
+
.join('\n');
|
|
829
|
+
return `
|
|
830
|
+
❌ Invalid import options detected (Brainy v4.x breaking changes)
|
|
831
|
+
|
|
832
|
+
The following v3.x options are no longer supported:
|
|
833
|
+
${optionDetails}
|
|
834
|
+
|
|
835
|
+
📖 Migration Guide: https://brainy.dev/docs/guides/migrating-to-v4
|
|
836
|
+
💡 Quick Fix Examples:
|
|
837
|
+
|
|
838
|
+
Before (v3.x):
|
|
839
|
+
await brain.import(file, {
|
|
840
|
+
extractRelationships: true,
|
|
841
|
+
createFileStructure: true
|
|
842
|
+
})
|
|
843
|
+
|
|
844
|
+
After (v4.x):
|
|
845
|
+
await brain.import(file, {
|
|
846
|
+
enableRelationshipInference: true,
|
|
847
|
+
vfsPath: '/imports/my-data'
|
|
848
|
+
})
|
|
849
|
+
|
|
850
|
+
🔗 Full API docs: https://brainy.dev/docs/api/import
|
|
851
|
+
`.trim();
|
|
852
|
+
}
|
|
853
|
+
else {
|
|
854
|
+
// CONCISE mode (production)
|
|
855
|
+
const optionsList = invalidOptions.map((o) => `'${o.old}'`).join(', ');
|
|
856
|
+
return `Invalid import options: ${optionsList}. See https://brainy.dev/docs/guides/migrating-to-v4`;
|
|
857
|
+
}
|
|
858
|
+
}
|
|
859
|
+
/**
|
|
860
|
+
* Get progressive flush interval based on CURRENT entity count (v4.2.0+)
|
|
861
|
+
*
|
|
862
|
+
* Unlike adaptive intervals (which require knowing total count upfront),
|
|
863
|
+
* progressive intervals adjust dynamically as import proceeds.
|
|
864
|
+
*
|
|
865
|
+
* Thresholds:
|
|
866
|
+
* - 0-999 entities: Flush every 100 (frequent updates for better UX)
|
|
867
|
+
* - 1K-9.9K entities: Flush every 1000 (balanced performance/responsiveness)
|
|
868
|
+
* - 10K+ entities: Flush every 5000 (performance focused, minimal overhead)
|
|
869
|
+
*
|
|
870
|
+
* Benefits:
|
|
871
|
+
* - Works with known totals (file imports)
|
|
872
|
+
* - Works with unknown totals (streaming APIs, database cursors)
|
|
873
|
+
* - Frequent updates early when user is watching
|
|
874
|
+
* - Efficient processing later when performance matters
|
|
875
|
+
* - Low overhead (~0.3% for large imports)
|
|
876
|
+
* - No configuration required
|
|
877
|
+
*
|
|
878
|
+
* Example:
|
|
879
|
+
* - Import with 50K entities:
|
|
880
|
+
* - Flushes at: 100, 200, ..., 900 (9 flushes with interval=100)
|
|
881
|
+
* - Interval increases to 1000 at entity #1000
|
|
882
|
+
* - Flushes at: 1000, 2000, ..., 9000 (9 more flushes)
|
|
883
|
+
* - Interval increases to 5000 at entity #10000
|
|
884
|
+
* - Flushes at: 10000, 15000, ..., 50000 (8 more flushes)
|
|
885
|
+
* - Total: ~26 flushes = ~1.3s overhead = 0.026% of import time
|
|
886
|
+
*
|
|
887
|
+
* @param currentEntityCount - Current number of entities imported so far
|
|
888
|
+
* @returns Current optimal flush interval
|
|
889
|
+
*/
|
|
890
|
+
getProgressiveFlushInterval(currentEntityCount) {
|
|
891
|
+
if (currentEntityCount < 1000) {
|
|
892
|
+
return 100; // Frequent updates for small imports and early stages
|
|
893
|
+
}
|
|
894
|
+
else if (currentEntityCount < 10000) {
|
|
895
|
+
return 1000; // Balanced interval for medium-sized imports
|
|
896
|
+
}
|
|
897
|
+
else {
|
|
898
|
+
return 5000; // Performance-focused interval for large imports
|
|
899
|
+
}
|
|
900
|
+
}
|
|
561
901
|
}
|
|
562
902
|
//# sourceMappingURL=ImportCoordinator.js.map
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* InstancePool - Shared instance management for memory efficiency
|
|
3
|
+
*
|
|
4
|
+
* Production-grade instance pooling to prevent memory leaks during imports.
|
|
5
|
+
* Critical for scaling to billions of entities.
|
|
6
|
+
*
|
|
7
|
+
* Problem: Creating new NLP/Extractor instances in loops → memory leak
|
|
8
|
+
* Solution: Reuse shared instances across entire import session
|
|
9
|
+
*
|
|
10
|
+
* Memory savings:
|
|
11
|
+
* - Without pooling: 100K rows × 50MB per instance = 5TB RAM (OOM!)
|
|
12
|
+
* - With pooling: 50MB total (shared across all rows)
|
|
13
|
+
*/
|
|
14
|
+
import { Brainy } from '../brainy.js';
|
|
15
|
+
import { NaturalLanguageProcessor } from '../neural/naturalLanguageProcessor.js';
|
|
16
|
+
import { NeuralEntityExtractor } from '../neural/entityExtractor.js';
|
|
17
|
+
/**
|
|
18
|
+
* InstancePool - Manages shared instances for memory efficiency
|
|
19
|
+
*
|
|
20
|
+
* Lifecycle:
|
|
21
|
+
* 1. Create pool at import start
|
|
22
|
+
* 2. Reuse instances across all rows
|
|
23
|
+
* 3. Pool is garbage collected when import completes
|
|
24
|
+
*
|
|
25
|
+
* Thread safety: Not thread-safe (single import session per pool)
|
|
26
|
+
*/
|
|
27
|
+
export declare class InstancePool {
|
|
28
|
+
private brain;
|
|
29
|
+
private nlpInstance;
|
|
30
|
+
private extractorInstance;
|
|
31
|
+
private nlpInitialized;
|
|
32
|
+
private initializationPromise;
|
|
33
|
+
private stats;
|
|
34
|
+
constructor(brain: Brainy);
|
|
35
|
+
/**
|
|
36
|
+
* Get shared NaturalLanguageProcessor instance
|
|
37
|
+
*
|
|
38
|
+
* Lazy initialization - created on first access
|
|
39
|
+
* All subsequent calls return same instance
|
|
40
|
+
*
|
|
41
|
+
* @returns Shared NLP instance
|
|
42
|
+
*/
|
|
43
|
+
getNLP(): Promise<NaturalLanguageProcessor>;
|
|
44
|
+
/**
|
|
45
|
+
* Get shared NeuralEntityExtractor instance
|
|
46
|
+
*
|
|
47
|
+
* Lazy initialization - created on first access
|
|
48
|
+
* All subsequent calls return same instance
|
|
49
|
+
*
|
|
50
|
+
* @returns Shared extractor instance
|
|
51
|
+
*/
|
|
52
|
+
getExtractor(): NeuralEntityExtractor;
|
|
53
|
+
/**
|
|
54
|
+
* Get shared NLP instance (synchronous, may return uninitialized)
|
|
55
|
+
*
|
|
56
|
+
* Use when you need NLP synchronously and will handle initialization yourself.
|
|
57
|
+
* Prefer getNLP() for async code.
|
|
58
|
+
*
|
|
59
|
+
* @returns Shared NLP instance (possibly uninitialized)
|
|
60
|
+
*/
|
|
61
|
+
getNLPSync(): NaturalLanguageProcessor;
|
|
62
|
+
/**
|
|
63
|
+
* Initialize all instances upfront
|
|
64
|
+
*
|
|
65
|
+
* Call at start of import to avoid lazy initialization overhead
|
|
66
|
+
* during processing. Improves predictability and first-row performance.
|
|
67
|
+
*
|
|
68
|
+
* @returns Promise that resolves when all instances are ready
|
|
69
|
+
*/
|
|
70
|
+
init(): Promise<void>;
|
|
71
|
+
/**
|
|
72
|
+
* Internal initialization implementation
|
|
73
|
+
*/
|
|
74
|
+
private initializeInternal;
|
|
75
|
+
/**
|
|
76
|
+
* Ensure NLP is initialized (loads 220 patterns)
|
|
77
|
+
*
|
|
78
|
+
* Handles concurrent initialization requests safely
|
|
79
|
+
*/
|
|
80
|
+
private ensureNLPInitialized;
|
|
81
|
+
/**
|
|
82
|
+
* Check if instances are initialized
|
|
83
|
+
*
|
|
84
|
+
* @returns True if NLP is initialized and ready to use
|
|
85
|
+
*/
|
|
86
|
+
isInitialized(): boolean;
|
|
87
|
+
/**
|
|
88
|
+
* Get pool statistics
|
|
89
|
+
*
|
|
90
|
+
* Useful for performance monitoring and memory leak detection
|
|
91
|
+
*
|
|
92
|
+
* @returns Statistics about instance reuse
|
|
93
|
+
*/
|
|
94
|
+
getStats(): {
|
|
95
|
+
nlpCreated: boolean;
|
|
96
|
+
extractorCreated: boolean;
|
|
97
|
+
initialized: boolean;
|
|
98
|
+
memorySaved: number;
|
|
99
|
+
nlpReuses: number;
|
|
100
|
+
extractorReuses: number;
|
|
101
|
+
creationTime: number;
|
|
102
|
+
};
|
|
103
|
+
/**
|
|
104
|
+
* Calculate estimated memory saved by pooling
|
|
105
|
+
*
|
|
106
|
+
* Assumes ~50MB per NLP instance, ~10MB per extractor instance
|
|
107
|
+
*
|
|
108
|
+
* @returns Estimated memory saved in bytes
|
|
109
|
+
*/
|
|
110
|
+
private calculateMemorySaved;
|
|
111
|
+
/**
|
|
112
|
+
* Reset statistics (useful for testing)
|
|
113
|
+
*/
|
|
114
|
+
resetStats(): void;
|
|
115
|
+
/**
|
|
116
|
+
* Get string representation (for debugging)
|
|
117
|
+
*/
|
|
118
|
+
toString(): string;
|
|
119
|
+
/**
|
|
120
|
+
* Cleanup method (for explicit resource management)
|
|
121
|
+
*
|
|
122
|
+
* Note: Usually not needed - pool is garbage collected when import completes.
|
|
123
|
+
* Use only if you need explicit cleanup for some reason.
|
|
124
|
+
*/
|
|
125
|
+
cleanup(): void;
|
|
126
|
+
}
|
|
127
|
+
/**
|
|
128
|
+
* Create a new instance pool
|
|
129
|
+
*
|
|
130
|
+
* Convenience factory function
|
|
131
|
+
*
|
|
132
|
+
* @param brain Brainy instance
|
|
133
|
+
* @param autoInit Whether to initialize instances immediately
|
|
134
|
+
* @returns Instance pool
|
|
135
|
+
*/
|
|
136
|
+
export declare function createInstancePool(brain: Brainy, autoInit?: boolean): Promise<InstancePool>;
|