@soulcraft/brainy 4.10.3 → 4.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainy.js +50 -14
- package/dist/import/ImportCoordinator.js +243 -173
- package/dist/storage/adapters/azureBlobStorage.d.ts +15 -1
- package/dist/storage/adapters/azureBlobStorage.js +25 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts +13 -0
- package/dist/storage/adapters/baseStorageAdapter.js +26 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +14 -1
- package/dist/storage/adapters/fileSystemStorage.js +24 -0
- package/dist/storage/adapters/gcsStorage.d.ts +16 -1
- package/dist/storage/adapters/gcsStorage.js +26 -0
- package/dist/storage/adapters/memoryStorage.d.ts +14 -1
- package/dist/storage/adapters/memoryStorage.js +24 -0
- package/dist/storage/adapters/opfsStorage.d.ts +14 -1
- package/dist/storage/adapters/opfsStorage.js +24 -0
- package/dist/storage/adapters/r2Storage.d.ts +18 -1
- package/dist/storage/adapters/r2Storage.js +28 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +15 -1
- package/dist/storage/adapters/s3CompatibleStorage.js +25 -0
- package/dist/storage/baseStorage.d.ts +24 -0
- package/dist/utils/adaptiveBackpressure.d.ts +17 -10
- package/dist/utils/adaptiveBackpressure.js +98 -48
- package/package.json +1 -1
package/dist/brainy.js
CHANGED
@@ -1517,6 +1517,16 @@ export class Brainy {
      */
     async addMany(params) {
         await this.ensureInitialized();
+        // Get optimal batch configuration from storage adapter (v4.11.0)
+        // This automatically adapts to storage characteristics:
+        // - GCS: 50 batch size, 100ms delay, sequential
+        // - S3/R2: 100 batch size, 50ms delay, parallel
+        // - Memory: 1000 batch size, 0ms delay, parallel
+        const storageConfig = this.storage.getBatchConfig();
+        // Use storage preferences (allow explicit user override)
+        const batchSize = params.chunkSize ?? storageConfig.maxBatchSize;
+        const parallel = params.parallel ?? storageConfig.supportsParallelWrites;
+        const delayMs = storageConfig.batchDelayMs;
         const result = {
             successful: [],
             failed: [],
@@ -1524,10 +1534,10 @@ export class Brainy {
             duration: 0
         };
         const startTime = Date.now();
-
-        // Process in
-        for (let i = 0; i < params.items.length; i +=
-            const chunk = params.items.slice(i, i +
+        let lastBatchTime = Date.now();
+        // Process in batches
+        for (let i = 0; i < params.items.length; i += batchSize) {
+            const chunk = params.items.slice(i, i + batchSize);
             const promises = chunk.map(async (item) => {
                 try {
                     const id = await this.add(item);
@@ -1543,18 +1553,29 @@ export class Brainy {
                     }
                 }
             });
-
+            // Parallel vs Sequential based on storage preference
+            if (parallel) {
                 await Promise.allSettled(promises);
             }
             else {
+                // Sequential processing for rate-limited storage
                 for (const promise of promises) {
                     await promise;
                 }
             }
-            //
+            // Progress callback
             if (params.onProgress) {
                 params.onProgress(result.successful.length + result.failed.length, result.total);
             }
+            // Adaptive delay between batches
+            if (i + batchSize < params.items.length && delayMs > 0) {
+                const batchDuration = Date.now() - lastBatchTime;
+                // If batch was too fast, add delay to respect rate limits
+                if (batchDuration < delayMs) {
+                    await new Promise(resolve => setTimeout(resolve, delayMs - batchDuration));
+                }
+                lastBatchTime = Date.now();
+            }
         }
         result.duration = Date.now() - startTime;
         return result;
@@ -1655,6 +1676,13 @@ export class Brainy {
      */
     async relateMany(params) {
         await this.ensureInitialized();
+        // Get optimal batch configuration from storage adapter (v4.11.0)
+        // Automatically adapts to storage characteristics
+        const storageConfig = this.storage.getBatchConfig();
+        // Use storage preferences (allow explicit user override)
+        const batchSize = params.chunkSize ?? storageConfig.maxBatchSize;
+        const parallel = params.parallel ?? storageConfig.supportsParallelWrites;
+        const delayMs = storageConfig.batchDelayMs;
         const result = {
             successful: [],
             failed: [],
@@ -1662,11 +1690,11 @@ export class Brainy {
             duration: 0
         };
         const startTime = Date.now();
-
-        for (let i = 0; i < params.items.length; i +=
-            const chunk = params.items.slice(i, i +
-            if (
-                //
+        let lastBatchTime = Date.now();
+        for (let i = 0; i < params.items.length; i += batchSize) {
+            const chunk = params.items.slice(i, i + batchSize);
+            if (parallel) {
+                // Parallel processing
                 const promises = chunk.map(async (item) => {
                     try {
                         const relationId = await this.relate(item);
@@ -1682,10 +1710,10 @@ export class Brainy {
                         }
                     }
                 });
-                await Promise.
+                await Promise.allSettled(promises);
             }
             else {
-                //
+                // Sequential processing
                 for (const item of chunk) {
                     try {
                         const relationId = await this.relate(item);
@@ -1702,10 +1730,18 @@ export class Brainy {
                     }
                 }
             }
-            //
+            // Progress callback
             if (params.onProgress) {
                 params.onProgress(result.successful.length + result.failed.length, result.total);
             }
+            // Adaptive delay
+            if (i + batchSize < params.items.length && delayMs > 0) {
+                const batchDuration = Date.now() - lastBatchTime;
+                if (batchDuration < delayMs) {
+                    await new Promise(resolve => setTimeout(resolve, delayMs - batchDuration));
+                }
+                lastBatchTime = Date.now();
+            }
         }
         result.duration = Date.now() - startTime;
         return result.successful;
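For context, the storage-derived defaults read via getBatchConfig() can still be overridden per call. The following is an illustrative sketch only (not part of the package diff), assuming an initialized Brainy instance named `brainy` and an `items` array prepared by the caller; parameter and result field names are taken from the hunks above:

// chunkSize and parallel override storageConfig.maxBatchSize / supportsParallelWrites.
// Note: batchDelayMs has no per-call override; the delay always comes from the adapter.
const result = await brainy.addMany({
    items,                          // [{ data, type, metadata }, ...]
    chunkSize: 200,                 // optional override of the adapter's maxBatchSize
    parallel: false,                // optional override of supportsParallelWrites
    continueOnError: true,
    onProgress: (done, total) => console.log(`added ${done}/${total}`)
});
console.log(`ok=${result.successful.length} failed=${result.failed.length} in ${result.duration}ms`);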

package/dist/import/ImportCoordinator.js
CHANGED
@@ -499,17 +499,114 @@ export class ImportCoordinator {
             });
             console.log(`✅ Document entity created: ${documentEntityId}`);
         }
-        //
-
-
-
-
-        //
-
-
-
-
+        // ============================================
+        // v4.11.0: Batch entity creation using addMany()
+        // Replaces entity-by-entity loop for 10-100x performance improvement on cloud storage
+        // ============================================
+        if (!actuallyEnableDeduplication) {
+            // FAST PATH: Batch creation without deduplication (recommended for imports > 100 entities)
+            const importSource = vfsResult.rootPath;
+            // Prepare all entity parameters upfront
+            const entityParams = rows.map((row) => {
+                const entity = row.entity || row;
+                const vfsFile = vfsResult.files.find((f) => f.entityId === entity.id);
+                return {
+                    data: entity.description || entity.name,
+                    type: entity.type,
+                    metadata: {
+                        ...entity.metadata,
+                        name: entity.name,
+                        confidence: entity.confidence,
+                        vfsPath: vfsFile?.path,
+                        importedFrom: 'import-coordinator',
+                        imports: [importSource],
+                        ...(trackingContext && {
+                            importIds: [trackingContext.importId],
+                            projectId: trackingContext.projectId,
+                            importedAt: trackingContext.importedAt,
+                            importFormat: trackingContext.importFormat,
+                            importSource: trackingContext.importSource,
+                            sourceRow: row.rowNumber,
+                            sourceSheet: row.sheet,
+                            ...trackingContext.customMetadata
+                        })
+                    }
+                };
+            });
+            // Batch create all entities (storage-aware batching handles rate limits automatically)
+            const addResult = await this.brain.addMany({
+                items: entityParams,
+                continueOnError: true,
+                onProgress: (done, total) => {
+                    options.onProgress?.({
+                        stage: 'storing-graph',
+                        message: `Creating entities: ${done}/${total}`,
+                        processed: done,
+                        total,
+                        entities: done
+                    });
+                }
+            });
+            // Map results to entities array and update rows with new IDs
+            for (let i = 0; i < addResult.successful.length; i++) {
+                const entityId = addResult.successful[i];
+                const row = rows[i];
+                const entity = row.entity || row;
+                const vfsFile = vfsResult.files.find((f) => f.entityId === entity.id);
+                entity.id = entityId;
+                entities.push({
+                    id: entityId,
+                    name: entity.name,
+                    type: entity.type,
+                    vfsPath: vfsFile?.path
+                });
+                newCount++;
+            }
+            // Handle failed entities
+            if (addResult.failed.length > 0) {
+                console.warn(`⚠️ ${addResult.failed.length} entities failed to create`);
+            }
+            // Create provenance links in batch
+            if (documentEntityId && options.createProvenanceLinks !== false && entities.length > 0) {
+                const provenanceParams = entities.map((entity, idx) => {
+                    const row = rows[idx];
+                    return {
+                        from: documentEntityId,
+                        to: entity.id,
+                        type: VerbType.Contains,
+                        metadata: {
+                            relationshipType: 'provenance',
+                            evidence: `Extracted from ${sourceInfo?.sourceFilename}`,
+                            sheet: row?.sheet,
+                            rowNumber: row?.rowNumber,
+                            extractedAt: Date.now(),
+                            format: sourceInfo?.format,
+                            ...(trackingContext && {
+                                importIds: [trackingContext.importId],
+                                projectId: trackingContext.projectId,
+                                createdAt: Date.now(),
+                                importFormat: trackingContext.importFormat,
+                                ...trackingContext.customMetadata
+                            })
+                        }
+                    };
+                });
+                await this.brain.relateMany({
+                    items: provenanceParams,
+                    continueOnError: true
+                });
+                provenanceCount = provenanceParams.length;
+            }
+        }
+        else {
+            // SLOW PATH: Entity-by-entity with deduplication (only for small imports < 100 entities)
+            for (const row of rows) {
+                const entity = row.entity || row;
+                const vfsFile = vfsResult.files.find((f) => f.entityId === entity.id);
+                try {
+                    const importSource = vfsResult.rootPath;
+                    let entityId;
+                    let wasMerged = false;
                     // Use deduplicator to check for existing entities
                     const mergeResult = await this.deduplicator.createOrMerge({
                         id: entity.id,
@@ -546,181 +643,154 @@ export class ImportCoordinator {
                     else {
                         newCount++;
                     }
-
-
-
-
-
+                    // Update entity ID in extraction result
+                    entity.id = entityId;
+                    entities.push({
+                        id: entityId,
+                        name: entity.name,
                         type: entity.type,
-
-                        ...entity.metadata,
-                        name: entity.name,
-                        confidence: entity.confidence,
-                        vfsPath: vfsFile?.path,
-                        importedFrom: 'import-coordinator',
-                        imports: [importSource],
-                        // v4.10.0: Import tracking metadata
-                        ...(trackingContext && {
-                            importIds: [trackingContext.importId],
-                            projectId: trackingContext.projectId,
-                            importedAt: trackingContext.importedAt,
-                            importFormat: trackingContext.importFormat,
-                            importSource: trackingContext.importSource,
-                            sourceRow: row.rowNumber,
-                            sourceSheet: row.sheet,
-                            ...trackingContext.customMetadata
-                        })
-                    }
+                        vfsPath: vfsFile?.path
                     });
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                            extractedAt: Date.now(),
-                            format: sourceInfo?.format,
-                            // v4.10.0: Import tracking metadata
-                            ...(trackingContext && {
-                                importIds: [trackingContext.importId],
-                                projectId: trackingContext.projectId,
-                                createdAt: Date.now(),
-                                importFormat: trackingContext.importFormat,
-                                ...trackingContext.customMetadata
-                            })
-                        }
-                    });
-                    provenanceCount++;
-                }
-                // Collect relationships for batch creation
-                if (options.createRelationships && row.relationships) {
-                    for (const rel of row.relationships) {
-                        try {
-                            // CRITICAL FIX (v3.43.2): Prevent infinite placeholder creation loop
-                            // Find or create target entity using EXACT matching only
-                            let targetEntityId;
-                            // STEP 1: Check if target already exists in entities list (includes placeholders)
-                            // This prevents creating duplicate placeholders - the root cause of Bug #1
-                            const existingTarget = entities.find(e => e.name.toLowerCase() === rel.to.toLowerCase());
-                            if (existingTarget) {
-                                targetEntityId = existingTarget.id;
+                    // ============================================
+                    // v4.9.0: Create provenance relationship (document → entity)
+                    // ============================================
+                    if (documentEntityId && options.createProvenanceLinks !== false) {
+                        await this.brain.relate({
+                            from: documentEntityId,
+                            to: entityId,
+                            type: VerbType.Contains,
+                            metadata: {
+                                relationshipType: 'provenance',
+                                evidence: `Extracted from ${sourceInfo?.sourceFilename}`,
+                                sheet: row.sheet,
+                                rowNumber: row.rowNumber,
+                                extractedAt: Date.now(),
+                                format: sourceInfo?.format,
+                                // v4.10.0: Import tracking metadata
+                                ...(trackingContext && {
+                                    importIds: [trackingContext.importId],
+                                    projectId: trackingContext.projectId,
+                                    createdAt: Date.now(),
+                                    importFormat: trackingContext.importFormat,
+                                    ...trackingContext.customMetadata
+                                })
                             }
-
-
-
-
-
-
-
-
-
-
+                        });
+                        provenanceCount++;
+                    }
+                    // Collect relationships for batch creation
+                    if (options.createRelationships && row.relationships) {
+                        for (const rel of row.relationships) {
+                            try {
+                                // CRITICAL FIX (v3.43.2): Prevent infinite placeholder creation loop
+                                // Find or create target entity using EXACT matching only
+                                let targetEntityId;
+                                // STEP 1: Check if target already exists in entities list (includes placeholders)
+                                // This prevents creating duplicate placeholders - the root cause of Bug #1
+                                const existingTarget = entities.find(e => e.name.toLowerCase() === rel.to.toLowerCase());
+                                if (existingTarget) {
+                                    targetEntityId = existingTarget.id;
                                 }
-
-
-
-
-
-
-
-
-
-                                    inferredFrom: entity.name,
-                                    // v4.10.0: Import tracking metadata
-                                    ...(trackingContext && {
-                                        importIds: [trackingContext.importId],
-                                        projectId: trackingContext.projectId,
-                                        importedAt: trackingContext.importedAt,
-                                        importFormat: trackingContext.importFormat,
-                                        ...trackingContext.customMetadata
-                                    })
+                                else {
+                                    // STEP 2: Try to find in extraction results (rows)
+                                    // FIX: Use EXACT matching instead of fuzzy .includes()
+                                    // Fuzzy matching caused false matches (e.g., "Entity_29" matching "Entity_297")
+                                    for (const otherRow of rows) {
+                                        const otherEntity = otherRow.entity || otherRow;
+                                        if (otherEntity.name.toLowerCase() === rel.to.toLowerCase()) {
+                                            targetEntityId = otherEntity.id;
+                                            break;
                                         }
-                                    }
-                                    //
-                                    entities
-
-
-
-
+                                    }
+                                    // STEP 3: If still not found, create placeholder entity ONCE
+                                    // The placeholder is added to entities array, so future searches will find it
+                                    if (!targetEntityId) {
+                                        targetEntityId = await this.brain.add({
+                                            data: rel.to,
+                                            type: NounType.Thing,
+                                            metadata: {
+                                                name: rel.to,
+                                                placeholder: true,
+                                                inferredFrom: entity.name,
+                                                // v4.10.0: Import tracking metadata
+                                                ...(trackingContext && {
+                                                    importIds: [trackingContext.importId],
+                                                    projectId: trackingContext.projectId,
+                                                    importedAt: trackingContext.importedAt,
+                                                    importFormat: trackingContext.importFormat,
+                                                    ...trackingContext.customMetadata
+                                                })
+                                            }
+                                        });
+                                        // CRITICAL: Add to entities array so future searches find it
+                                        entities.push({
+                                            id: targetEntityId,
+                                            name: rel.to,
+                                            type: NounType.Thing
+                                        });
+                                    }
                                 }
+                                // Add to relationships array with target ID for batch processing
+                                relationships.push({
+                                    id: '', // Will be assigned after batch creation
+                                    from: entityId,
+                                    to: targetEntityId,
+                                    type: rel.type,
+                                    confidence: rel.confidence, // v4.2.0: Top-level field
+                                    weight: rel.weight || 1.0, // v4.2.0: Top-level field
+                                    metadata: {
+                                        evidence: rel.evidence,
+                                        // v4.10.0: Import tracking metadata (will be merged in batch creation)
+                                        ...(trackingContext && {
+                                            importIds: [trackingContext.importId],
+                                            projectId: trackingContext.projectId,
+                                            importedAt: trackingContext.importedAt,
+                                            importFormat: trackingContext.importFormat,
+                                            ...trackingContext.customMetadata
+                                        })
+                                    }
+                                });
+                            }
+                            catch (error) {
+                                // Skip relationship collection errors (entity might not exist, etc.)
+                                continue;
                             }
-                        // Add to relationships array with target ID for batch processing
-                        relationships.push({
-                            id: '', // Will be assigned after batch creation
-                            from: entityId,
-                            to: targetEntityId,
-                            type: rel.type,
-                            confidence: rel.confidence, // v4.2.0: Top-level field
-                            weight: rel.weight || 1.0, // v4.2.0: Top-level field
-                            metadata: {
-                                evidence: rel.evidence,
-                                // v4.10.0: Import tracking metadata (will be merged in batch creation)
-                                ...(trackingContext && {
-                                    importIds: [trackingContext.importId],
-                                    projectId: trackingContext.projectId,
-                                    importedAt: trackingContext.importedAt,
-                                    importFormat: trackingContext.importFormat,
-                                    ...trackingContext.customMetadata
-                                })
-                            }
-                        });
                         }
-
-
-
+                    }
+                    // Streaming import: Progressive flush with dynamic interval adjustment (v4.2.0+)
+                    entitiesSinceFlush++;
+                    if (entitiesSinceFlush >= currentFlushInterval) {
+                        const flushStart = Date.now();
+                        await this.brain.flush();
+                        const flushDuration = Date.now() - flushStart;
+                        totalFlushes++;
+                        // Reset counter
+                        entitiesSinceFlush = 0;
+                        // Recalculate flush interval based on current entity count
+                        const newInterval = this.getProgressiveFlushInterval(entities.length);
+                        if (newInterval !== currentFlushInterval) {
+                            console.log(`📊 Flush interval adjusted: ${currentFlushInterval} → ${newInterval}\n` +
+                                ` Reason: Reached ${entities.length} entities (threshold for next tier)\n` +
+                                ` Impact: ${newInterval > currentFlushInterval ? 'Fewer' : 'More'} flushes = ${newInterval > currentFlushInterval ? 'Better performance' : 'More frequent updates'}`);
+                            currentFlushInterval = newInterval;
                         }
+                        // Notify progress callback that data is now queryable
+                        await options.onProgress?.({
+                            stage: 'storing-graph',
+                            message: `Flushed indexes (${entities.length}/${rows.length} entities, ${flushDuration}ms)`,
+                            processed: entities.length,
+                            total: rows.length,
+                            entities: entities.length,
+                            queryable: true // ← Indexes are flushed, data is queryable!
+                        });
                     }
                 }
-
-
-
-                    const flushStart = Date.now();
-                    await this.brain.flush();
-                    const flushDuration = Date.now() - flushStart;
-                    totalFlushes++;
-                    // Reset counter
-                    entitiesSinceFlush = 0;
-                    // Recalculate flush interval based on current entity count
-                    const newInterval = this.getProgressiveFlushInterval(entities.length);
-                    if (newInterval !== currentFlushInterval) {
-                        console.log(`📊 Flush interval adjusted: ${currentFlushInterval} → ${newInterval}\n` +
-                            ` Reason: Reached ${entities.length} entities (threshold for next tier)\n` +
-                            ` Impact: ${newInterval > currentFlushInterval ? 'Fewer' : 'More'} flushes = ${newInterval > currentFlushInterval ? 'Better performance' : 'More frequent updates'}`);
-                        currentFlushInterval = newInterval;
-                    }
-                    // Notify progress callback that data is now queryable
-                    await options.onProgress?.({
-                        stage: 'storing-graph',
-                        message: `Flushed indexes (${entities.length}/${rows.length} entities, ${flushDuration}ms)`,
-                        processed: entities.length,
-                        total: rows.length,
-                        entities: entities.length,
-                        queryable: true // ← Indexes are flushed, data is queryable!
-                    });
+                catch (error) {
+                    // Skip entity creation errors (might already exist, etc.)
+                    continue;
                 }
             }
-
-                // Skip entity creation errors (might already exist, etc.)
-                continue;
-            }
-        }
+        } // End of deduplication else block
         // Final flush for any remaining entities
         if (entitiesSinceFlush > 0) {
             const flushStart = Date.now();
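Both the fast path above and the streaming flush logic report through the same options.onProgress callback, and mark data as queryable once indexes are flushed. The following handler is an illustrative sketch only; the import entry point that receives these options is not shown in this diff, so only the payload shape (stage, message, processed, total, entities, queryable) is taken from the hunks above:

const onProgress = ({ stage, message, processed, total, entities, queryable }) => {
    if (stage === 'storing-graph') {
        console.log(`${message} (${processed}/${total})`);
        if (queryable) {
            // Indexes were just flushed, so the entities imported so far are already searchable.
            console.log(`${entities} entities are now queryable`);
        }
    }
};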

package/dist/storage/adapters/azureBlobStorage.d.ts
CHANGED
@@ -12,7 +12,7 @@
  * v4.0.0: Fully compatible with metadata/vector separation architecture
  */
 import { HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../../coreTypes.js';
-import { BaseStorage } from '../baseStorage.js';
+import { BaseStorage, StorageBatchConfig } from '../baseStorage.js';
 type HNSWNode = HNSWNoun;
 type Edge = HNSWVerb;
 /**
@@ -70,6 +70,20 @@ export declare class AzureBlobStorage extends BaseStorage {
         };
         readOnly?: boolean;
     });
+    /**
+     * Get Azure Blob-optimized batch configuration
+     *
+     * Azure Blob Storage has moderate rate limits between GCS and S3:
+     * - Medium batch sizes (75 items)
+     * - Parallel processing supported
+     * - Moderate delays (75ms)
+     *
+     * Azure can handle ~2000 operations/second with good performance
+     *
+     * @returns Azure Blob-optimized batch configuration
+     * @since v4.11.0
+     */
+    getBatchConfig(): StorageBatchConfig;
     /**
      * Initialize the storage adapter
      */
package/dist/storage/adapters/azureBlobStorage.js
CHANGED
@@ -83,6 +83,31 @@ export class AzureBlobStorage extends BaseStorage {
             prodLog.info('🚀 High-volume mode FORCED via BRAINY_FORCE_HIGH_VOLUME environment variable');
         }
     }
+    /**
+     * Get Azure Blob-optimized batch configuration
+     *
+     * Azure Blob Storage has moderate rate limits between GCS and S3:
+     * - Medium batch sizes (75 items)
+     * - Parallel processing supported
+     * - Moderate delays (75ms)
+     *
+     * Azure can handle ~2000 operations/second with good performance
+     *
+     * @returns Azure Blob-optimized batch configuration
+     * @since v4.11.0
+     */
+    getBatchConfig() {
+        return {
+            maxBatchSize: 75,
+            batchDelayMs: 75,
+            maxConcurrent: 75,
+            supportsParallelWrites: true, // Azure handles parallel reasonably
+            rateLimit: {
+                operationsPerSecond: 2000, // Moderate limits
+                burstCapacity: 500
+            }
+        };
+    }
     /**
      * Initialize the storage adapter
      */
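Taken together with the adapter changes listed above, the return value of getBatchConfig() implies a configuration shape roughly like the following. This is an inferred sketch only; the authoritative StorageBatchConfig type is declared in package/dist/storage/baseStorage.d.ts (changed in this release but not shown here) and may include additional fields or different optionality:

interface StorageBatchConfig {
    maxBatchSize: number;            // e.g. 75 for Azure Blob, 50 for GCS, 100 for S3/R2 per the comments above
    batchDelayMs: number;            // minimum delay enforced between batches
    maxConcurrent: number;           // cap on in-flight operations
    supportsParallelWrites: boolean; // whether addMany()/relateMany() may use Promise.allSettled
    rateLimit?: {
        operationsPerSecond: number;
        burstCapacity: number;
    };
}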