@soulcraft/brainy 4.10.3 → 4.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +95 -0
- package/dist/api/DataAPI.d.ts +19 -1
- package/dist/api/DataAPI.js +122 -61
- package/dist/brainy.js +50 -14
- package/dist/import/ImportCoordinator.js +243 -173
- package/dist/storage/adapters/azureBlobStorage.d.ts +15 -1
- package/dist/storage/adapters/azureBlobStorage.js +25 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts +13 -0
- package/dist/storage/adapters/baseStorageAdapter.js +26 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +14 -1
- package/dist/storage/adapters/fileSystemStorage.js +24 -0
- package/dist/storage/adapters/gcsStorage.d.ts +16 -1
- package/dist/storage/adapters/gcsStorage.js +26 -0
- package/dist/storage/adapters/memoryStorage.d.ts +14 -1
- package/dist/storage/adapters/memoryStorage.js +24 -0
- package/dist/storage/adapters/opfsStorage.d.ts +14 -1
- package/dist/storage/adapters/opfsStorage.js +24 -0
- package/dist/storage/adapters/r2Storage.d.ts +18 -1
- package/dist/storage/adapters/r2Storage.js +28 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +15 -1
- package/dist/storage/adapters/s3CompatibleStorage.js +25 -0
- package/dist/storage/baseStorage.d.ts +24 -0
- package/dist/utils/adaptiveBackpressure.d.ts +17 -10
- package/dist/utils/adaptiveBackpressure.js +98 -48
- package/package.json +1 -1
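
Before the ImportCoordinator diff below, a quick orientation on the call shape it introduces. This is a minimal sketch, not package documentation: it assumes an already-initialized BrainyData instance named `brain`, and the item payloads are placeholders. The option names (`items`, `continueOnError`, `onProgress`) and the result fields (`successful`, `failed`) are taken from the diff itself.

```ts
// Sketch only: `brain` is assumed to be an initialized BrainyData instance,
// and the item payloads are illustrative placeholders, not real import data.
const result = await brain.addMany({
  items: [
    { data: 'Ada Lovelace', type: 'person', metadata: { name: 'Ada Lovelace' } },
    { data: 'Analytical Engine', type: 'thing', metadata: { name: 'Analytical Engine' } }
  ],
  continueOnError: true, // collect failures instead of aborting the whole batch
  onProgress: (done, total) => console.log(`created ${done}/${total}`)
})
// Per the diff, `successful` holds the new entity IDs and `failed` the rejects.
console.log(result.successful.length, result.failed.length)
```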
package/dist/import/ImportCoordinator.js

@@ -499,17 +499,114 @@ export class ImportCoordinator {
   });
   console.log(`✅ Document entity created: ${documentEntityId}`);
 }
-//
-
-
-
-
-//
-
-
-
-
-
+// ============================================
+// v4.11.0: Batch entity creation using addMany()
+// Replaces entity-by-entity loop for 10-100x performance improvement on cloud storage
+// ============================================
+if (!actuallyEnableDeduplication) {
+  // FAST PATH: Batch creation without deduplication (recommended for imports > 100 entities)
+  const importSource = vfsResult.rootPath;
+  // Prepare all entity parameters upfront
+  const entityParams = rows.map((row) => {
+    const entity = row.entity || row;
+    const vfsFile = vfsResult.files.find((f) => f.entityId === entity.id);
+    return {
+      data: entity.description || entity.name,
+      type: entity.type,
+      metadata: {
+        ...entity.metadata,
+        name: entity.name,
+        confidence: entity.confidence,
+        vfsPath: vfsFile?.path,
+        importedFrom: 'import-coordinator',
+        imports: [importSource],
+        ...(trackingContext && {
+          importIds: [trackingContext.importId],
+          projectId: trackingContext.projectId,
+          importedAt: trackingContext.importedAt,
+          importFormat: trackingContext.importFormat,
+          importSource: trackingContext.importSource,
+          sourceRow: row.rowNumber,
+          sourceSheet: row.sheet,
+          ...trackingContext.customMetadata
+        })
+      }
+    };
+  });
+  // Batch create all entities (storage-aware batching handles rate limits automatically)
+  const addResult = await this.brain.addMany({
+    items: entityParams,
+    continueOnError: true,
+    onProgress: (done, total) => {
+      options.onProgress?.({
+        stage: 'storing-graph',
+        message: `Creating entities: ${done}/${total}`,
+        processed: done,
+        total,
+        entities: done
+      });
+    }
+  });
+  // Map results to entities array and update rows with new IDs
+  for (let i = 0; i < addResult.successful.length; i++) {
+    const entityId = addResult.successful[i];
+    const row = rows[i];
+    const entity = row.entity || row;
+    const vfsFile = vfsResult.files.find((f) => f.entityId === entity.id);
+    entity.id = entityId;
+    entities.push({
+      id: entityId,
+      name: entity.name,
+      type: entity.type,
+      vfsPath: vfsFile?.path
+    });
+    newCount++;
+  }
+  // Handle failed entities
+  if (addResult.failed.length > 0) {
+    console.warn(`⚠️ ${addResult.failed.length} entities failed to create`);
+  }
+  // Create provenance links in batch
+  if (documentEntityId && options.createProvenanceLinks !== false && entities.length > 0) {
+    const provenanceParams = entities.map((entity, idx) => {
+      const row = rows[idx];
+      return {
+        from: documentEntityId,
+        to: entity.id,
+        type: VerbType.Contains,
+        metadata: {
+          relationshipType: 'provenance',
+          evidence: `Extracted from ${sourceInfo?.sourceFilename}`,
+          sheet: row?.sheet,
+          rowNumber: row?.rowNumber,
+          extractedAt: Date.now(),
+          format: sourceInfo?.format,
+          ...(trackingContext && {
+            importIds: [trackingContext.importId],
+            projectId: trackingContext.projectId,
+            createdAt: Date.now(),
+            importFormat: trackingContext.importFormat,
+            ...trackingContext.customMetadata
+          })
+        }
+      };
+    });
+    await this.brain.relateMany({
+      items: provenanceParams,
+      continueOnError: true
+    });
+    provenanceCount = provenanceParams.length;
+  }
+}
+else {
+  // SLOW PATH: Entity-by-entity with deduplication (only for small imports < 100 entities)
+  for (const row of rows) {
+    const entity = row.entity || row;
+    const vfsFile = vfsResult.files.find((f) => f.entityId === entity.id);
+    try {
+      const importSource = vfsResult.rootPath;
+      let entityId;
+      let wasMerged = false;
       // Use deduplicator to check for existing entities
       const mergeResult = await this.deduplicator.createOrMerge({
         id: entity.id,

@@ -546,181 +643,154 @@ export class ImportCoordinator {
       else {
         newCount++;
       }
-
-
-
-
-
+      // Update entity ID in extraction result
+      entity.id = entityId;
+      entities.push({
+        id: entityId,
+        name: entity.name,
         type: entity.type,
-
-          ...entity.metadata,
-          name: entity.name,
-          confidence: entity.confidence,
-          vfsPath: vfsFile?.path,
-          importedFrom: 'import-coordinator',
-          imports: [importSource],
-          // v4.10.0: Import tracking metadata
-          ...(trackingContext && {
-            importIds: [trackingContext.importId],
-            projectId: trackingContext.projectId,
-            importedAt: trackingContext.importedAt,
-            importFormat: trackingContext.importFormat,
-            importSource: trackingContext.importSource,
-            sourceRow: row.rowNumber,
-            sourceSheet: row.sheet,
-            ...trackingContext.customMetadata
-          })
-        }
+        vfsPath: vfsFile?.path
       });
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            extractedAt: Date.now(),
-            format: sourceInfo?.format,
-            // v4.10.0: Import tracking metadata
-            ...(trackingContext && {
-              importIds: [trackingContext.importId],
-              projectId: trackingContext.projectId,
-              createdAt: Date.now(),
-              importFormat: trackingContext.importFormat,
-              ...trackingContext.customMetadata
-            })
-          }
-        });
-        provenanceCount++;
-      }
-      // Collect relationships for batch creation
-      if (options.createRelationships && row.relationships) {
-        for (const rel of row.relationships) {
-          try {
-            // CRITICAL FIX (v3.43.2): Prevent infinite placeholder creation loop
-            // Find or create target entity using EXACT matching only
-            let targetEntityId;
-            // STEP 1: Check if target already exists in entities list (includes placeholders)
-            // This prevents creating duplicate placeholders - the root cause of Bug #1
-            const existingTarget = entities.find(e => e.name.toLowerCase() === rel.to.toLowerCase());
-            if (existingTarget) {
-              targetEntityId = existingTarget.id;
+      // ============================================
+      // v4.9.0: Create provenance relationship (document → entity)
+      // ============================================
+      if (documentEntityId && options.createProvenanceLinks !== false) {
+        await this.brain.relate({
+          from: documentEntityId,
+          to: entityId,
+          type: VerbType.Contains,
+          metadata: {
+            relationshipType: 'provenance',
+            evidence: `Extracted from ${sourceInfo?.sourceFilename}`,
+            sheet: row.sheet,
+            rowNumber: row.rowNumber,
+            extractedAt: Date.now(),
+            format: sourceInfo?.format,
+            // v4.10.0: Import tracking metadata
+            ...(trackingContext && {
+              importIds: [trackingContext.importId],
+              projectId: trackingContext.projectId,
+              createdAt: Date.now(),
+              importFormat: trackingContext.importFormat,
+              ...trackingContext.customMetadata
+            })
+          }
           }
-
-
-
-
-
-
-
-
-
-
+        });
+        provenanceCount++;
+      }
+      // Collect relationships for batch creation
+      if (options.createRelationships && row.relationships) {
+        for (const rel of row.relationships) {
+          try {
+            // CRITICAL FIX (v3.43.2): Prevent infinite placeholder creation loop
+            // Find or create target entity using EXACT matching only
+            let targetEntityId;
+            // STEP 1: Check if target already exists in entities list (includes placeholders)
+            // This prevents creating duplicate placeholders - the root cause of Bug #1
+            const existingTarget = entities.find(e => e.name.toLowerCase() === rel.to.toLowerCase());
+            if (existingTarget) {
+              targetEntityId = existingTarget.id;
             }
-
-
-
-
-
-
-
-
-
-              inferredFrom: entity.name,
-              // v4.10.0: Import tracking metadata
-              ...(trackingContext && {
-                importIds: [trackingContext.importId],
-                projectId: trackingContext.projectId,
-                importedAt: trackingContext.importedAt,
-                importFormat: trackingContext.importFormat,
-                ...trackingContext.customMetadata
-              })
+            else {
+              // STEP 2: Try to find in extraction results (rows)
+              // FIX: Use EXACT matching instead of fuzzy .includes()
+              // Fuzzy matching caused false matches (e.g., "Entity_29" matching "Entity_297")
+              for (const otherRow of rows) {
+                const otherEntity = otherRow.entity || otherRow;
+                if (otherEntity.name.toLowerCase() === rel.to.toLowerCase()) {
+                  targetEntityId = otherEntity.id;
+                  break;
                 }
-}
-//
-entities
-
-
-
-
+              }
+              // STEP 3: If still not found, create placeholder entity ONCE
+              // The placeholder is added to entities array, so future searches will find it
+              if (!targetEntityId) {
+                targetEntityId = await this.brain.add({
+                  data: rel.to,
+                  type: NounType.Thing,
+                  metadata: {
+                    name: rel.to,
+                    placeholder: true,
+                    inferredFrom: entity.name,
+                    // v4.10.0: Import tracking metadata
+                    ...(trackingContext && {
+                      importIds: [trackingContext.importId],
+                      projectId: trackingContext.projectId,
+                      importedAt: trackingContext.importedAt,
+                      importFormat: trackingContext.importFormat,
+                      ...trackingContext.customMetadata
+                    })
+                  }
+                });
+                // CRITICAL: Add to entities array so future searches find it
+                entities.push({
+                  id: targetEntityId,
+                  name: rel.to,
+                  type: NounType.Thing
+                });
+              }
             }
+            // Add to relationships array with target ID for batch processing
+            relationships.push({
+              id: '', // Will be assigned after batch creation
+              from: entityId,
+              to: targetEntityId,
+              type: rel.type,
+              confidence: rel.confidence, // v4.2.0: Top-level field
+              weight: rel.weight || 1.0, // v4.2.0: Top-level field
+              metadata: {
+                evidence: rel.evidence,
+                // v4.10.0: Import tracking metadata (will be merged in batch creation)
+                ...(trackingContext && {
+                  importIds: [trackingContext.importId],
+                  projectId: trackingContext.projectId,
+                  importedAt: trackingContext.importedAt,
+                  importFormat: trackingContext.importFormat,
+                  ...trackingContext.customMetadata
+                })
+              }
+            });
+          }
+          catch (error) {
+            // Skip relationship collection errors (entity might not exist, etc.)
+            continue;
           }
-            // Add to relationships array with target ID for batch processing
-            relationships.push({
-              id: '', // Will be assigned after batch creation
-              from: entityId,
-              to: targetEntityId,
-              type: rel.type,
-              confidence: rel.confidence, // v4.2.0: Top-level field
-              weight: rel.weight || 1.0, // v4.2.0: Top-level field
-              metadata: {
-                evidence: rel.evidence,
-                // v4.10.0: Import tracking metadata (will be merged in batch creation)
-                ...(trackingContext && {
-                  importIds: [trackingContext.importId],
-                  projectId: trackingContext.projectId,
-                  importedAt: trackingContext.importedAt,
-                  importFormat: trackingContext.importFormat,
-                  ...trackingContext.customMetadata
-                })
-              }
-            });
         }
-
-
-
+      }
+      // Streaming import: Progressive flush with dynamic interval adjustment (v4.2.0+)
+      entitiesSinceFlush++;
+      if (entitiesSinceFlush >= currentFlushInterval) {
+        const flushStart = Date.now();
+        await this.brain.flush();
+        const flushDuration = Date.now() - flushStart;
+        totalFlushes++;
+        // Reset counter
+        entitiesSinceFlush = 0;
+        // Recalculate flush interval based on current entity count
+        const newInterval = this.getProgressiveFlushInterval(entities.length);
+        if (newInterval !== currentFlushInterval) {
+          console.log(`📊 Flush interval adjusted: ${currentFlushInterval} → ${newInterval}\n` +
+            ` Reason: Reached ${entities.length} entities (threshold for next tier)\n` +
+            ` Impact: ${newInterval > currentFlushInterval ? 'Fewer' : 'More'} flushes = ${newInterval > currentFlushInterval ? 'Better performance' : 'More frequent updates'}`);
+          currentFlushInterval = newInterval;
         }
+        // Notify progress callback that data is now queryable
+        await options.onProgress?.({
+          stage: 'storing-graph',
+          message: `Flushed indexes (${entities.length}/${rows.length} entities, ${flushDuration}ms)`,
+          processed: entities.length,
+          total: rows.length,
+          entities: entities.length,
+          queryable: true // ← Indexes are flushed, data is queryable!
+        });
       }
     }
-
-
-
-      const flushStart = Date.now();
-      await this.brain.flush();
-      const flushDuration = Date.now() - flushStart;
-      totalFlushes++;
-      // Reset counter
-      entitiesSinceFlush = 0;
-      // Recalculate flush interval based on current entity count
-      const newInterval = this.getProgressiveFlushInterval(entities.length);
-      if (newInterval !== currentFlushInterval) {
-        console.log(`📊 Flush interval adjusted: ${currentFlushInterval} → ${newInterval}\n` +
-          ` Reason: Reached ${entities.length} entities (threshold for next tier)\n` +
-          ` Impact: ${newInterval > currentFlushInterval ? 'Fewer' : 'More'} flushes = ${newInterval > currentFlushInterval ? 'Better performance' : 'More frequent updates'}`);
-        currentFlushInterval = newInterval;
-      }
-      // Notify progress callback that data is now queryable
-      await options.onProgress?.({
-        stage: 'storing-graph',
-        message: `Flushed indexes (${entities.length}/${rows.length} entities, ${flushDuration}ms)`,
-        processed: entities.length,
-        total: rows.length,
-        entities: entities.length,
-        queryable: true // ← Indexes are flushed, data is queryable!
-      });
+    catch (error) {
+      // Skip entity creation errors (might already exist, etc.)
+      continue;
     }
   }
-
-      // Skip entity creation errors (might already exist, etc.)
-      continue;
-    }
-  }
+} // End of deduplication else block
 // Final flush for any remaining entities
 if (entitiesSinceFlush > 0) {
   const flushStart = Date.now();
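
The fast path above batches its provenance links through `relateMany()`, where the slow path still issues one `relate()` per entity. A companion sketch of the batched call, under the same assumptions as before: `brain` is an initialized instance, the ids are placeholders, and VerbType is assumed to be re-exported from the package root.

```ts
import { VerbType } from '@soulcraft/brainy' // assumed re-export

const documentEntityId = 'doc-1' // placeholder id
const entityIds = ['ent-1', 'ent-2'] // placeholder ids from a prior addMany
await brain.relateMany({
  items: entityIds.map(id => ({
    from: documentEntityId,
    to: id,
    type: VerbType.Contains,
    metadata: { relationshipType: 'provenance' }
  })),
  continueOnError: true
})
```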
package/dist/storage/adapters/azureBlobStorage.d.ts

@@ -12,7 +12,7 @@
  * v4.0.0: Fully compatible with metadata/vector separation architecture
  */
 import { HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../../coreTypes.js';
-import { BaseStorage } from '../baseStorage.js';
+import { BaseStorage, StorageBatchConfig } from '../baseStorage.js';
 type HNSWNode = HNSWNoun;
 type Edge = HNSWVerb;
 /**

@@ -70,6 +70,20 @@ export declare class AzureBlobStorage extends BaseStorage {
     };
     readOnly?: boolean;
   });
+  /**
+   * Get Azure Blob-optimized batch configuration
+   *
+   * Azure Blob Storage has moderate rate limits between GCS and S3:
+   * - Medium batch sizes (75 items)
+   * - Parallel processing supported
+   * - Moderate delays (75ms)
+   *
+   * Azure can handle ~2000 operations/second with good performance
+   *
+   * @returns Azure Blob-optimized batch configuration
+   * @since v4.11.0
+   */
+  getBatchConfig(): StorageBatchConfig;
   /**
    * Initialize the storage adapter
    */

package/dist/storage/adapters/azureBlobStorage.js

@@ -83,6 +83,31 @@ export class AzureBlobStorage extends BaseStorage {
       prodLog.info('🚀 High-volume mode FORCED via BRAINY_FORCE_HIGH_VOLUME environment variable');
     }
   }
+  /**
+   * Get Azure Blob-optimized batch configuration
+   *
+   * Azure Blob Storage has moderate rate limits between GCS and S3:
+   * - Medium batch sizes (75 items)
+   * - Parallel processing supported
+   * - Moderate delays (75ms)
+   *
+   * Azure can handle ~2000 operations/second with good performance
+   *
+   * @returns Azure Blob-optimized batch configuration
+   * @since v4.11.0
+   */
+  getBatchConfig() {
+    return {
+      maxBatchSize: 75,
+      batchDelayMs: 75,
+      maxConcurrent: 75,
+      supportsParallelWrites: true, // Azure handles parallel reasonably
+      rateLimit: {
+        operationsPerSecond: 2000, // Moderate limits
+        burstCapacity: 500
+      }
+    };
+  }
   /**
    * Initialize the storage adapter
    */
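
To make the Azure numbers concrete, the sketch below copies the config shape and values from the diff above (nothing is imported) and checks that a serial batch loop honoring `batchDelayMs` stays under the declared rate limit.

```ts
// Shape and values copied from the Azure diff above; nothing imported.
interface StorageBatchConfig {
  maxBatchSize: number
  batchDelayMs: number
  maxConcurrent: number
  supportsParallelWrites: boolean
  rateLimit: { operationsPerSecond: number; burstCapacity: number }
}

const azure: StorageBatchConfig = {
  maxBatchSize: 75,
  batchDelayMs: 75,
  maxConcurrent: 75,
  supportsParallelWrites: true,
  rateLimit: { operationsPerSecond: 2000, burstCapacity: 500 }
}

// A serial loop that sleeps batchDelayMs between full batches tops out at
// 75 items / 0.075 s = 1000 ops/s, half of the declared 2000 ops/s ceiling.
const serialOpsPerSecond = azure.maxBatchSize / (azure.batchDelayMs / 1000)
console.log(serialOpsPerSecond <= azure.rateLimit.operationsPerSecond) // true
```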
package/dist/storage/adapters/baseStorageAdapter.d.ts

@@ -3,6 +3,7 @@
  * Provides common functionality for all storage adapters, including statistics tracking
  */
 import { StatisticsData, StorageAdapter, HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, NounMetadata, VerbMetadata } from '../../coreTypes.js';
+import { StorageBatchConfig } from '../baseStorage.js';
 /**
  * Base class for storage adapters that implements statistics tracking
  */

@@ -50,6 +51,18 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
     quota: number | null;
     details?: Record<string, any>;
   }>;
+  /**
+   * Get optimal batch configuration for this storage adapter
+   * Override in subclasses to provide storage-specific optimization
+   *
+   * This method allows each storage adapter to declare its optimal batch behavior
+   * for rate limiting and performance. The configuration is used by addMany(),
+   * relateMany(), and import operations to automatically adapt to storage capabilities.
+   *
+   * @returns Batch configuration optimized for this storage type
+   * @since v4.11.0
+   */
+  getBatchConfig(): StorageBatchConfig;
   /**
    * Get nouns with pagination and filtering
    * @param options Pagination and filtering options

package/dist/storage/adapters/baseStorageAdapter.js

@@ -60,6 +60,32 @@ export class BaseStorageAdapter {
     this.countPersistBatchSize = 10; // Operations before forcing persist (cloud storage)
     this.countPersistInterval = 5000; // Milliseconds before forcing persist (cloud storage)
   }
+  /**
+   * Get optimal batch configuration for this storage adapter
+   * Override in subclasses to provide storage-specific optimization
+   *
+   * This method allows each storage adapter to declare its optimal batch behavior
+   * for rate limiting and performance. The configuration is used by addMany(),
+   * relateMany(), and import operations to automatically adapt to storage capabilities.
+   *
+   * @returns Batch configuration optimized for this storage type
+   * @since v4.11.0
+   */
+  getBatchConfig() {
+    // Conservative defaults that work safely across all storage types
+    // Cloud storage adapters should override with higher throughput values
+    // Local storage adapters should override with no delays
+    return {
+      maxBatchSize: 50,
+      batchDelayMs: 100,
+      maxConcurrent: 50,
+      supportsParallelWrites: false,
+      rateLimit: {
+        operationsPerSecond: 100,
+        burstCapacity: 200
+      }
+    };
+  }
   /**
    * Save statistics data
    * @param statistics The statistics data to save
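
The base class declares `rateLimit` numbers, but this diff does not show how they are enforced. One conventional reading is a token bucket that refills at `operationsPerSecond` and caps at `burstCapacity`; the sketch below illustrates that reading and is not taken from the package source.

```ts
// Illustrative token bucket; not taken from the package source.
class TokenBucket {
  private tokens: number
  private lastRefill = Date.now()

  constructor(private ratePerSec: number, private capacity: number) {
    this.tokens = capacity // start with a full burst allowance
  }

  // Wait until `n` tokens are available, then consume them.
  async take(n: number): Promise<void> {
    for (;;) {
      const now = Date.now()
      this.tokens = Math.min(
        this.capacity,
        this.tokens + ((now - this.lastRefill) / 1000) * this.ratePerSec
      )
      this.lastRefill = now
      if (this.tokens >= n) {
        this.tokens -= n
        return
      }
      const waitMs = ((n - this.tokens) / this.ratePerSec) * 1000
      await new Promise(resolve => setTimeout(resolve, waitMs))
    }
  }
}

// Default base config: 100 ops/s with a 200-op burst.
const bucket = new TokenBucket(100, 200)
bucket.take(200).then(() => console.log('burst drained; further ops are paced'))
```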
package/dist/storage/adapters/fileSystemStorage.d.ts

@@ -3,7 +3,7 @@
  * File system storage adapter for Node.js environments
  */
 import { HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../../coreTypes.js';
-import { BaseStorage } from '../baseStorage.js';
+import { BaseStorage, StorageBatchConfig } from '../baseStorage.js';
 type HNSWNode = HNSWNoun;
 type Edge = HNSWVerb;
 /**

@@ -39,6 +39,19 @@ export declare class FileSystemStorage extends BaseStorage {
     compression?: boolean;
     compressionLevel?: number;
   });
+  /**
+   * Get FileSystem-optimized batch configuration
+   *
+   * File system storage is I/O bound but not rate limited:
+   * - Large batch sizes (500 items)
+   * - No delays needed (0ms)
+   * - Moderate concurrency (100 operations) - limited by I/O threads
+   * - Parallel processing supported
+   *
+   * @returns FileSystem-optimized batch configuration
+   * @since v4.11.0
+   */
+  getBatchConfig(): StorageBatchConfig;
   /**
    * Initialize the storage adapter
    */

package/dist/storage/adapters/fileSystemStorage.js

@@ -70,6 +70,30 @@ export class FileSystemStorage extends BaseStorage {
     }
     // Defer path operations until init() when path module is guaranteed to be loaded
   }
+  /**
+   * Get FileSystem-optimized batch configuration
+   *
+   * File system storage is I/O bound but not rate limited:
+   * - Large batch sizes (500 items)
+   * - No delays needed (0ms)
+   * - Moderate concurrency (100 operations) - limited by I/O threads
+   * - Parallel processing supported
+   *
+   * @returns FileSystem-optimized batch configuration
+   * @since v4.11.0
+   */
+  getBatchConfig() {
+    return {
+      maxBatchSize: 500,
+      batchDelayMs: 0,
+      maxConcurrent: 100,
+      supportsParallelWrites: true, // Filesystem handles parallel I/O
+      rateLimit: {
+        operationsPerSecond: 5000, // Depends on disk speed
+        burstCapacity: 2000
+      }
+    };
+  }
   /**
    * Initialize the storage adapter
    */
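
Taken together, these diffs suggest the batching pattern sketched below: chunk work by `maxBatchSize`, parallelize only when `supportsParallelWrites` is true, and sleep `batchDelayMs` between chunks. The function is illustrative rather than package code; `writeOne` stands in for whatever per-item write an adapter performs.

```ts
// Sketch of the storage-aware batching pattern; config fields mirror the diffs.
type BatchConfig = {
  maxBatchSize: number
  batchDelayMs: number
  supportsParallelWrites: boolean
}

async function runBatches<T>(
  items: T[],
  cfg: BatchConfig,
  writeOne: (item: T) => Promise<void>
): Promise<void> {
  for (let i = 0; i < items.length; i += cfg.maxBatchSize) {
    const chunk = items.slice(i, i + cfg.maxBatchSize)
    if (cfg.supportsParallelWrites) {
      await Promise.all(chunk.map(writeOne)) // e.g. filesystem: parallel I/O is safe
    } else {
      for (const item of chunk) await writeOne(item) // conservative default: serial
    }
    const moreToDo = i + cfg.maxBatchSize < items.length
    if (cfg.batchDelayMs > 0 && moreToDo) {
      await new Promise(resolve => setTimeout(resolve, cfg.batchDelayMs))
    }
  }
}
```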