@soulcraft/brainy 5.7.12 → 5.8.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ### [5.8.0](https://github.com/soulcraftlabs/brainy/compare/v5.7.13...v5.8.0) (2025-11-14)
6
+
7
+ - feat: add v5.8.0 features - transactions, pagination, and comprehensive docs (e40fee3)
8
+ - docs: label all performance claims as MEASURED vs PROJECTED (NO FAKE CODE compliance) (52e9617)
9
+
10
+
11
+ ### [5.7.13](https://github.com/soulcraftlabs/brainy/compare/v5.7.12...v5.7.13) (2025-11-14)
12
+
13
+
14
+ ### 🐛 Bug Fixes
15
+
16
+ * resolve excludeVFS architectural bug across all query paths (v5.7.13) ([e57e947](https://github.com/soulcraftlabs/brainy/commit/e57e9474986097f37e89a8dbfa868005368d645c))
17
+
5
18
  ### [5.7.12](https://github.com/soulcraftlabs/brainy/compare/v5.7.11...v5.7.12) (2025-11-13)
6
19
 
7
20
 
@@ -1659,11 +1672,11 @@ After upgrading to v3.50.2:
1659
1672
 
1660
1673
  ### ✨ Features
1661
1674
 
1662
- **Phase 2: Type-Aware HNSW - 87% Memory Reduction @ Billion Scale**
1675
+ **Phase 2: Type-Aware HNSW - PROJECTED 87% Memory Reduction @ Billion Scale**
1663
1676
 
1664
1677
  - **feat**: TypeAwareHNSWIndex with separate HNSW graphs per entity type
1665
- - **87% HNSW memory reduction**: 384GB → 50GB (-334GB) @ 1B scale
1666
- - **10x faster single-type queries**: search 100M nodes instead of 1B
1678
+ - **PROJECTED 87% HNSW memory reduction**: 384GB → 50GB (-334GB) @ 1B scale (calculated from architectural analysis, not yet benchmarked at billion scale)
1679
+ - **PROJECTED 10x faster single-type queries**: search 100M nodes instead of 1B (not yet benchmarked)
1667
1680
  - **5-8x faster multi-type queries**: search subset of types
1668
1681
  - **~3x faster all-types queries**: 31 smaller graphs vs 1 large graph
1669
1682
  - Lazy initialization - only creates indexes for types with entities
@@ -1681,11 +1694,11 @@ After upgrading to v3.50.2:
1681
1694
  - Maintains O(log n) performance guarantees
1682
1695
  - Zero API changes for existing code
1683
1696
 
1684
- ### 📊 Impact @ Billion Scale
1697
+ ### 📊 Impact @ Billion Scale (PROJECTED)
1685
1698
 
1686
- **Memory Reduction (Phase 2):**
1699
+ **Memory Reduction (Phase 2) - PROJECTED:**
1687
1700
  ```
1688
- HNSW memory: 384GB → 50GB (-87% / -334GB)
1701
+ HNSW memory: 384GB → 50GB (-87% / -334GB) - PROJECTED from architectural analysis, not benchmarked at 1B scale
1689
1702
  ```
1690
1703
 
1691
1704
  **Query Performance:**
@@ -1751,7 +1764,7 @@ Part of the billion-scale optimization roadmap:
1751
1764
  ### 🎯 Next Steps
1752
1765
 
1753
1766
  **Phase 3** (planned): Type-First Query Optimization
1754
- - Query: 40% latency reduction via type-aware planning
1767
+ - Query: PROJECTED 40% latency reduction via type-aware planning (not yet benchmarked)
1755
1768
  - Index: Smart query routing based on type cardinality
1756
1769
  - Estimated: 2 weeks implementation
1757
1770
 
package/README.md CHANGED
@@ -631,16 +631,20 @@ This comprehensive guide includes:
631
631
  - Your primary resource for building with Brainy
632
632
  - Every method documented with working examples
633
633
 
634
- 2. **[Natural Language Queries](docs/guides/natural-language.md)**
634
+ 2. **[Filter & Query Syntax Guide](docs/FIND_SYSTEM.md)**
635
+ - Complete reference for operators, compound filters, and optimization tips
636
+
637
+ 3. **[Natural Language Queries](docs/guides/natural-language.md)**
635
638
  - Master the `find()` method and Triple Intelligence queries
636
639
 
637
- 3. **[v4.0.0 Migration Guide](docs/MIGRATION-V3-TO-V4.md)**
640
+ 4. **[v4.0.0 Migration Guide](docs/MIGRATION-V3-TO-V4.md)**
638
641
  - Upgrading from v3 (100% backward compatible)
639
642
 
640
643
  ### 🧠 Core Concepts & Architecture
641
644
 
642
645
  - **[Triple Intelligence Architecture](docs/architecture/triple-intelligence.md)** — How vector + graph + document work together
643
646
  - **[Noun-Verb Taxonomy](docs/architecture/noun-verb-taxonomy.md)** — The universal type system (42 nouns × 127 verbs)
647
+ - **[Transactions](docs/transactions.md)** — Atomic operations with automatic rollback
644
648
  - **[Architecture Overview](docs/architecture/overview.md)** — System design and components
645
649
  - **[Data Storage Architecture](docs/architecture/data-storage-architecture.md)** — Type-aware indexing and HNSW
646
650
 
package/dist/brainy.d.ts CHANGED
@@ -27,6 +27,7 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
27
27
  private storage;
28
28
  private metadataIndex;
29
29
  private graphIndex;
30
+ private transactionManager;
30
31
  private embedder;
31
32
  private distance;
32
33
  private augmentationRegistry;
package/dist/brainy.js CHANGED
@@ -6,6 +6,7 @@
6
6
  */
7
7
  import { v4 as uuidv4 } from './universal/uuid.js';
8
8
  import { HNSWIndex } from './hnsw/hnswIndex.js';
9
+ import { HNSWIndexOptimized } from './hnsw/hnswIndexOptimized.js';
9
10
  import { TypeAwareHNSWIndex } from './hnsw/typeAwareHNSWIndex.js';
10
11
  import { createStorage } from './storage/storageFactory.js';
11
12
  import { defaultEmbeddingFunction, cosineDistance } from './utils/index.js';
@@ -23,6 +24,8 @@ import { CommitBuilder } from './storage/cow/CommitObject.js';
23
24
  import { NULL_HASH } from './storage/cow/constants.js';
24
25
  import { createPipeline } from './streaming/pipeline.js';
25
26
  import { configureLogger, LogLevel } from './utils/logger.js';
27
+ import { TransactionManager } from './transaction/TransactionManager.js';
28
+ import { SaveNounMetadataOperation, SaveNounOperation, AddToTypeAwareHNSWOperation, AddToHNSWOperation, AddToMetadataIndexOperation, SaveVerbMetadataOperation, SaveVerbOperation, AddToGraphIndexOperation, RemoveFromHNSWOperation, RemoveFromTypeAwareHNSWOperation, RemoveFromMetadataIndexOperation, RemoveFromGraphIndexOperation, UpdateNounMetadataOperation, DeleteNounMetadataOperation, DeleteVerbMetadataOperation } from './transaction/operations/index.js';
26
29
  import { DistributedCoordinator, ShardManager, CacheSync, ReadWriteSeparation } from './distributed/index.js';
27
30
  import { NounType } from './types/graphTypes.js';
28
31
  /**
@@ -46,6 +49,7 @@ export class Brainy {
46
49
  this.distance = cosineDistance;
47
50
  this.embedder = this.setupEmbedder();
48
51
  this.augmentationRegistry = this.setupAugmentations();
52
+ this.transactionManager = new TransactionManager();
49
53
  // Setup distributed components if enabled
50
54
  if (this.config.distributed?.enabled) {
51
55
  this.setupDistributedComponents();
@@ -327,24 +331,6 @@ export class Brainy {
327
331
  ...(params.weight !== undefined && { weight: params.weight }),
328
332
  ...(params.createdBy && { createdBy: params.createdBy })
329
333
  };
330
- // v5.0.1: Save metadata FIRST so TypeAwareStorage can cache the type
331
- // This prevents the race condition where saveNoun() defaults to 'thing'
332
- await this.storage.saveNounMetadata(id, storageMetadata);
333
- // Then save vector
334
- await this.storage.saveNoun({
335
- id,
336
- vector,
337
- connections: new Map(),
338
- level: 0
339
- });
340
- // v5.4.0: Add to HNSW index AFTER entity is saved (fixes race condition)
341
- // CRITICAL: Entity must exist in storage before HNSW tries to persist
342
- if (this.index instanceof TypeAwareHNSWIndex) {
343
- await this.index.addItem({ id, vector }, params.type);
344
- }
345
- else {
346
- await this.index.addItem({ id, vector });
347
- }
348
334
  // v4.8.0: Build entity structure for indexing (NEW - with top-level fields)
349
335
  const entityForIndexing = {
350
336
  id,
@@ -362,8 +348,28 @@ export class Brainy {
362
348
  // Only custom fields in metadata
363
349
  metadata: params.metadata || {}
364
350
  };
365
- // Pass full entity structure to metadata index
366
- await this.metadataIndex.addToIndex(id, entityForIndexing);
351
+ // v5.8.0: Execute atomically with transaction system
352
+ // All operations succeed or all rollback - prevents partial failures
353
+ await this.transactionManager.executeTransaction(async (tx) => {
354
+ // Operation 1: Save metadata FIRST (v5.0.1 - TypeAwareStorage caching)
355
+ tx.addOperation(new SaveNounMetadataOperation(this.storage, id, storageMetadata));
356
+ // Operation 2: Save vector data
357
+ tx.addOperation(new SaveNounOperation(this.storage, {
358
+ id,
359
+ vector,
360
+ connections: new Map(),
361
+ level: 0
362
+ }));
363
+ // Operation 3: Add to HNSW index (v5.4.0 - after entity saved)
364
+ if (this.index instanceof TypeAwareHNSWIndex) {
365
+ tx.addOperation(new AddToTypeAwareHNSWOperation(this.index, id, vector, params.type));
366
+ }
367
+ else {
368
+ tx.addOperation(new AddToHNSWOperation(this.index, id, vector));
369
+ }
370
+ // Operation 4: Add to metadata index
371
+ tx.addOperation(new AddToMetadataIndexOperation(this.metadataIndex, id, entityForIndexing));
372
+ });
367
373
  return id;
368
374
  });
369
375
  }
@@ -557,32 +563,6 @@ export class Brainy {
557
563
  ...(params.confidence === undefined && existing.confidence !== undefined && { confidence: existing.confidence }),
558
564
  ...(params.weight === undefined && existing.weight !== undefined && { weight: existing.weight })
559
565
  };
560
- // v4.0.0: Save metadata FIRST (v5.1.0 fix: updates type cache for TypeAwareStorage)
561
- // v5.1.0: saveNounMetadata must be called before saveNoun so that the type cache
562
- // is updated before determining the shard path. Otherwise type changes cause
563
- // entities to be saved in the wrong shard and become unfindable.
564
- await this.storage.saveNounMetadata(params.id, updatedMetadata);
565
- // Then save vector (will use updated type cache)
566
- await this.storage.saveNoun({
567
- id: params.id,
568
- vector,
569
- connections: new Map(),
570
- level: 0
571
- });
572
- // v5.4.0: Update HNSW index AFTER entity is saved (fixes race condition)
573
- // CRITICAL: Entity must be fully updated in storage before HNSW tries to persist
574
- if (needsReindexing) {
575
- // Update in index (remove and re-add since no update method)
576
- // Phase 2: pass type for TypeAwareHNSWIndex
577
- if (this.index instanceof TypeAwareHNSWIndex) {
578
- await this.index.removeItem(params.id, existing.type);
579
- await this.index.addItem({ id: params.id, vector }, newType); // v5.1.0: use new type
580
- }
581
- else {
582
- await this.index.removeItem(params.id);
583
- await this.index.addItem({ id: params.id, vector });
584
- }
585
- }
586
566
  // v4.8.0: Build entity structure for metadata index (with top-level fields)
587
567
  const entityForIndexing = {
588
568
  id: params.id,
@@ -600,9 +580,32 @@ export class Brainy {
600
580
  // Only custom fields in metadata
601
581
  metadata: newMetadata
602
582
  };
603
- // Update metadata index - remove old entry and add new one with v4.8.0 structure
604
- await this.metadataIndex.removeFromIndex(params.id, existing.metadata);
605
- await this.metadataIndex.addToIndex(params.id, entityForIndexing);
583
+ // v5.8.0: Execute atomically with transaction system
584
+ await this.transactionManager.executeTransaction(async (tx) => {
585
+ // Operation 1: Update metadata FIRST (v5.1.0 - updates type cache)
586
+ tx.addOperation(new UpdateNounMetadataOperation(this.storage, params.id, updatedMetadata));
587
+ // Operation 2: Update vector data (will use updated type cache)
588
+ tx.addOperation(new SaveNounOperation(this.storage, {
589
+ id: params.id,
590
+ vector,
591
+ connections: new Map(),
592
+ level: 0
593
+ }));
594
+ // Operation 3-4: Update HNSW index (remove and re-add if reindexing needed)
595
+ if (needsReindexing) {
596
+ if (this.index instanceof TypeAwareHNSWIndex) {
597
+ tx.addOperation(new RemoveFromTypeAwareHNSWOperation(this.index, params.id, existing.vector, existing.type));
598
+ tx.addOperation(new AddToTypeAwareHNSWOperation(this.index, params.id, vector, newType));
599
+ }
600
+ else {
601
+ tx.addOperation(new RemoveFromHNSWOperation(this.index, params.id, existing.vector));
602
+ tx.addOperation(new AddToHNSWOperation(this.index, params.id, vector));
603
+ }
604
+ }
605
+ // Operation 5-6: Update metadata index (remove old, add new)
606
+ tx.addOperation(new RemoveFromMetadataIndexOperation(this.metadataIndex, params.id, existing.metadata));
607
+ tx.addOperation(new AddToMetadataIndexOperation(this.metadataIndex, params.id, entityForIndexing));
608
+ });
606
609
  });
607
610
  }
608
611
  /**
@@ -615,47 +618,37 @@ export class Brainy {
615
618
  }
616
619
  await this.ensureInitialized();
617
620
  return this.augmentationRegistry.execute('delete', { id }, async () => {
618
- // Remove from vector index (Phase 2: get type for TypeAwareHNSWIndex)
619
- if (this.index instanceof TypeAwareHNSWIndex) {
620
- // Get entity metadata to determine type
621
- const metadata = await this.storage.getNounMetadata(id);
622
- if (metadata && metadata.noun) {
623
- await this.index.removeItem(id, metadata.noun);
624
- }
625
- }
626
- else {
627
- await this.index.removeItem(id);
628
- }
629
- // Remove from metadata index
630
- await this.metadataIndex.removeFromIndex(id);
631
- // Delete from storage
632
- await this.storage.deleteNoun(id);
633
- // Delete metadata (if it exists as separate)
634
- try {
635
- await this.storage.saveMetadata(id, null); // Clear metadata
636
- }
637
- catch {
638
- // Ignore if not supported
639
- }
640
- // Delete related verbs
621
+ // Get entity metadata and related verbs before deletion
622
+ const metadata = await this.storage.getNounMetadata(id);
623
+ const noun = await this.storage.getNoun(id);
641
624
  const verbs = await this.storage.getVerbsBySource(id);
642
625
  const targetVerbs = await this.storage.getVerbsByTarget(id);
643
626
  const allVerbs = [...verbs, ...targetVerbs];
644
- for (const verb of allVerbs) {
645
- // Remove from graph index first
646
- await this.graphIndex.removeVerb(verb.id);
647
- // Then delete from storage
648
- await this.storage.deleteVerb(verb.id);
649
- // Delete verb metadata if exists
650
- try {
651
- if (typeof this.storage.deleteVerbMetadata === 'function') {
652
- await this.storage.deleteVerbMetadata(verb.id);
627
+ // v5.8.0: Execute atomically with transaction system
628
+ await this.transactionManager.executeTransaction(async (tx) => {
629
+ // Operation 1: Remove from vector index
630
+ if (noun && metadata) {
631
+ if (this.index instanceof TypeAwareHNSWIndex && metadata.noun) {
632
+ tx.addOperation(new RemoveFromTypeAwareHNSWOperation(this.index, id, noun.vector, metadata.noun));
633
+ }
634
+ else if (this.index instanceof HNSWIndex || this.index instanceof HNSWIndexOptimized) {
635
+ tx.addOperation(new RemoveFromHNSWOperation(this.index, id, noun.vector));
653
636
  }
654
637
  }
655
- catch {
656
- // Ignore if not supported
638
+ // Operation 2: Remove from metadata index
639
+ if (metadata) {
640
+ tx.addOperation(new RemoveFromMetadataIndexOperation(this.metadataIndex, id, metadata));
657
641
  }
658
- }
642
+ // Operation 3: Delete noun metadata
643
+ tx.addOperation(new DeleteNounMetadataOperation(this.storage, id));
644
+ // Operations 4+: Delete all related verbs atomically
645
+ for (const verb of allVerbs) {
646
+ // Remove from graph index
647
+ tx.addOperation(new RemoveFromGraphIndexOperation(this.graphIndex, verb));
648
+ // Delete verb metadata
649
+ tx.addOperation(new DeleteVerbMetadataOperation(this.storage, verb.id));
650
+ }
651
+ });
659
652
  });
660
653
  }
661
654
  // ============= RELATIONSHIP OPERATIONS =============
@@ -780,14 +773,18 @@ export class Brainy {
780
773
  // CRITICAL FIX (v3.43.2): Check for duplicate relationships
781
774
  // This prevents infinite loops where same relationship is created repeatedly
782
775
  // Bug #1 showed incrementing verb counts (7→8→9...) indicating duplicates
783
- const existingVerbs = await this.storage.getVerbsBySource(params.from);
784
- const duplicate = existingVerbs.find(v => v.targetId === params.to &&
785
- v.verb === params.type);
786
- if (duplicate) {
787
- // Relationship already exists - return existing ID instead of creating duplicate
788
- console.log(`[DEBUG] Skipping duplicate relationship: ${params.from} ${params.to} (${params.type})`);
789
- return duplicate.id;
776
+ // v5.8.0 OPTIMIZATION: Use GraphAdjacencyIndex for O(log n) lookup instead of O(n) storage scan
777
+ const verbIds = await this.graphIndex.getVerbIdsBySource(params.from);
778
+ // Check each verb ID for matching relationship (only load verbs we need to check)
779
+ for (const verbId of verbIds) {
780
+ const verb = await this.graphIndex.getVerbCached(verbId);
781
+ if (verb && verb.targetId === params.to && verb.verb === params.type) {
782
+ // Relationship already exists - return existing ID instead of creating duplicate
783
+ console.log(`[DEBUG] Skipping duplicate relationship: ${params.from} → ${params.to} (${params.type})`);
784
+ return verb.id;
785
+ }
790
786
  }
787
+ // No duplicate found - proceed with creation
791
788
  // Generate ID
792
789
  const id = uuidv4();
793
790
  // Compute relationship vector (average of entities)
@@ -815,40 +812,47 @@ export class Brainy {
815
812
  metadata: params.metadata,
816
813
  createdAt: Date.now()
817
814
  };
818
- await this.storage.saveVerb({
819
- id,
820
- vector: relationVector,
821
- connections: new Map(),
822
- verb: params.type,
823
- sourceId: params.from,
824
- targetId: params.to
825
- });
826
- await this.storage.saveVerbMetadata(id, verbMetadata);
827
- // Add to graph index for O(1) lookups
828
- await this.graphIndex.addVerb(verb);
829
- // Create bidirectional if requested
830
- if (params.bidirectional) {
831
- const reverseId = uuidv4();
832
- const reverseVerb = {
833
- ...verb,
834
- id: reverseId,
835
- sourceId: params.to,
836
- targetId: params.from,
837
- source: toEntity.type,
838
- target: fromEntity.type
839
- };
840
- await this.storage.saveVerb({
841
- id: reverseId,
815
+ // v5.8.0: Execute atomically with transaction system
816
+ await this.transactionManager.executeTransaction(async (tx) => {
817
+ // Operation 1: Save verb vector data
818
+ tx.addOperation(new SaveVerbOperation(this.storage, {
819
+ id,
842
820
  vector: relationVector,
843
821
  connections: new Map(),
844
822
  verb: params.type,
845
- sourceId: params.to,
846
- targetId: params.from
847
- });
848
- await this.storage.saveVerbMetadata(reverseId, verbMetadata);
849
- // Add reverse relationship to graph index too
850
- await this.graphIndex.addVerb(reverseVerb);
851
- }
823
+ sourceId: params.from,
824
+ targetId: params.to
825
+ }));
826
+ // Operation 2: Save verb metadata
827
+ tx.addOperation(new SaveVerbMetadataOperation(this.storage, id, verbMetadata));
828
+ // Operation 3: Add to graph index for O(1) lookups
829
+ tx.addOperation(new AddToGraphIndexOperation(this.graphIndex, verb));
830
+ // Create bidirectional if requested
831
+ if (params.bidirectional) {
832
+ const reverseId = uuidv4();
833
+ const reverseVerb = {
834
+ ...verb,
835
+ id: reverseId,
836
+ sourceId: params.to,
837
+ targetId: params.from,
838
+ source: toEntity.type,
839
+ target: fromEntity.type
840
+ };
841
+ // Operation 4: Save reverse verb vector data
842
+ tx.addOperation(new SaveVerbOperation(this.storage, {
843
+ id: reverseId,
844
+ vector: relationVector,
845
+ connections: new Map(),
846
+ verb: params.type,
847
+ sourceId: params.to,
848
+ targetId: params.from
849
+ }));
850
+ // Operation 5: Save reverse verb metadata
851
+ tx.addOperation(new SaveVerbMetadataOperation(this.storage, reverseId, verbMetadata));
852
+ // Operation 6: Add reverse relationship to graph index
853
+ tx.addOperation(new AddToGraphIndexOperation(this.graphIndex, reverseVerb));
854
+ }
855
+ });
852
856
  return id;
853
857
  });
854
858
  }
@@ -858,10 +862,17 @@ export class Brainy {
858
862
  async unrelate(id) {
859
863
  await this.ensureInitialized();
860
864
  return this.augmentationRegistry.execute('unrelate', { id }, async () => {
861
- // Remove from graph index
862
- await this.graphIndex.removeVerb(id);
863
- // Remove from storage
864
- await this.storage.deleteVerb(id);
865
+ // Get verb data before deletion for rollback
866
+ const verb = await this.storage.getVerb(id);
867
+ // v5.8.0: Execute atomically with transaction system
868
+ await this.transactionManager.executeTransaction(async (tx) => {
869
+ // Operation 1: Remove from graph index
870
+ if (verb) {
871
+ tx.addOperation(new RemoveFromGraphIndexOperation(this.graphIndex, verb));
872
+ }
873
+ // Operation 2: Delete verb metadata (which also deletes vector)
874
+ tx.addOperation(new DeleteVerbMetadataOperation(this.storage, id));
875
+ });
865
876
  });
866
877
  }
867
878
  /**
@@ -1164,41 +1175,6 @@ export class Brainy {
1164
1175
  Object.assign(filter, params.where);
1165
1176
  if (params.service)
1166
1177
  filter.service = params.service;
1167
- // v5.7.12: excludeVFS helper - ONLY exclude VFS infrastructure entities
1168
- // Bug fix: Previously excluded entities with ANY vfsType field
1169
- // Now ONLY excludes entities with isVFSEntity: true OR vfsType: 'file'/'directory'
1170
- // This allows extracted entities (concepts/people/etc) to be included even if they
1171
- // have vfsPath metadata showing where they were imported from
1172
- if (params.excludeVFS === true) {
1173
- // Build filter: EXCLUDE WHERE (isVFSEntity == true) OR (vfsType IN ['file', 'directory'])
1174
- // Implementation: INCLUDE WHERE (isVFSEntity missing/false) AND (vfsType missing/not file or directory)
1175
- const existingFilter = { ...filter };
1176
- filter = {
1177
- allOf: [
1178
- existingFilter,
1179
- {
1180
- // Only include entities WITHOUT isVFSEntity: true
1181
- anyOf: [
1182
- { isVFSEntity: { exists: false } },
1183
- { isVFSEntity: { ne: true } }
1184
- ]
1185
- },
1186
- {
1187
- // Only include entities WITHOUT vfsType: 'file' or 'directory'
1188
- // Since VFS files ALWAYS have vfsType set, we check it's missing OR not file/dir
1189
- anyOf: [
1190
- { vfsType: { exists: false } },
1191
- {
1192
- allOf: [
1193
- { vfsType: { ne: 'file' } },
1194
- { vfsType: { ne: 'directory' } }
1195
- ]
1196
- }
1197
- ]
1198
- }
1199
- ]
1200
- };
1201
- }
1202
1178
  if (params.type) {
1203
1179
  const types = Array.isArray(params.type) ? params.type : [params.type];
1204
1180
  if (types.length === 1) {
@@ -1213,6 +1189,19 @@ export class Brainy {
1213
1189
  };
1214
1190
  }
1215
1191
  }
1192
+ // v5.7.13: excludeVFS helper - ONLY exclude VFS infrastructure entities
1193
+ // Applied AFTER type filter to avoid execution order bugs
1194
+ // Excludes entities where:
1195
+ // - vfsType is 'file' or 'directory' (VFS files/folders)
1196
+ // - isVFSEntity is true (explicitly marked as VFS)
1197
+ // Includes extracted entities (person/concept/etc) even if they have vfsPath metadata
1198
+ if (params.excludeVFS === true) {
1199
+ // VFS infrastructure entities ALWAYS have vfsType set
1200
+ // Extracted entities do NOT have vfsType (undefined)
1201
+ filter.vfsType = { exists: false };
1202
+ // Extra safety: exclude entities explicitly marked as VFS
1203
+ filter.isVFSEntity = { ne: true };
1204
+ }
1216
1205
  // v4.5.4: Apply sorting if requested, otherwise just filter
1217
1206
  let filteredIds;
1218
1207
  if (params.orderBy) {
@@ -1240,10 +1229,12 @@ export class Brainy {
1240
1229
  if (!hasVectorSearchCriteria && !hasFilterCriteria && !hasGraphCriteria) {
1241
1230
  const limit = params.limit || 20;
1242
1231
  const offset = params.offset || 0;
1243
- // v4.7.0: excludeVFS helper
1232
+ // v5.7.13: excludeVFS helper - exclude VFS infrastructure entities
1233
+ // VFS files/folders have vfsType set, extracted entities do NOT
1244
1234
  let filter = {};
1245
1235
  if (params.excludeVFS === true) {
1246
1236
  filter.vfsType = { exists: false };
1237
+ filter.isVFSEntity = { ne: true };
1247
1238
  }
1248
1239
  // Use metadata index if we need to filter
1249
1240
  if (Object.keys(filter).length > 0) {
@@ -1308,9 +1299,11 @@ export class Brainy {
1308
1299
  Object.assign(filter, params.where);
1309
1300
  if (params.service)
1310
1301
  filter.service = params.service;
1311
- // v4.7.0: excludeVFS helper for cleaner UX
1302
+ // v5.7.13: excludeVFS helper - exclude VFS infrastructure entities
1303
+ // VFS files/folders have vfsType set, extracted entities do NOT
1312
1304
  if (params.excludeVFS === true) {
1313
1305
  filter.vfsType = { exists: false };
1306
+ filter.isVFSEntity = { ne: true };
1314
1307
  }
1315
1308
  if (params.type) {
1316
1309
  const types = Array.isArray(params.type) ? params.type : [params.type];
@@ -55,27 +55,96 @@ export declare class GraphAdjacencyIndex {
55
55
  private ensureInitialized;
56
56
  /**
57
57
  * Core API - Neighbor lookup with LSM-tree storage
58
- * Now O(log n) with bloom filter optimization (90% of queries skip disk I/O)
58
+ *
59
+ * O(log n) with bloom filter optimization (90% of queries skip disk I/O)
60
+ * v5.8.0: Added pagination support for high-degree nodes
61
+ *
62
+ * @param id Entity ID to get neighbors for
63
+ * @param optionsOrDirection Optional: direction string OR options object
64
+ * @returns Array of neighbor IDs (paginated if limit/offset specified)
65
+ *
66
+ * @example
67
+ * // Get all neighbors (backward compatible)
68
+ * const all = await graphIndex.getNeighbors(id)
69
+ *
70
+ * @example
71
+ * // Get outgoing neighbors (backward compatible)
72
+ * const out = await graphIndex.getNeighbors(id, 'out')
73
+ *
74
+ * @example
75
+ * // Get first 50 outgoing neighbors (new API)
76
+ * const page1 = await graphIndex.getNeighbors(id, { direction: 'out', limit: 50 })
77
+ *
78
+ * @example
79
+ * // Paginate through neighbors
80
+ * const page1 = await graphIndex.getNeighbors(id, { limit: 100, offset: 0 })
81
+ * const page2 = await graphIndex.getNeighbors(id, { limit: 100, offset: 100 })
59
82
  */
60
- getNeighbors(id: string, direction?: 'in' | 'out' | 'both'): Promise<string[]>;
83
+ getNeighbors(id: string, optionsOrDirection?: {
84
+ direction?: 'in' | 'out' | 'both';
85
+ limit?: number;
86
+ offset?: number;
87
+ } | 'in' | 'out' | 'both'): Promise<string[]>;
61
88
  /**
62
89
  * Get verb IDs by source - Billion-scale optimization for getVerbsBySource
90
+ *
63
91
  * O(log n) LSM-tree lookup with bloom filter optimization
64
92
  * v5.7.1: Filters out deleted verb IDs (tombstone deletion workaround)
93
+ * v5.8.0: Added pagination support for entities with many relationships
65
94
  *
66
95
  * @param sourceId Source entity ID
67
- * @returns Array of verb IDs originating from this source (excluding deleted)
96
+ * @param options Optional configuration
97
+ * @param options.limit Maximum number of verb IDs to return (default: all)
98
+ * @param options.offset Number of verb IDs to skip (default: 0)
99
+ * @returns Array of verb IDs originating from this source (excluding deleted, paginated if requested)
100
+ *
101
+ * @example
102
+ * // Get all verb IDs (backward compatible)
103
+ * const all = await graphIndex.getVerbIdsBySource(sourceId)
104
+ *
105
+ * @example
106
+ * // Get first 50 verb IDs
107
+ * const page1 = await graphIndex.getVerbIdsBySource(sourceId, { limit: 50 })
108
+ *
109
+ * @example
110
+ * // Paginate through verb IDs
111
+ * const page1 = await graphIndex.getVerbIdsBySource(sourceId, { limit: 100, offset: 0 })
112
+ * const page2 = await graphIndex.getVerbIdsBySource(sourceId, { limit: 100, offset: 100 })
68
113
  */
69
- getVerbIdsBySource(sourceId: string): Promise<string[]>;
114
+ getVerbIdsBySource(sourceId: string, options?: {
115
+ limit?: number;
116
+ offset?: number;
117
+ }): Promise<string[]>;
70
118
  /**
71
119
  * Get verb IDs by target - Billion-scale optimization for getVerbsByTarget
120
+ *
72
121
  * O(log n) LSM-tree lookup with bloom filter optimization
73
122
  * v5.7.1: Filters out deleted verb IDs (tombstone deletion workaround)
123
+ * v5.8.0: Added pagination support for popular target entities
74
124
  *
75
125
  * @param targetId Target entity ID
76
- * @returns Array of verb IDs pointing to this target (excluding deleted)
77
- */
78
- getVerbIdsByTarget(targetId: string): Promise<string[]>;
126
+ * @param options Optional configuration
127
+ * @param options.limit Maximum number of verb IDs to return (default: all)
128
+ * @param options.offset Number of verb IDs to skip (default: 0)
129
+ * @returns Array of verb IDs pointing to this target (excluding deleted, paginated if requested)
130
+ *
131
+ * @example
132
+ * // Get all verb IDs (backward compatible)
133
+ * const all = await graphIndex.getVerbIdsByTarget(targetId)
134
+ *
135
+ * @example
136
+ * // Get first 50 verb IDs
137
+ * const page1 = await graphIndex.getVerbIdsByTarget(targetId, { limit: 50 })
138
+ *
139
+ * @example
140
+ * // Paginate through verb IDs
141
+ * const page1 = await graphIndex.getVerbIdsByTarget(targetId, { limit: 100, offset: 0 })
142
+ * const page2 = await graphIndex.getVerbIdsByTarget(targetId, { limit: 100, offset: 100 })
143
+ */
144
+ getVerbIdsByTarget(targetId: string, options?: {
145
+ limit?: number;
146
+ offset?: number;
147
+ }): Promise<string[]>;
79
148
  /**
80
149
  * Get verb from cache or storage - Billion-scale memory optimization
81
150
  * Uses UnifiedCache with LRU eviction instead of storing all verbs in memory