bunsane 0.2.4 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/core/ArcheType.ts +67 -34
  2. package/core/BatchLoader.ts +215 -30
  3. package/core/Entity.ts +2 -2
  4. package/core/RequestContext.ts +15 -10
  5. package/core/RequestLoaders.ts +4 -2
  6. package/core/cache/CacheProvider.ts +1 -0
  7. package/core/cache/MemoryCache.ts +10 -1
  8. package/core/cache/RedisCache.ts +16 -2
  9. package/core/validateEnv.ts +8 -0
  10. package/database/DatabaseHelper.ts +113 -1
  11. package/database/index.ts +78 -45
  12. package/docs/SCALABILITY_PLAN.md +175 -0
  13. package/package.json +13 -2
  14. package/query/CTENode.ts +44 -24
  15. package/query/ComponentInclusionNode.ts +195 -95
  16. package/query/Query.ts +9 -9
  17. package/tests/benchmark/BENCHMARK_DATABASES_PLAN.md +338 -0
  18. package/tests/benchmark/bunfig.toml +9 -0
  19. package/tests/benchmark/fixtures/EcommerceComponents.ts +283 -0
  20. package/tests/benchmark/fixtures/EcommerceDataGenerators.ts +301 -0
  21. package/tests/benchmark/fixtures/RelationTracker.ts +159 -0
  22. package/tests/benchmark/fixtures/index.ts +6 -0
  23. package/tests/benchmark/index.ts +22 -0
  24. package/tests/benchmark/noop-preload.ts +3 -0
  25. package/tests/benchmark/query-lateral-benchmark.test.ts +372 -0
  26. package/tests/benchmark/runners/BenchmarkLoader.ts +132 -0
  27. package/tests/benchmark/runners/index.ts +4 -0
  28. package/tests/benchmark/scenarios/query-benchmarks.test.ts +465 -0
  29. package/tests/benchmark/scripts/generate-db.ts +344 -0
  30. package/tests/benchmark/scripts/run-benchmarks.ts +97 -0
  31. package/tests/integration/query/Query.complexAnalysis.test.ts +557 -0
  32. package/tests/integration/query/Query.edgeCases.test.ts +595 -0
  33. package/tests/integration/query/Query.explainAnalyze.test.ts +233 -0
  34. package/tests/stress/fixtures/RealisticComponents.ts +235 -0
  35. package/tests/stress/scenarios/realistic-scenarios.test.ts +1081 -0
  36. package/tests/stress/scenarios/timeout-investigation.test.ts +522 -0
  37. package/tests/unit/BatchLoader.test.ts +139 -25
@@ -108,6 +108,14 @@ export const CreateEntityTable = async () => {
108
108
  updated_at TIMESTAMP DEFAULT NOW(),
109
109
  deleted_at TIMESTAMP
110
110
  );`;
111
+
112
+ // Add partial index for soft-delete queries - critical for 1M+ scale
113
+ // This allows efficient filtering of non-deleted entities
114
+ await db.unsafe(`
115
+ CREATE INDEX IF NOT EXISTS idx_entities_deleted_null
116
+ ON entities (id)
117
+ WHERE deleted_at IS NULL
118
+ `);
111
119
  }
112
120
 
113
121
  export const CreateComponentTable = async () => {
@@ -638,4 +646,108 @@ export const BenchmarkPartitionCounts = async (partitionCounts: number[] = [8, 1
638
646
  return results;
639
647
  }
640
648
 
641
- export const GenerateTableName = (name: string) => `components_${name.toLowerCase().replace(/\s+/g, '_')}`;
649
+ export const GenerateTableName = (name: string) => `components_${name.toLowerCase().replace(/\s+/g, '_')}`;
650
+
651
+ /**
652
+ * Creates a partial B-tree expression index on a JSONB foreign key field (data->>'field') for optimized relation queries.
653
+ * This significantly improves @HasMany and @BelongsTo relation resolution performance.
654
+ *
655
+ * @param tableName The component table name (e.g., 'components_userprofile')
656
+ * @param foreignKeyField The JSONB field name that holds the foreign key (e.g., 'user_id')
657
+ * @returns Promise<boolean> - true if index was created, false if it already exists
658
+ *
659
+ * @example
660
+ * // Create index for user_id foreign key
661
+ * await CreateForeignKeyIndex('components_userprofile', 'user_id');
662
+ */
663
+ export const CreateForeignKeyIndex = async (tableName: string, foreignKeyField: string): Promise<boolean> => {
664
+ tableName = validateIdentifier(tableName);
665
+ foreignKeyField = validateIdentifier(foreignKeyField);
666
+
667
+ const indexName = `idx_${tableName}_fk_${foreignKeyField}`;
668
+
669
+ // Check if index already exists
670
+ const existingIndex = await db.unsafe(`
671
+ SELECT 1 FROM pg_indexes
672
+ WHERE tablename = '${tableName}' AND indexname = '${indexName}'
673
+ `);
674
+
675
+ if (existingIndex.length > 0) {
676
+ logger.trace(`Foreign key index ${indexName} already exists`);
677
+ return false;
678
+ }
679
+
680
+ // Check partition strategy
681
+ const partitionStrategy = await GetPartitionStrategy();
682
+ const useConcurrently = partitionStrategy !== 'hash' && !process.env.USE_PGLITE;
683
+
684
+ try {
685
+ await retryWithBackoff(async () => {
686
+ // Use btree index on the extracted text value for equality lookups (faster than GIN for FK)
687
+ await db.unsafe(`
688
+ CREATE INDEX${useConcurrently ? ' CONCURRENTLY' : ''} IF NOT EXISTS ${indexName}
689
+ ON ${tableName} ((data->>'${foreignKeyField}'))
690
+ WHERE deleted_at IS NULL
691
+ `);
692
+ });
693
+ logger.info(`Created foreign key index ${indexName} on ${tableName}.data->>'${foreignKeyField}'`);
694
+ return true;
695
+ } catch (error: any) {
696
+ if (error.message?.includes('duplicate key value violates unique constraint')) {
697
+ logger.trace(`Foreign key index ${indexName} already exists (concurrent creation)`);
698
+ return false;
699
+ }
700
+ throw error;
701
+ }
702
+ };
703
+
704
+ /**
705
+ * Creates foreign key indexes for all relation fields defined in archetypes.
706
+ * Should be called during database initialization for optimal relation query performance.
707
+ */
708
+ export const CreateRelationIndexes = async (): Promise<void> => {
709
+ const storage = getMetadataStorage();
710
+ const createdIndexes: string[] = [];
711
+
712
+ for (const [archetypeId, relations] of storage.archetypes_relations_map) {
713
+ for (const relation of relations) {
714
+ if (!relation.options?.foreignKey) continue;
715
+
716
+ const foreignKey = relation.options.foreignKey;
717
+ // Skip nested foreign keys (handled differently)
718
+ if (foreignKey.includes('.')) continue;
719
+
720
+ // Find the component that has this foreign key
721
+ const archetypeMetadata = storage.archetypes.find(a =>
722
+ storage.getComponentId(a.name) === archetypeId || a.typeId === archetypeId
723
+ );
724
+
725
+ if (!archetypeMetadata) continue;
726
+
727
+ // Get the component fields for this archetype
728
+ const archetypeFields = storage.archetypes_field_map.get(archetypeId) || [];
729
+
730
+ for (const field of archetypeFields) {
731
+ const componentId = storage.getComponentId(field.component.name);
732
+ const componentProps = storage.getComponentProperties(componentId);
733
+ const hasForeignKey = componentProps.some(prop => prop.propertyKey === foreignKey);
734
+
735
+ if (hasForeignKey) {
736
+ const tableName = GenerateTableName(field.component.name);
737
+ try {
738
+ const created = await CreateForeignKeyIndex(tableName, foreignKey);
739
+ if (created) {
740
+ createdIndexes.push(`${tableName}.${foreignKey}`);
741
+ }
742
+ } catch (error) {
743
+ logger.warn(`Failed to create FK index for ${tableName}.${foreignKey}: ${error}`);
744
+ }
745
+ }
746
+ }
747
+ }
748
+ }
749
+
750
+ if (createdIndexes.length > 0) {
751
+ logger.info(`Created ${createdIndexes.length} relation foreign key indexes`);
752
+ }
753
+ };
package/database/index.ts CHANGED
@@ -1,56 +1,89 @@
1
1
  import {SQL} from "bun";
2
2
  import { logger } from "../core/Logger";
3
3
 
4
- let connectionUrl = `postgres://${process.env.POSTGRES_USER}:${process.env.POSTGRES_PASSWORD}@${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT ?? "5432"}/${process.env.POSTGRES_DB}`;
5
- if(process.env.DB_CONNECTION_URL) {
6
- connectionUrl = process.env.DB_CONNECTION_URL;
7
- }
4
+ // Query timeout in milliseconds (default 30s, configurable via env)
5
+ // This is used by Query.exec(), Entity.save(), etc.
6
+ export const QUERY_TIMEOUT_MS = parseInt(process.env.DB_QUERY_TIMEOUT ?? '30000', 10);
7
+
8
+ // Module-level state for the database connection
9
+ let _db: SQL | null = null;
8
10
 
9
- // Add statement_timeout only when explicitly configured (opt-in)
10
- // Note: PgBouncer rejects statement_timeout as a startup parameter — use PostgreSQL config or connect_query instead
11
- if (process.env.USE_PGLITE !== 'true' && process.env.DB_STATEMENT_TIMEOUT) {
12
- try {
13
- const urlObj = new URL(connectionUrl);
14
- urlObj.searchParams.set('options', `-c statement_timeout=${process.env.DB_STATEMENT_TIMEOUT}`);
15
- connectionUrl = urlObj.toString();
16
- } catch {
17
- // Non-standard URL format, skip statement_timeout
11
+ function createDatabase(): SQL {
12
+ let url = `postgres://${process.env.POSTGRES_USER}:${process.env.POSTGRES_PASSWORD}@${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT ?? "5432"}/${process.env.POSTGRES_DB}`;
13
+ if(process.env.DB_CONNECTION_URL) {
14
+ url = process.env.DB_CONNECTION_URL;
18
15
  }
19
- }
20
16
 
21
- // Log connection URL with credentials redacted
22
- const redactedUrl = connectionUrl.replace(/:\/\/([^:]+):([^@]+)@/, '://$1:****@');
23
- logger.info(`Database connection URL: ${redactedUrl}`);
24
-
25
- // OPTIMIZED: Reduced from 20 to 10 to prevent overwhelming PGBouncer
26
- // With 5 app instances: 5 × 10 = 50 connections (well under PGBouncer's limit)
27
- const maxConnections = parseInt(process.env.POSTGRES_MAX_CONNECTIONS ?? '10', 10);
28
- logger.info(`Connection pool size: ${maxConnections} connections`);
29
-
30
- const db = new SQL({
31
- url: connectionUrl,
32
- // Connection pool settings - OPTIMIZED for PGBouncer
33
- max: maxConnections,
34
- idleTimeout: 30000, // Close idle connections after 30s
35
- maxLifetime: 600000, // Connection lifetime 10 minutes
36
- connectionTimeout: 30, // Timeout when establishing new connections
37
- onclose: (err) => {
38
- if (err) {
39
- if((err as unknown as { code: string }).code === "ERR_POSTGRES_IDLE_TIMEOUT") {
40
- logger.trace("Closing connection. Idle");
17
+ // Add statement_timeout only when explicitly configured (opt-in)
18
+ // Note: PgBouncer rejects statement_timeout as a startup parameter
19
+ if (process.env.USE_PGLITE !== 'true' && process.env.DB_STATEMENT_TIMEOUT) {
20
+ try {
21
+ const urlObj = new URL(url);
22
+ urlObj.searchParams.set('options', `-c statement_timeout=${process.env.DB_STATEMENT_TIMEOUT}`);
23
+ url = urlObj.toString();
24
+ } catch {
25
+ // Non-standard URL format, skip statement_timeout
26
+ }
27
+ }
28
+
29
+ const redactedUrl = url.replace(/:\/\/([^:]+):([^@]+)@/, '://$1:****@');
30
+ logger.info(`Database connection URL: ${redactedUrl}`);
31
+
32
+ const max = parseInt(process.env.POSTGRES_MAX_CONNECTIONS ?? '10', 10);
33
+ logger.info(`Connection pool size: ${max} connections`);
34
+ logger.info(`Query timeout: ${QUERY_TIMEOUT_MS}ms`);
35
+
36
+ const connTimeout = parseInt(process.env.DB_CONNECTION_TIMEOUT ?? '30', 10);
37
+
38
+ return new SQL({
39
+ url,
40
+ max,
41
+ idleTimeout: 30000,
42
+ maxLifetime: 600000,
43
+ connectionTimeout: connTimeout,
44
+ onclose: (err) => {
45
+ if (err) {
46
+ const errCode = (err as unknown as { code: string }).code;
47
+ if(errCode === "ERR_POSTGRES_IDLE_TIMEOUT") {
48
+ logger.trace("Closing connection. Idle");
49
+ } else if (errCode === "ERR_POSTGRES_CONNECTION_CLOSED") {
50
+ logger.warn("Database connection closed unexpectedly");
51
+ } else {
52
+ logger.error("Database connection closed with error:");
53
+ logger.error(err);
54
+ }
41
55
  } else {
42
- logger.error("Database connection closed with error:");
43
- logger.error(err);
56
+ logger.trace("Database connection closed gracefully.");
44
57
  }
45
- } else {
46
- logger.trace("Database connection closed gracefully.");
47
- }
48
- },
49
- onconnect: () => {
50
- // Log when new connections are created
51
- logger.trace("New database connection established");
58
+ },
59
+ onconnect: () => {
60
+ logger.trace("New database connection established");
61
+ }
62
+ });
63
+ }
64
+
65
+ /**
66
+ * Get the database connection. Lazily initializes on first access.
67
+ * This allows env vars to be set before the first database usage.
68
+ */
69
+ export function getDb(): SQL {
70
+ if (!_db) {
71
+ _db = createDatabase();
52
72
  }
53
- });
73
+ return _db;
74
+ }
75
+
76
+ /**
77
+ * Reinitialize the database connection with current env vars.
78
+ * Used by benchmark tests that set env vars after module load.
79
+ */
80
+ export function resetDatabase(): void {
81
+ _db = createDatabase();
82
+ }
54
83
 
84
+ // For backward compatibility, initialize eagerly on import
85
+ // This ensures existing code using `import db from './database'` continues to work
86
+ // Note: For benchmarks that need delayed initialization, use getDb() or resetDatabase()
87
+ const db = getDb();
55
88
 
56
- export default db;
89
+ export default db;
@@ -0,0 +1,175 @@
1
+ # BunSane Scalability Plan: 1M+ Entities
2
+
3
+ ## Problem Statement
4
+
5
+ At 50k entities, complex multi-component queries with sorting degrade catastrophically:
6
+ - 10k entities: 20ms
7
+ - 50k entities: 7,880ms (394x slower)
8
+ - Projected 1M: minutes to hours
9
+
10
+ Root cause: Cartesian product explosion in nested loop joins when sorting on JSONB fields.
11
+
12
+ ## Bottleneck Analysis
13
+
14
+ ### 1. Multi-Component Query Pattern (Critical)
15
+
16
+ Current SQL for 2-component query:
17
+ ```sql
18
+ SELECT DISTINCT ec.entity_id
19
+ FROM entity_components ec
20
+ WHERE ec.type_id IN ($1, $2) AND ec.deleted_at IS NULL
21
+ GROUP BY ec.entity_id
22
+ HAVING COUNT(DISTINCT ec.type_id) = 2
23
+ ```
24
+
25
+ **Problem**: Scans ALL entity_components for ALL matching types, then aggregates.
26
+ At 1M entities × 3 components = 3M rows scanned before filtering.
27
+
28
+ **Solution**: Use INTERSECT or EXISTS pattern:
29
+ ```sql
30
+ -- Option A: INTERSECT (better for 2-3 components)
31
+ SELECT entity_id FROM entity_components WHERE type_id = $1 AND deleted_at IS NULL
32
+ INTERSECT
33
+ SELECT entity_id FROM entity_components WHERE type_id = $2 AND deleted_at IS NULL
34
+
35
+ -- Option B: EXISTS (better for many components)
36
+ SELECT DISTINCT e.entity_id
37
+ FROM entity_components e
38
+ WHERE e.type_id = $1 AND e.deleted_at IS NULL
39
+ AND EXISTS (SELECT 1 FROM entity_components e2
40
+ WHERE e2.entity_id = e.entity_id AND e2.type_id = $2 AND e2.deleted_at IS NULL)
41
+ ```
42
+
43
+ ### 2. Sorting on JSONB Fields (Critical)
44
+
45
+ Current pattern:
46
+ ```sql
47
+ ORDER BY c.data->>'age' DESC
48
+ ```
49
+
50
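+ **Problem**: Without a matching expression index, PostgreSQL cannot use a B-tree index for this ordering and falls back to a sequential scan plus an in-memory sort. Because `->>` returns text, the sort is also lexicographic rather than numeric (e.g. '9' sorts after '10').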
51
+
52
+ **Solutions**:
53
+
54
+ A. **Expression Index** (per-field, must exist):
55
+ ```sql
56
+ CREATE INDEX idx_testuser_age_btree ON components_testuser ((data->>'age'));
57
+ -- For numeric sorting:
58
+ CREATE INDEX idx_testuser_age_numeric ON components_testuser (((data->>'age')::numeric));
59
+ ```
60
+
61
+ B. **Query must cast for numeric sort**:
62
+ ```sql
63
+ ORDER BY (c.data->>'age')::numeric DESC -- Uses numeric index
64
+ ```
65
+
66
+ C. **Covering Index** (include entity_id for index-only scan):
67
+ ```sql
68
+ CREATE INDEX idx_testuser_age_covering
69
+ ON components_testuser ((data->>'age'), entity_id)
70
+ WHERE deleted_at IS NULL;
71
+ ```
72
+
73
+ ### 3. Missing Index on entities.deleted_at
74
+
75
+ Every query does `WHERE deleted_at IS NULL` on entities table.
76
+
77
+ **Fix**:
78
+ ```sql
79
+ CREATE INDEX idx_entities_deleted_null ON entities (id) WHERE deleted_at IS NULL;
80
+ ```
81
+
82
+ ### 4. OFFSET Pagination Scaling
83
+
84
+ `OFFSET 900000` requires scanning 900k rows to skip them.
85
+
86
+ **Already implemented**: `cursor(entityId)` pagination in Query.ts.
87
+ **Action**: Document as required pattern for large datasets.
88
+
89
+ ## Implementation Plan
90
+
91
+ ### Phase 1: Quick Wins (Immediate)
92
+
93
+ 1. **Add missing index on entities**
94
+ - File: `database/DatabaseHelper.ts`
95
+ - Add: `idx_entities_deleted_null`
96
+
97
+ 2. **Numeric cast in ORDER BY**
98
+ - File: `query/ComponentInclusionNode.ts`
99
+ - Detect numeric fields and add `::numeric` cast
100
+
101
+ 3. **Use INTERSECT for 2-3 component queries**
102
+ - File: `query/ComponentInclusionNode.ts`
103
+ - Threshold: Use INTERSECT when componentIds.size <= 3
104
+
105
+ ### Phase 2: Index Strategy (Short-term)
106
+
107
+ 4. **Auto-create expression indexes for sortable fields**
108
+ - File: `database/IndexingStrategy.ts`
109
+ - Add: `createSortIndex(table, field, type: 'text' | 'numeric' | 'date')`
110
+
111
+ 5. **Query hints for sort fields**
112
+ - New decorator: `@SortableField(type)`
113
+ - Creates appropriate expression index at registration
114
+
115
+ ### Phase 3: Query Restructuring (Medium-term)
116
+
117
+ 6. **EXISTS pattern for multi-component with filters**
118
+ - Rewrite CTE to use correlated EXISTS
119
+ - Push filters into EXISTS subqueries
120
+
121
+ 7. **Batch entity lookup optimization**
122
+ - Use `= ANY($1::uuid[])` instead of `IN (...)` for large ID lists
123
+ - Better plan caching with array parameter
124
+
125
+ ### Phase 4: Denormalization Options (Long-term)
126
+
127
+ 8. **entity_component_summary table**
128
+ ```sql
129
+ CREATE TABLE entity_component_summary (
130
+ entity_id UUID PRIMARY KEY,
131
+ component_types TEXT[], -- Array of type_ids
132
+ updated_at TIMESTAMP
133
+ );
134
+ CREATE INDEX idx_ecs_types_gin ON entity_component_summary USING GIN (component_types);
135
+ ```
136
+
137
+ Query pattern:
138
+ ```sql
139
+ SELECT entity_id FROM entity_component_summary
140
+ WHERE component_types @> ARRAY[$1, $2]::text[]
141
+ ```
142
+
143
+ 9. **Materialized views for hot paths**
144
+ - Pre-join common component combinations
145
+ - Refresh on schedule or trigger
146
+
147
+ ## Benchmarks Required
148
+
149
+ | Scenario | Target (1M entities) |
150
+ |----------|---------------------|
151
+ | Single component, no filter | < 50ms |
152
+ | Single component, indexed filter | < 20ms |
153
+ | 2-component intersection | < 100ms |
154
+ | 3-component intersection | < 200ms |
155
+ | Sort on indexed field, limit 100 | < 50ms |
156
+ | Complex (2-comp + filter + sort) | < 500ms |
157
+ | Count | < 100ms |
158
+ | Cursor pagination (any page) | < 50ms |
159
+
160
+ ## Migration Strategy
161
+
162
+ 1. New indexes are additive (no breaking changes)
163
+ 2. Query changes behind feature flag: `BUNSANE_QUERY_V2=true`
164
+ 3. Gradual rollout with A/B testing on query performance
165
+ 4. Deprecate old patterns after validation
166
+
167
+ ## Files to Modify
168
+
169
+ - `database/DatabaseHelper.ts` - Add entity index
170
+ - `database/IndexingStrategy.ts` - Sort index creation
171
+ - `query/ComponentInclusionNode.ts` - INTERSECT pattern, numeric cast
172
+ - `query/QueryDAG.ts` - Component count threshold for strategy selection
173
+ - `core/components/Decorators.ts` - @SortableField decorator
174
+ - New: `query/strategies/IntersectStrategy.ts`
175
+ - New: `query/strategies/ExistsStrategy.ts`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bunsane",
3
- "version": "0.2.4",
3
+ "version": "0.2.8",
4
4
  "author": {
5
5
  "name": "yaaruu"
6
6
  },
@@ -29,7 +29,18 @@
29
29
  "test:stress": "bun test tests/stress --timeout 600000",
30
30
  "test:stress:smoke": "STRESS_RECORD_COUNT=10000 bun test tests/stress --timeout 300000",
31
31
  "test:stress:standard": "STRESS_RECORD_COUNT=100000 bun test tests/stress --timeout 600000",
32
- "test:stress:full": "STRESS_RECORD_COUNT=1000000 bun test tests/stress --timeout 1800000"
32
+ "test:stress:full": "STRESS_RECORD_COUNT=1000000 bun test tests/stress --timeout 1800000",
33
+ "bench:generate:xs": "bun tests/benchmark/scripts/generate-db.ts xs",
34
+ "bench:generate:sm": "bun tests/benchmark/scripts/generate-db.ts sm",
35
+ "bench:generate:md": "bun tests/benchmark/scripts/generate-db.ts md",
36
+ "bench:generate:lg": "bun tests/benchmark/scripts/generate-db.ts lg",
37
+ "bench:generate:xl": "bun tests/benchmark/scripts/generate-db.ts xl",
38
+ "bench:generate:all": "bun tests/benchmark/scripts/generate-db.ts --all",
39
+ "bench:run:xs": "bun tests/benchmark/scripts/run-benchmarks.ts xs",
40
+ "bench:run:sm": "bun tests/benchmark/scripts/run-benchmarks.ts sm",
41
+ "bench:run:md": "bun tests/benchmark/scripts/run-benchmarks.ts md",
42
+ "bench:run:lg": "bun tests/benchmark/scripts/run-benchmarks.ts lg",
43
+ "bench:run:xl": "bun tests/benchmark/scripts/run-benchmarks.ts xl"
33
44
  },
34
45
  "devDependencies": {
35
46
  "@electric-sql/pglite": "^0.3.15",
package/query/CTENode.ts CHANGED
@@ -13,49 +13,69 @@ export class CTENode extends QueryNode {
13
13
  }
14
14
 
15
15
  let cteSql = "WITH base_entities AS (\n";
16
- cteSql += " SELECT DISTINCT ec.entity_id\n";
17
- cteSql += " FROM entity_components ec\n";
18
- cteSql += " WHERE ec.type_id IN (";
19
16
 
20
- // Add component type placeholders
21
- const typePlaceholders = componentIds.map((_, index) => `$${context.addParam(componentIds[index])}`).join(', ');
22
- cteSql += typePlaceholders + ")\n";
23
- cteSql += " AND ec.deleted_at IS NULL\n";
24
-
25
- // Add cursor-based pagination filter in CTE (more efficient than OFFSET)
17
+ // Build cursor condition for reuse across INTERSECT queries
18
+ let cursorCondition = "";
26
19
  if (context.cursorId !== null) {
27
20
  const operator = context.cursorDirection === 'after' ? '>' : '<';
28
- cteSql += ` AND ec.entity_id ${operator} $${context.addParam(context.cursorId)}\n`;
21
+ cursorCondition = ` AND ec.entity_id ${operator} $${context.addParam(context.cursorId)}`;
29
22
  }
30
23
 
31
- // Add exclusions if any
24
+ // Build exclusion condition for reuse across INTERSECT queries
25
+ let exclusionCondition = "";
32
26
  if (excludedIds.length > 0) {
33
27
  const excludedPlaceholders = excludedIds.map((id) => `$${context.addParam(id)}`).join(', ');
34
- cteSql += ` AND NOT EXISTS (\n`;
35
- cteSql += ` SELECT 1 FROM entity_components ec_ex\n`;
36
- cteSql += ` WHERE ec_ex.entity_id = ec.entity_id\n`;
37
- cteSql += ` AND ec_ex.type_id IN (${excludedPlaceholders})\n`;
38
- cteSql += ` AND ec_ex.deleted_at IS NULL\n`;
39
- cteSql += ` )\n`;
28
+ exclusionCondition = ` AND NOT EXISTS (
29
+ SELECT 1 FROM entity_components ec_ex
30
+ WHERE ec_ex.entity_id = ec.entity_id
31
+ AND ec_ex.type_id IN (${excludedPlaceholders})
32
+ AND ec_ex.deleted_at IS NULL
33
+ )`;
40
34
  }
41
35
 
42
- // Add entity exclusions if any
36
+ // Build entity exclusion condition for reuse
37
+ let entityExclusionCondition = "";
43
38
  if (context.excludedEntityIds.size > 0) {
44
39
  const entityExcludedIds = Array.from(context.excludedEntityIds);
45
40
  const entityPlaceholders = entityExcludedIds.map((id) => `$${context.addParam(id)}`).join(', ');
46
- cteSql += ` AND ec.entity_id NOT IN (${entityPlaceholders})\n`;
41
+ entityExclusionCondition = ` AND ec.entity_id NOT IN (${entityPlaceholders})`;
47
42
  }
48
43
 
49
- // Group by entity_id to count distinct component types
50
- // This ensures entities have ALL required components
51
- cteSql += ` GROUP BY ec.entity_id\n`;
52
- cteSql += ` HAVING COUNT(DISTINCT ec.type_id) >= $${context.addParam(componentIds.length)}\n`;
44
+ if (componentIds.length === 1) {
45
+ // Single component - simple query, no INTERSECT needed
46
+ const paramIdx = context.addParam(componentIds[0]);
47
+ cteSql += ` SELECT DISTINCT ec.entity_id\n`;
48
+ cteSql += ` FROM entity_components ec\n`;
49
+ cteSql += ` WHERE ec.type_id = $${paramIdx}::text\n`;
50
+ cteSql += ` AND ec.deleted_at IS NULL\n`;
51
+ if (cursorCondition) cteSql += ` ${cursorCondition.trim()}\n`;
52
+ if (exclusionCondition) cteSql += ` ${exclusionCondition.trim()}\n`;
53
+ if (entityExclusionCondition) cteSql += ` ${entityExclusionCondition.trim()}\n`;
54
+ } else {
55
+ // Multiple components - use INTERSECT for much faster queries
56
+ // INTERSECT allows PostgreSQL to use index scans independently per component
57
+ // then efficiently merge results, avoiding Cartesian product explosion
58
+ const intersectQueries = componentIds.map((compId) => {
59
+ const paramIdx = context.addParam(compId);
60
+ let subquery = `SELECT ec.entity_id FROM entity_components ec WHERE ec.type_id = $${paramIdx}::text AND ec.deleted_at IS NULL`;
61
+ // Add cursor/exclusion conditions to each subquery for efficiency
62
+ if (cursorCondition) subquery += cursorCondition;
63
+ if (exclusionCondition) subquery += exclusionCondition;
64
+ if (entityExclusionCondition) subquery += entityExclusionCondition;
65
+ return `(${subquery})`;
66
+ });
67
+ cteSql += ` SELECT entity_id FROM (\n`;
68
+ cteSql += ` ${intersectQueries.join('\n INTERSECT\n ')}\n`;
69
+ cteSql += ` ) AS intersected\n`;
70
+ }
53
71
 
54
72
  // Add ORDER BY for deterministic pagination results
55
73
  // Must be before LIMIT/OFFSET for consistent page results
56
74
  // Reverse order for 'before' cursor direction
57
75
  const orderDirection = context.cursorDirection === 'before' ? 'DESC' : 'ASC';
58
- cteSql += ` ORDER BY ec.entity_id ${orderDirection}\n`;
76
+ // Use correct column reference based on query structure
77
+ const orderColumn = componentIds.length === 1 ? 'ec.entity_id' : 'entity_id';
78
+ cteSql += ` ORDER BY ${orderColumn} ${orderDirection}\n`;
59
79
 
60
80
  // Check if there are component filters - if so, pagination must happen AFTER filtering
61
81
  // Otherwise we'd limit results before applying filters, causing incorrect results