bunsane 0.2.4 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/ArcheType.ts +67 -34
- package/core/BatchLoader.ts +215 -30
- package/core/Entity.ts +2 -2
- package/core/RequestContext.ts +15 -10
- package/core/RequestLoaders.ts +4 -2
- package/core/cache/CacheProvider.ts +1 -0
- package/core/cache/MemoryCache.ts +10 -1
- package/core/cache/RedisCache.ts +16 -2
- package/core/validateEnv.ts +8 -0
- package/database/DatabaseHelper.ts +113 -1
- package/database/index.ts +78 -45
- package/docs/SCALABILITY_PLAN.md +175 -0
- package/package.json +13 -2
- package/query/CTENode.ts +44 -24
- package/query/ComponentInclusionNode.ts +195 -95
- package/query/Query.ts +9 -9
- package/tests/benchmark/BENCHMARK_DATABASES_PLAN.md +338 -0
- package/tests/benchmark/bunfig.toml +9 -0
- package/tests/benchmark/fixtures/EcommerceComponents.ts +283 -0
- package/tests/benchmark/fixtures/EcommerceDataGenerators.ts +301 -0
- package/tests/benchmark/fixtures/RelationTracker.ts +159 -0
- package/tests/benchmark/fixtures/index.ts +6 -0
- package/tests/benchmark/index.ts +22 -0
- package/tests/benchmark/noop-preload.ts +3 -0
- package/tests/benchmark/query-lateral-benchmark.test.ts +372 -0
- package/tests/benchmark/runners/BenchmarkLoader.ts +132 -0
- package/tests/benchmark/runners/index.ts +4 -0
- package/tests/benchmark/scenarios/query-benchmarks.test.ts +465 -0
- package/tests/benchmark/scripts/generate-db.ts +344 -0
- package/tests/benchmark/scripts/run-benchmarks.ts +97 -0
- package/tests/integration/query/Query.complexAnalysis.test.ts +557 -0
- package/tests/integration/query/Query.edgeCases.test.ts +595 -0
- package/tests/integration/query/Query.explainAnalyze.test.ts +233 -0
- package/tests/stress/fixtures/RealisticComponents.ts +235 -0
- package/tests/stress/scenarios/realistic-scenarios.test.ts +1081 -0
- package/tests/stress/scenarios/timeout-investigation.test.ts +522 -0
- package/tests/unit/BatchLoader.test.ts +139 -25
|
@@ -108,6 +108,14 @@ export const CreateEntityTable = async () => {
|
|
|
108
108
|
updated_at TIMESTAMP DEFAULT NOW(),
|
|
109
109
|
deleted_at TIMESTAMP
|
|
110
110
|
);`;
|
|
111
|
+
|
|
112
|
+
// Add partial index for soft-delete queries - critical for 1M+ scale
|
|
113
|
+
// This allows efficient filtering of non-deleted entities
|
|
114
|
+
await db.unsafe(`
|
|
115
|
+
CREATE INDEX IF NOT EXISTS idx_entities_deleted_null
|
|
116
|
+
ON entities (id)
|
|
117
|
+
WHERE deleted_at IS NULL
|
|
118
|
+
`);
|
|
111
119
|
}
|
|
112
120
|
|
|
113
121
|
export const CreateComponentTable = async () => {
|
|
@@ -638,4 +646,108 @@ export const BenchmarkPartitionCounts = async (partitionCounts: number[] = [8, 1
|
|
|
638
646
|
return results;
|
|
639
647
|
}
|
|
640
648
|
|
|
641
|
-
export const GenerateTableName = (name: string) => `components_${name.toLowerCase().replace(/\s+/g, '_')}`;
|
|
649
|
+
export const GenerateTableName = (name: string) => `components_${name.toLowerCase().replace(/\s+/g, '_')}`;
|
|
650
|
+
|
|
651
|
+
/**
|
|
652
|
+
* Creates a partial B-tree expression index on a JSONB foreign key field (data->>'field') for optimized relation queries.
|
|
653
|
+
* This significantly improves @HasMany and @BelongsTo relation resolution performance.
|
|
654
|
+
*
|
|
655
|
+
* @param tableName The component table name (e.g., 'components_userprofile')
|
|
656
|
+
* @param foreignKeyField The JSONB field name that holds the foreign key (e.g., 'user_id')
|
|
657
|
+
* @returns Promise<boolean> - true if index was created, false if it already exists
|
|
658
|
+
*
|
|
659
|
+
* @example
|
|
660
|
+
* // Create index for user_id foreign key
|
|
661
|
+
* await CreateForeignKeyIndex('components_userprofile', 'user_id');
|
|
662
|
+
*/
|
|
663
|
+
export const CreateForeignKeyIndex = async (tableName: string, foreignKeyField: string): Promise<boolean> => {
|
|
664
|
+
tableName = validateIdentifier(tableName);
|
|
665
|
+
foreignKeyField = validateIdentifier(foreignKeyField);
|
|
666
|
+
|
|
667
|
+
const indexName = `idx_${tableName}_fk_${foreignKeyField}`;
|
|
668
|
+
|
|
669
|
+
// Check if index already exists
|
|
670
|
+
const existingIndex = await db.unsafe(`
|
|
671
|
+
SELECT 1 FROM pg_indexes
|
|
672
|
+
WHERE tablename = '${tableName}' AND indexname = '${indexName}'
|
|
673
|
+
`);
|
|
674
|
+
|
|
675
|
+
if (existingIndex.length > 0) {
|
|
676
|
+
logger.trace(`Foreign key index ${indexName} already exists`);
|
|
677
|
+
return false;
|
|
678
|
+
}
|
|
679
|
+
|
|
680
|
+
// Check partition strategy
|
|
681
|
+
const partitionStrategy = await GetPartitionStrategy();
|
|
682
|
+
const useConcurrently = partitionStrategy !== 'hash' && !process.env.USE_PGLITE;
|
|
683
|
+
|
|
684
|
+
try {
|
|
685
|
+
await retryWithBackoff(async () => {
|
|
686
|
+
// Use btree index on the extracted text value for equality lookups (faster than GIN for FK)
|
|
687
|
+
await db.unsafe(`
|
|
688
|
+
CREATE INDEX${useConcurrently ? ' CONCURRENTLY' : ''} IF NOT EXISTS ${indexName}
|
|
689
|
+
ON ${tableName} ((data->>'${foreignKeyField}'))
|
|
690
|
+
WHERE deleted_at IS NULL
|
|
691
|
+
`);
|
|
692
|
+
});
|
|
693
|
+
logger.info(`Created foreign key index ${indexName} on ${tableName}.data->>'${foreignKeyField}'`);
|
|
694
|
+
return true;
|
|
695
|
+
} catch (error: any) {
|
|
696
|
+
if (error.message?.includes('duplicate key value violates unique constraint')) {
|
|
697
|
+
logger.trace(`Foreign key index ${indexName} already exists (concurrent creation)`);
|
|
698
|
+
return false;
|
|
699
|
+
}
|
|
700
|
+
throw error;
|
|
701
|
+
}
|
|
702
|
+
};
|
|
703
|
+
|
|
704
|
+
/**
|
|
705
|
+
* Creates foreign key indexes for all relation fields defined in archetypes.
|
|
706
|
+
* Should be called during database initialization for optimal relation query performance.
|
|
707
|
+
*/
|
|
708
|
+
export const CreateRelationIndexes = async (): Promise<void> => {
|
|
709
|
+
const storage = getMetadataStorage();
|
|
710
|
+
const createdIndexes: string[] = [];
|
|
711
|
+
|
|
712
|
+
for (const [archetypeId, relations] of storage.archetypes_relations_map) {
|
|
713
|
+
for (const relation of relations) {
|
|
714
|
+
if (!relation.options?.foreignKey) continue;
|
|
715
|
+
|
|
716
|
+
const foreignKey = relation.options.foreignKey;
|
|
717
|
+
// Skip nested foreign keys (handled differently)
|
|
718
|
+
if (foreignKey.includes('.')) continue;
|
|
719
|
+
|
|
720
|
+
// Find the archetype metadata that corresponds to this archetype id
|
|
721
|
+
const archetypeMetadata = storage.archetypes.find(a =>
|
|
722
|
+
storage.getComponentId(a.name) === archetypeId || a.typeId === archetypeId
|
|
723
|
+
);
|
|
724
|
+
|
|
725
|
+
if (!archetypeMetadata) continue;
|
|
726
|
+
|
|
727
|
+
// Get the component fields for this archetype
|
|
728
|
+
const archetypeFields = storage.archetypes_field_map.get(archetypeId) || [];
|
|
729
|
+
|
|
730
|
+
for (const field of archetypeFields) {
|
|
731
|
+
const componentId = storage.getComponentId(field.component.name);
|
|
732
|
+
const componentProps = storage.getComponentProperties(componentId);
|
|
733
|
+
const hasForeignKey = componentProps.some(prop => prop.propertyKey === foreignKey);
|
|
734
|
+
|
|
735
|
+
if (hasForeignKey) {
|
|
736
|
+
const tableName = GenerateTableName(field.component.name);
|
|
737
|
+
try {
|
|
738
|
+
const created = await CreateForeignKeyIndex(tableName, foreignKey);
|
|
739
|
+
if (created) {
|
|
740
|
+
createdIndexes.push(`${tableName}.${foreignKey}`);
|
|
741
|
+
}
|
|
742
|
+
} catch (error) {
|
|
743
|
+
logger.warn(`Failed to create FK index for ${tableName}.${foreignKey}: ${error}`);
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
if (createdIndexes.length > 0) {
|
|
751
|
+
logger.info(`Created ${createdIndexes.length} relation foreign key indexes`);
|
|
752
|
+
}
|
|
753
|
+
};
|
package/database/index.ts
CHANGED
|
@@ -1,56 +1,89 @@
|
|
|
1
1
|
import {SQL} from "bun";
|
|
2
2
|
import { logger } from "../core/Logger";
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
4
|
+
// Query timeout in milliseconds (default 30000 ms = 30s, configurable via the DB_QUERY_TIMEOUT env var)
|
|
5
|
+
// This is used by Query.exec(), Entity.save(), etc.
|
|
6
|
+
export const QUERY_TIMEOUT_MS = parseInt(process.env.DB_QUERY_TIMEOUT ?? '30000', 10);
|
|
7
|
+
|
|
8
|
+
// Module-level state for the database connection
|
|
9
|
+
let _db: SQL | null = null;
|
|
8
10
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
if
|
|
12
|
-
|
|
13
|
-
const urlObj = new URL(connectionUrl);
|
|
14
|
-
urlObj.searchParams.set('options', `-c statement_timeout=${process.env.DB_STATEMENT_TIMEOUT}`);
|
|
15
|
-
connectionUrl = urlObj.toString();
|
|
16
|
-
} catch {
|
|
17
|
-
// Non-standard URL format, skip statement_timeout
|
|
11
|
+
function createDatabase(): SQL {
|
|
12
|
+
let url = `postgres://${process.env.POSTGRES_USER}:${process.env.POSTGRES_PASSWORD}@${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT ?? "5432"}/${process.env.POSTGRES_DB}`;
|
|
13
|
+
if(process.env.DB_CONNECTION_URL) {
|
|
14
|
+
url = process.env.DB_CONNECTION_URL;
|
|
18
15
|
}
|
|
19
|
-
}
|
|
20
16
|
|
|
21
|
-
//
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
17
|
+
// Add statement_timeout only when explicitly configured (opt-in)
|
|
18
|
+
// Note: PgBouncer rejects statement_timeout as a startup parameter
|
|
19
|
+
if (process.env.USE_PGLITE !== 'true' && process.env.DB_STATEMENT_TIMEOUT) {
|
|
20
|
+
try {
|
|
21
|
+
const urlObj = new URL(url);
|
|
22
|
+
urlObj.searchParams.set('options', `-c statement_timeout=${process.env.DB_STATEMENT_TIMEOUT}`);
|
|
23
|
+
url = urlObj.toString();
|
|
24
|
+
} catch {
|
|
25
|
+
// Non-standard URL format, skip statement_timeout
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
const redactedUrl = url.replace(/:\/\/([^:]+):([^@]+)@/, '://$1:****@');
|
|
30
|
+
logger.info(`Database connection URL: ${redactedUrl}`);
|
|
31
|
+
|
|
32
|
+
const max = parseInt(process.env.POSTGRES_MAX_CONNECTIONS ?? '10', 10);
|
|
33
|
+
logger.info(`Connection pool size: ${max} connections`);
|
|
34
|
+
logger.info(`Query timeout: ${QUERY_TIMEOUT_MS}ms`);
|
|
35
|
+
|
|
36
|
+
const connTimeout = parseInt(process.env.DB_CONNECTION_TIMEOUT ?? '30', 10);
|
|
37
|
+
|
|
38
|
+
return new SQL({
|
|
39
|
+
url,
|
|
40
|
+
max,
|
|
41
|
+
idleTimeout: 30000,
|
|
42
|
+
maxLifetime: 600000,
|
|
43
|
+
connectionTimeout: connTimeout,
|
|
44
|
+
onclose: (err) => {
|
|
45
|
+
if (err) {
|
|
46
|
+
const errCode = (err as unknown as { code: string }).code;
|
|
47
|
+
if(errCode === "ERR_POSTGRES_IDLE_TIMEOUT") {
|
|
48
|
+
logger.trace("Closing connection. Idle");
|
|
49
|
+
} else if (errCode === "ERR_POSTGRES_CONNECTION_CLOSED") {
|
|
50
|
+
logger.warn("Database connection closed unexpectedly");
|
|
51
|
+
} else {
|
|
52
|
+
logger.error("Database connection closed with error:");
|
|
53
|
+
logger.error(err);
|
|
54
|
+
}
|
|
41
55
|
} else {
|
|
42
|
-
logger.
|
|
43
|
-
logger.error(err);
|
|
56
|
+
logger.trace("Database connection closed gracefully.");
|
|
44
57
|
}
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
58
|
+
},
|
|
59
|
+
onconnect: () => {
|
|
60
|
+
logger.trace("New database connection established");
|
|
61
|
+
}
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Get the database connection. Lazily initializes on first access.
|
|
67
|
+
* This allows env vars to be set before the first database usage.
|
|
68
|
+
*/
|
|
69
|
+
export function getDb(): SQL {
|
|
70
|
+
if (!_db) {
|
|
71
|
+
_db = createDatabase();
|
|
52
72
|
}
|
|
53
|
-
|
|
73
|
+
return _db;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Reinitialize the database connection with current env vars.
|
|
78
|
+
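* Used by benchmark tests that set env vars after module load. Note: the previous connection is not closed here, and the module's default export still refers to the instance created at import time, so call getDb() after a reset.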
|
|
79
|
+
*/
|
|
80
|
+
export function resetDatabase(): void {
|
|
81
|
+
_db = createDatabase();
|
|
82
|
+
}
|
|
54
83
|
|
|
84
|
+
// For backward compatibility, initialize eagerly on import
|
|
85
|
+
// This ensures existing code using `import db from './database'` continues to work
|
|
86
|
+
// Note: For benchmarks that need delayed initialization, use getDb() or resetDatabase()
|
|
87
|
+
const db = getDb();
|
|
55
88
|
|
|
56
|
-
export default db;
|
|
89
|
+
export default db;
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# BunSane Scalability Plan: 1M+ Entities
|
|
2
|
+
|
|
3
|
+
## Problem Statement
|
|
4
|
+
|
|
5
|
+
At 50k entities, complex multi-component queries with sorting degrade catastrophically:
|
|
6
|
+
- 10k entities: 20ms
|
|
7
|
+
- 50k entities: 7,880ms (394x slower)
|
|
8
|
+
- Projected 1M: minutes to hours
|
|
9
|
+
|
|
10
|
+
Root cause: Cartesian product explosion in nested loop joins when sorting on JSONB fields.
|
|
11
|
+
|
|
12
|
+
## Bottleneck Analysis
|
|
13
|
+
|
|
14
|
+
### 1. Multi-Component Query Pattern (Critical)
|
|
15
|
+
|
|
16
|
+
Current SQL for 2-component query:
|
|
17
|
+
```sql
|
|
18
|
+
SELECT DISTINCT ec.entity_id
|
|
19
|
+
FROM entity_components ec
|
|
20
|
+
WHERE ec.type_id IN ($1, $2) AND ec.deleted_at IS NULL
|
|
21
|
+
GROUP BY ec.entity_id
|
|
22
|
+
HAVING COUNT(DISTINCT ec.type_id) = 2
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
**Problem**: Scans ALL entity_components for ALL matching types, then aggregates.
|
|
26
|
+
At 1M entities × 3 components = 3M rows scanned before filtering.
|
|
27
|
+
|
|
28
|
+
**Solution**: Use INTERSECT or EXISTS pattern:
|
|
29
|
+
```sql
|
|
30
|
+
-- Option A: INTERSECT (better for 2-3 components)
|
|
31
|
+
SELECT entity_id FROM entity_components WHERE type_id = $1 AND deleted_at IS NULL
|
|
32
|
+
INTERSECT
|
|
33
|
+
SELECT entity_id FROM entity_components WHERE type_id = $2 AND deleted_at IS NULL
|
|
34
|
+
|
|
35
|
+
-- Option B: EXISTS (better for many components)
|
|
36
|
+
SELECT DISTINCT e.entity_id
|
|
37
|
+
FROM entity_components e
|
|
38
|
+
WHERE e.type_id = $1 AND e.deleted_at IS NULL
|
|
39
|
+
AND EXISTS (SELECT 1 FROM entity_components e2
|
|
40
|
+
WHERE e2.entity_id = e.entity_id AND e2.type_id = $2 AND e2.deleted_at IS NULL)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### 2. Sorting on JSONB Fields (Critical)
|
|
44
|
+
|
|
45
|
+
Current pattern:
|
|
46
|
+
```sql
|
|
47
|
+
ORDER BY c.data->>'age' DESC
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
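**Problem**: `->>` returns text, so without a matching expression index PostgreSQL cannot use a B-tree index for this ORDER BY; it falls back to a sequential scan plus an in-memory sort, and numeric values are compared as strings.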
|
|
51
|
+
|
|
52
|
+
**Solutions**:
|
|
53
|
+
|
|
54
|
+
A. **Expression Index** (per-field, must exist):
|
|
55
|
+
```sql
|
|
56
|
+
CREATE INDEX idx_testuser_age_btree ON components_testuser ((data->>'age'));
|
|
57
|
+
-- For numeric sorting:
|
|
58
|
+
CREATE INDEX idx_testuser_age_numeric ON components_testuser (((data->>'age')::numeric));
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
B. **Query must cast for numeric sort**:
|
|
62
|
+
```sql
|
|
63
|
+
ORDER BY (c.data->>'age')::numeric DESC -- Uses numeric index
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
C. **Covering Index** (include entity_id for index-only scan):
|
|
67
|
+
```sql
|
|
68
|
+
CREATE INDEX idx_testuser_age_covering
|
|
69
|
+
ON components_testuser ((data->>'age'), entity_id)
|
|
70
|
+
WHERE deleted_at IS NULL;
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### 3. Missing Index on entities.deleted_at
|
|
74
|
+
|
|
75
|
+
Every query filters with `WHERE deleted_at IS NULL` on the `entities` table.
|
|
76
|
+
|
|
77
|
+
**Fix**:
|
|
78
|
+
```sql
|
|
79
|
+
CREATE INDEX idx_entities_deleted_null ON entities (id) WHERE deleted_at IS NULL;
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### 4. OFFSET Pagination Scaling
|
|
83
|
+
|
|
84
|
+
`OFFSET 900000` requires scanning 900k rows to skip them.
|
|
85
|
+
|
|
86
|
+
**Already implemented**: `cursor(entityId)` pagination in Query.ts.
|
|
87
|
+
**Action**: Document cursor-based pagination as the required pattern for large datasets.
|
|
88
|
+
|
|
89
|
+
## Implementation Plan
|
|
90
|
+
|
|
91
|
+
### Phase 1: Quick Wins (Immediate)
|
|
92
|
+
|
|
93
|
+
1. **Add missing index on entities**
|
|
94
|
+
- File: `database/DatabaseHelper.ts`
|
|
95
|
+
- Add: `idx_entities_deleted_null`
|
|
96
|
+
|
|
97
|
+
2. **Numeric cast in ORDER BY**
|
|
98
|
+
- File: `query/ComponentInclusionNode.ts`
|
|
99
|
+
- Detect numeric fields and add `::numeric` cast
|
|
100
|
+
|
|
101
|
+
3. **Use INTERSECT for 2-3 component queries**
|
|
102
|
+
- File: `query/ComponentInclusionNode.ts`
|
|
103
|
+
- Threshold: Use INTERSECT when componentIds.size <= 3
|
|
104
|
+
|
|
105
|
+
### Phase 2: Index Strategy (Short-term)
|
|
106
|
+
|
|
107
|
+
4. **Auto-create expression indexes for sortable fields**
|
|
108
|
+
- File: `database/IndexingStrategy.ts`
|
|
109
|
+
- Add: `createSortIndex(table, field, type: 'text' | 'numeric' | 'date')`
|
|
110
|
+
|
|
111
|
+
5. **Query hints for sort fields**
|
|
112
|
+
- New decorator: `@SortableField(type)`
|
|
113
|
+
- Creates appropriate expression index at registration
|
|
114
|
+
|
|
115
|
+
### Phase 3: Query Restructuring (Medium-term)
|
|
116
|
+
|
|
117
|
+
6. **EXISTS pattern for multi-component with filters**
|
|
118
|
+
- Rewrite CTE to use correlated EXISTS
|
|
119
|
+
- Push filters into EXISTS subqueries
|
|
120
|
+
|
|
121
|
+
7. **Batch entity lookup optimization**
|
|
122
|
+
- Use `= ANY($1::uuid[])` instead of `IN (...)` for large ID lists
|
|
123
|
+
- Better plan caching with array parameter
|
|
124
|
+
|
|
125
|
+
### Phase 4: Denormalization Options (Long-term)
|
|
126
|
+
|
|
127
|
+
8. **entity_component_summary table**
|
|
128
|
+
```sql
|
|
129
|
+
CREATE TABLE entity_component_summary (
|
|
130
|
+
entity_id UUID PRIMARY KEY,
|
|
131
|
+
component_types TEXT[], -- Array of type_ids
|
|
132
|
+
updated_at TIMESTAMP
|
|
133
|
+
);
|
|
134
|
+
CREATE INDEX idx_ecs_types_gin ON entity_component_summary USING GIN (component_types);
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Query pattern:
|
|
138
|
+
```sql
|
|
139
|
+
SELECT entity_id FROM entity_component_summary
|
|
140
|
+
WHERE component_types @> ARRAY[$1, $2]::text[]
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
9. **Materialized views for hot paths**
|
|
144
|
+
- Pre-join common component combinations
|
|
145
|
+
- Refresh on schedule or trigger
|
|
146
|
+
|
|
147
|
+
## Benchmarks Required
|
|
148
|
+
|
|
149
|
+
| Scenario | Target (1M entities) |
|
|
150
|
+
|----------|---------------------|
|
|
151
|
+
| Single component, no filter | < 50ms |
|
|
152
|
+
| Single component, indexed filter | < 20ms |
|
|
153
|
+
| 2-component intersection | < 100ms |
|
|
154
|
+
| 3-component intersection | < 200ms |
|
|
155
|
+
| Sort on indexed field, limit 100 | < 50ms |
|
|
156
|
+
| Complex (2-comp + filter + sort) | < 500ms |
|
|
157
|
+
| Count | < 100ms |
|
|
158
|
+
| Cursor pagination (any page) | < 50ms |
|
|
159
|
+
|
|
160
|
+
## Migration Strategy
|
|
161
|
+
|
|
162
|
+
1. New indexes are additive (no breaking changes)
|
|
163
|
+
2. Query changes behind feature flag: `BUNSANE_QUERY_V2=true`
|
|
164
|
+
3. Gradual rollout with A/B testing on query performance
|
|
165
|
+
4. Deprecate old patterns after validation
|
|
166
|
+
|
|
167
|
+
## Files to Modify
|
|
168
|
+
|
|
169
|
+
- `database/DatabaseHelper.ts` - Add entity index
|
|
170
|
+
- `database/IndexingStrategy.ts` - Sort index creation
|
|
171
|
+
- `query/ComponentInclusionNode.ts` - INTERSECT pattern, numeric cast
|
|
172
|
+
- `query/QueryDAG.ts` - Component count threshold for strategy selection
|
|
173
|
+
- `core/components/Decorators.ts` - @SortableField decorator
|
|
174
|
+
- New: `query/strategies/IntersectStrategy.ts`
|
|
175
|
+
- New: `query/strategies/ExistsStrategy.ts`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bunsane",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.8",
|
|
4
4
|
"author": {
|
|
5
5
|
"name": "yaaruu"
|
|
6
6
|
},
|
|
@@ -29,7 +29,18 @@
|
|
|
29
29
|
"test:stress": "bun test tests/stress --timeout 600000",
|
|
30
30
|
"test:stress:smoke": "STRESS_RECORD_COUNT=10000 bun test tests/stress --timeout 300000",
|
|
31
31
|
"test:stress:standard": "STRESS_RECORD_COUNT=100000 bun test tests/stress --timeout 600000",
|
|
32
|
-
"test:stress:full": "STRESS_RECORD_COUNT=1000000 bun test tests/stress --timeout 1800000"
|
|
32
|
+
"test:stress:full": "STRESS_RECORD_COUNT=1000000 bun test tests/stress --timeout 1800000",
|
|
33
|
+
"bench:generate:xs": "bun tests/benchmark/scripts/generate-db.ts xs",
|
|
34
|
+
"bench:generate:sm": "bun tests/benchmark/scripts/generate-db.ts sm",
|
|
35
|
+
"bench:generate:md": "bun tests/benchmark/scripts/generate-db.ts md",
|
|
36
|
+
"bench:generate:lg": "bun tests/benchmark/scripts/generate-db.ts lg",
|
|
37
|
+
"bench:generate:xl": "bun tests/benchmark/scripts/generate-db.ts xl",
|
|
38
|
+
"bench:generate:all": "bun tests/benchmark/scripts/generate-db.ts --all",
|
|
39
|
+
"bench:run:xs": "bun tests/benchmark/scripts/run-benchmarks.ts xs",
|
|
40
|
+
"bench:run:sm": "bun tests/benchmark/scripts/run-benchmarks.ts sm",
|
|
41
|
+
"bench:run:md": "bun tests/benchmark/scripts/run-benchmarks.ts md",
|
|
42
|
+
"bench:run:lg": "bun tests/benchmark/scripts/run-benchmarks.ts lg",
|
|
43
|
+
"bench:run:xl": "bun tests/benchmark/scripts/run-benchmarks.ts xl"
|
|
33
44
|
},
|
|
34
45
|
"devDependencies": {
|
|
35
46
|
"@electric-sql/pglite": "^0.3.15",
|
package/query/CTENode.ts
CHANGED
|
@@ -13,49 +13,69 @@ export class CTENode extends QueryNode {
|
|
|
13
13
|
}
|
|
14
14
|
|
|
15
15
|
let cteSql = "WITH base_entities AS (\n";
|
|
16
|
-
cteSql += " SELECT DISTINCT ec.entity_id\n";
|
|
17
|
-
cteSql += " FROM entity_components ec\n";
|
|
18
|
-
cteSql += " WHERE ec.type_id IN (";
|
|
19
16
|
|
|
20
|
-
//
|
|
21
|
-
|
|
22
|
-
cteSql += typePlaceholders + ")\n";
|
|
23
|
-
cteSql += " AND ec.deleted_at IS NULL\n";
|
|
24
|
-
|
|
25
|
-
// Add cursor-based pagination filter in CTE (more efficient than OFFSET)
|
|
17
|
+
// Build cursor condition for reuse across INTERSECT queries
|
|
18
|
+
let cursorCondition = "";
|
|
26
19
|
if (context.cursorId !== null) {
|
|
27
20
|
const operator = context.cursorDirection === 'after' ? '>' : '<';
|
|
28
|
-
|
|
21
|
+
cursorCondition = ` AND ec.entity_id ${operator} $${context.addParam(context.cursorId)}`;
|
|
29
22
|
}
|
|
30
23
|
|
|
31
|
-
//
|
|
24
|
+
// Build exclusion condition for reuse across INTERSECT queries
|
|
25
|
+
let exclusionCondition = "";
|
|
32
26
|
if (excludedIds.length > 0) {
|
|
33
27
|
const excludedPlaceholders = excludedIds.map((id) => `$${context.addParam(id)}`).join(', ');
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
28
|
+
exclusionCondition = ` AND NOT EXISTS (
|
|
29
|
+
SELECT 1 FROM entity_components ec_ex
|
|
30
|
+
WHERE ec_ex.entity_id = ec.entity_id
|
|
31
|
+
AND ec_ex.type_id IN (${excludedPlaceholders})
|
|
32
|
+
AND ec_ex.deleted_at IS NULL
|
|
33
|
+
)`;
|
|
40
34
|
}
|
|
41
35
|
|
|
42
|
-
//
|
|
36
|
+
// Build entity exclusion condition for reuse
|
|
37
|
+
let entityExclusionCondition = "";
|
|
43
38
|
if (context.excludedEntityIds.size > 0) {
|
|
44
39
|
const entityExcludedIds = Array.from(context.excludedEntityIds);
|
|
45
40
|
const entityPlaceholders = entityExcludedIds.map((id) => `$${context.addParam(id)}`).join(', ');
|
|
46
|
-
|
|
41
|
+
entityExclusionCondition = ` AND ec.entity_id NOT IN (${entityPlaceholders})`;
|
|
47
42
|
}
|
|
48
43
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
44
|
+
if (componentIds.length === 1) {
|
|
45
|
+
// Single component - simple query, no INTERSECT needed
|
|
46
|
+
const paramIdx = context.addParam(componentIds[0]);
|
|
47
|
+
cteSql += ` SELECT DISTINCT ec.entity_id\n`;
|
|
48
|
+
cteSql += ` FROM entity_components ec\n`;
|
|
49
|
+
cteSql += ` WHERE ec.type_id = $${paramIdx}::text\n`;
|
|
50
|
+
cteSql += ` AND ec.deleted_at IS NULL\n`;
|
|
51
|
+
if (cursorCondition) cteSql += ` ${cursorCondition.trim()}\n`;
|
|
52
|
+
if (exclusionCondition) cteSql += ` ${exclusionCondition.trim()}\n`;
|
|
53
|
+
if (entityExclusionCondition) cteSql += ` ${entityExclusionCondition.trim()}\n`;
|
|
54
|
+
} else {
|
|
55
|
+
// Multiple components - use INTERSECT for much faster queries
|
|
56
|
+
// INTERSECT allows PostgreSQL to use index scans independently per component
|
|
57
|
+
// then efficiently merge results, avoiding Cartesian product explosion
|
|
58
|
+
const intersectQueries = componentIds.map((compId) => {
|
|
59
|
+
const paramIdx = context.addParam(compId);
|
|
60
|
+
let subquery = `SELECT ec.entity_id FROM entity_components ec WHERE ec.type_id = $${paramIdx}::text AND ec.deleted_at IS NULL`;
|
|
61
|
+
// Add cursor/exclusion conditions to each subquery for efficiency
|
|
62
|
+
if (cursorCondition) subquery += cursorCondition;
|
|
63
|
+
if (exclusionCondition) subquery += exclusionCondition;
|
|
64
|
+
if (entityExclusionCondition) subquery += entityExclusionCondition;
|
|
65
|
+
return `(${subquery})`;
|
|
66
|
+
});
|
|
67
|
+
cteSql += ` SELECT entity_id FROM (\n`;
|
|
68
|
+
cteSql += ` ${intersectQueries.join('\n INTERSECT\n ')}\n`;
|
|
69
|
+
cteSql += ` ) AS intersected\n`;
|
|
70
|
+
}
|
|
53
71
|
|
|
54
72
|
// Add ORDER BY for deterministic pagination results
|
|
55
73
|
// Must be before LIMIT/OFFSET for consistent page results
|
|
56
74
|
// Reverse order for 'before' cursor direction
|
|
57
75
|
const orderDirection = context.cursorDirection === 'before' ? 'DESC' : 'ASC';
|
|
58
|
-
|
|
76
|
+
// Use correct column reference based on query structure
|
|
77
|
+
const orderColumn = componentIds.length === 1 ? 'ec.entity_id' : 'entity_id';
|
|
78
|
+
cteSql += ` ORDER BY ${orderColumn} ${orderDirection}\n`;
|
|
59
79
|
|
|
60
80
|
// Check if there are component filters - if so, pagination must happen AFTER filtering
|
|
61
81
|
// Otherwise we'd limit results before applying filters, causing incorrect results
|