bunsane 0.2.3 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/cache.config.ts +2 -0
- package/core/ArcheType.ts +67 -34
- package/core/BatchLoader.ts +215 -30
- package/core/Entity.ts +2 -2
- package/core/RequestContext.ts +15 -10
- package/core/RequestLoaders.ts +4 -2
- package/core/cache/CacheFactory.ts +3 -1
- package/core/cache/CacheProvider.ts +1 -0
- package/core/cache/CacheWarmer.ts +45 -23
- package/core/cache/MemoryCache.ts +10 -1
- package/core/cache/RedisCache.ts +26 -7
- package/core/validateEnv.ts +8 -0
- package/database/DatabaseHelper.ts +113 -1
- package/database/index.ts +78 -45
- package/docs/SCALABILITY_PLAN.md +175 -0
- package/package.json +13 -2
- package/query/CTENode.ts +44 -24
- package/query/ComponentInclusionNode.ts +181 -91
- package/query/Query.ts +9 -9
- package/tests/benchmark/BENCHMARK_DATABASES_PLAN.md +338 -0
- package/tests/benchmark/bunfig.toml +9 -0
- package/tests/benchmark/fixtures/EcommerceComponents.ts +283 -0
- package/tests/benchmark/fixtures/EcommerceDataGenerators.ts +301 -0
- package/tests/benchmark/fixtures/RelationTracker.ts +159 -0
- package/tests/benchmark/fixtures/index.ts +6 -0
- package/tests/benchmark/index.ts +22 -0
- package/tests/benchmark/noop-preload.ts +3 -0
- package/tests/benchmark/runners/BenchmarkLoader.ts +132 -0
- package/tests/benchmark/runners/index.ts +4 -0
- package/tests/benchmark/scenarios/query-benchmarks.test.ts +465 -0
- package/tests/benchmark/scripts/generate-db.ts +344 -0
- package/tests/benchmark/scripts/run-benchmarks.ts +97 -0
- package/tests/integration/query/Query.complexAnalysis.test.ts +557 -0
- package/tests/integration/query/Query.explainAnalyze.test.ts +233 -0
- package/tests/stress/fixtures/RealisticComponents.ts +235 -0
- package/tests/stress/scenarios/realistic-scenarios.test.ts +1081 -0
- package/tests/stress/scenarios/timeout-investigation.test.ts +522 -0
- package/tests/unit/BatchLoader.test.ts +139 -25
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# BunSane Scalability Plan: 1M+ Entities
|
|
2
|
+
|
|
3
|
+
## Problem Statement
|
|
4
|
+
|
|
5
|
+
At 50k entities, complex multi-component queries with sorting degrade catastrophically:
|
|
6
|
+
- 10k entities: 20ms
|
|
7
|
+
- 50k entities: 7,880ms (394x slower)
|
|
8
|
+
- Projected 1M: minutes to hours
|
|
9
|
+
|
|
10
|
+
Root cause: Cartesian product explosion in nested loop joins when sorting on JSONB fields.
|
|
11
|
+
|
|
12
|
+
## Bottleneck Analysis
|
|
13
|
+
|
|
14
|
+
### 1. Multi-Component Query Pattern (Critical)
|
|
15
|
+
|
|
16
|
+
Current SQL for 2-component query:
|
|
17
|
+
```sql
|
|
18
|
+
SELECT DISTINCT ec.entity_id
|
|
19
|
+
FROM entity_components ec
|
|
20
|
+
WHERE ec.type_id IN ($1, $2) AND ec.deleted_at IS NULL
|
|
21
|
+
GROUP BY ec.entity_id
|
|
22
|
+
HAVING COUNT(DISTINCT ec.type_id) = 2
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
**Problem**: Scans ALL entity_components for ALL matching types, then aggregates.
|
|
26
|
+
At 1M entities × 3 components = 3M rows scanned before filtering.
|
|
27
|
+
|
|
28
|
+
**Solution**: Use INTERSECT or EXISTS pattern:
|
|
29
|
+
```sql
|
|
30
|
+
-- Option A: INTERSECT (better for 2-3 components)
|
|
31
|
+
SELECT entity_id FROM entity_components WHERE type_id = $1 AND deleted_at IS NULL
|
|
32
|
+
INTERSECT
|
|
33
|
+
SELECT entity_id FROM entity_components WHERE type_id = $2 AND deleted_at IS NULL
|
|
34
|
+
|
|
35
|
+
-- Option B: EXISTS (better for many components)
|
|
36
|
+
SELECT DISTINCT e.entity_id
|
|
37
|
+
FROM entity_components e
|
|
38
|
+
WHERE e.type_id = $1 AND e.deleted_at IS NULL
|
|
39
|
+
AND EXISTS (SELECT 1 FROM entity_components e2
|
|
40
|
+
WHERE e2.entity_id = e.entity_id AND e2.type_id = $2 AND e2.deleted_at IS NULL)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### 2. Sorting on JSONB Fields (Critical)
|
|
44
|
+
|
|
45
|
+
Current pattern:
|
|
46
|
+
```sql
|
|
47
|
+
ORDER BY c.data->>'age' DESC
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Problem**: Can't use B-tree indexes, so the planner falls back to a sequential scan + in-memory sort.
|
|
51
|
+
|
|
52
|
+
**Solutions**:
|
|
53
|
+
|
|
54
|
+
A. **Expression Index** (per-field, must exist):
|
|
55
|
+
```sql
|
|
56
|
+
CREATE INDEX idx_testuser_age_btree ON components_testuser ((data->>'age'));
|
|
57
|
+
-- For numeric sorting:
|
|
58
|
+
CREATE INDEX idx_testuser_age_numeric ON components_testuser (((data->>'age')::numeric));
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
B. **Query must cast for numeric sort**:
|
|
62
|
+
```sql
|
|
63
|
+
ORDER BY (c.data->>'age')::numeric DESC -- Uses numeric index
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
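C. **Covering Index** (include entity_id for index-only scan; note that PostgreSQL's planner usually does not choose an index-only scan for an expression index unless the underlying column is also in the index, so confirm with `EXPLAIN`):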
|
|
67
|
+
```sql
|
|
68
|
+
CREATE INDEX idx_testuser_age_covering
|
|
69
|
+
ON components_testuser ((data->>'age'), entity_id)
|
|
70
|
+
WHERE deleted_at IS NULL;
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### 3. Missing Index on entities.deleted_at
|
|
74
|
+
|
|
75
|
+
Every query does `WHERE deleted_at IS NULL` on entities table.
|
|
76
|
+
|
|
77
|
+
**Fix**:
|
|
78
|
+
```sql
|
|
79
|
+
CREATE INDEX idx_entities_deleted_null ON entities (id) WHERE deleted_at IS NULL;
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### 4. OFFSET Pagination Scaling
|
|
83
|
+
|
|
84
|
+
`OFFSET 900000` requires scanning 900k rows to skip them.
|
|
85
|
+
|
|
86
|
+
**Already implemented**: `cursor(entityId)` pagination in Query.ts.
|
|
87
|
+
**Action**: Document as required pattern for large datasets.
|
|
88
|
+
|
|
89
|
+
## Implementation Plan
|
|
90
|
+
|
|
91
|
+
### Phase 1: Quick Wins (Immediate)
|
|
92
|
+
|
|
93
|
+
1. **Add missing index on entities**
|
|
94
|
+
- File: `database/DatabaseHelper.ts`
|
|
95
|
+
- Add: `idx_entities_deleted_null`
|
|
96
|
+
|
|
97
|
+
2. **Numeric cast in ORDER BY**
|
|
98
|
+
- File: `query/ComponentInclusionNode.ts`
|
|
99
|
+
- Detect numeric fields and add `::numeric` cast
|
|
100
|
+
|
|
101
|
+
3. **Use INTERSECT for 2-3 component queries**
|
|
102
|
+
- File: `query/ComponentInclusionNode.ts`
|
|
103
|
+
- Threshold: Use INTERSECT when componentIds.size <= 3
|
|
104
|
+
|
|
105
|
+
### Phase 2: Index Strategy (Short-term)
|
|
106
|
+
|
|
107
|
+
4. **Auto-create expression indexes for sortable fields**
|
|
108
|
+
- File: `database/IndexingStrategy.ts`
|
|
109
|
+
- Add: `createSortIndex(table, field, type: 'text' | 'numeric' | 'date')`
|
|
110
|
+
|
|
111
|
+
5. **Query hints for sort fields**
|
|
112
|
+
- New decorator: `@SortableField(type)`
|
|
113
|
+
- Creates appropriate expression index at registration
|
|
114
|
+
|
|
115
|
+
### Phase 3: Query Restructuring (Medium-term)
|
|
116
|
+
|
|
117
|
+
6. **EXISTS pattern for multi-component with filters**
|
|
118
|
+
- Rewrite CTE to use correlated EXISTS
|
|
119
|
+
- Push filters into EXISTS subqueries
|
|
120
|
+
|
|
121
|
+
7. **Batch entity lookup optimization**
|
|
122
|
+
- Use `= ANY($1::uuid[])` instead of `IN (...)` for large ID lists
|
|
123
|
+
- Better plan caching with array parameter
|
|
124
|
+
|
|
125
|
+
### Phase 4: Denormalization Options (Long-term)
|
|
126
|
+
|
|
127
|
+
8. **entity_component_summary table**
|
|
128
|
+
```sql
|
|
129
|
+
CREATE TABLE entity_component_summary (
|
|
130
|
+
entity_id UUID PRIMARY KEY,
|
|
131
|
+
component_types TEXT[], -- Array of type_ids
|
|
132
|
+
updated_at TIMESTAMP
|
|
133
|
+
);
|
|
134
|
+
CREATE INDEX idx_ecs_types_gin ON entity_component_summary USING GIN (component_types);
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Query pattern:
|
|
138
|
+
```sql
|
|
139
|
+
SELECT entity_id FROM entity_component_summary
|
|
140
|
+
WHERE component_types @> ARRAY[$1, $2]::text[]
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
9. **Materialized views for hot paths**
|
|
144
|
+
- Pre-join common component combinations
|
|
145
|
+
- Refresh on schedule or trigger
|
|
146
|
+
|
|
147
|
+
## Benchmarks Required
|
|
148
|
+
|
|
149
|
+
| Scenario | Target (1M entities) |
|
|
150
|
+
|----------|---------------------|
|
|
151
|
+
| Single component, no filter | < 50ms |
|
|
152
|
+
| Single component, indexed filter | < 20ms |
|
|
153
|
+
| 2-component intersection | < 100ms |
|
|
154
|
+
| 3-component intersection | < 200ms |
|
|
155
|
+
| Sort on indexed field, limit 100 | < 50ms |
|
|
156
|
+
| Complex (2-comp + filter + sort) | < 500ms |
|
|
157
|
+
| Count | < 100ms |
|
|
158
|
+
| Cursor pagination (any page) | < 50ms |
|
|
159
|
+
|
|
160
|
+
## Migration Strategy
|
|
161
|
+
|
|
162
|
+
1. New indexes are additive (no breaking changes)
|
|
163
|
+
2. Query changes behind feature flag: `BUNSANE_QUERY_V2=true`
|
|
164
|
+
3. Gradual rollout with A/B testing on query performance
|
|
165
|
+
4. Deprecate old patterns after validation
|
|
166
|
+
|
|
167
|
+
## Files to Modify
|
|
168
|
+
|
|
169
|
+
- `database/DatabaseHelper.ts` - Add entity index
|
|
170
|
+
- `database/IndexingStrategy.ts` - Sort index creation
|
|
171
|
+
- `query/ComponentInclusionNode.ts` - INTERSECT pattern, numeric cast
|
|
172
|
+
- `query/QueryDAG.ts` - Component count threshold for strategy selection
|
|
173
|
+
- `core/components/Decorators.ts` - @SortableField decorator
|
|
174
|
+
- New: `query/strategies/IntersectStrategy.ts`
|
|
175
|
+
- New: `query/strategies/ExistsStrategy.ts`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bunsane",
|
|
3
|
-
"version": "0.2.3",
|
|
3
|
+
"version": "0.2.7",
|
|
4
4
|
"author": {
|
|
5
5
|
"name": "yaaruu"
|
|
6
6
|
},
|
|
@@ -29,7 +29,18 @@
|
|
|
29
29
|
"test:stress": "bun test tests/stress --timeout 600000",
|
|
30
30
|
"test:stress:smoke": "STRESS_RECORD_COUNT=10000 bun test tests/stress --timeout 300000",
|
|
31
31
|
"test:stress:standard": "STRESS_RECORD_COUNT=100000 bun test tests/stress --timeout 600000",
|
|
32
|
-
"test:stress:full": "STRESS_RECORD_COUNT=1000000 bun test tests/stress --timeout 1800000"
|
|
32
|
+
"test:stress:full": "STRESS_RECORD_COUNT=1000000 bun test tests/stress --timeout 1800000",
|
|
33
|
+
"bench:generate:xs": "bun tests/benchmark/scripts/generate-db.ts xs",
|
|
34
|
+
"bench:generate:sm": "bun tests/benchmark/scripts/generate-db.ts sm",
|
|
35
|
+
"bench:generate:md": "bun tests/benchmark/scripts/generate-db.ts md",
|
|
36
|
+
"bench:generate:lg": "bun tests/benchmark/scripts/generate-db.ts lg",
|
|
37
|
+
"bench:generate:xl": "bun tests/benchmark/scripts/generate-db.ts xl",
|
|
38
|
+
"bench:generate:all": "bun tests/benchmark/scripts/generate-db.ts --all",
|
|
39
|
+
"bench:run:xs": "bun tests/benchmark/scripts/run-benchmarks.ts xs",
|
|
40
|
+
"bench:run:sm": "bun tests/benchmark/scripts/run-benchmarks.ts sm",
|
|
41
|
+
"bench:run:md": "bun tests/benchmark/scripts/run-benchmarks.ts md",
|
|
42
|
+
"bench:run:lg": "bun tests/benchmark/scripts/run-benchmarks.ts lg",
|
|
43
|
+
"bench:run:xl": "bun tests/benchmark/scripts/run-benchmarks.ts xl"
|
|
33
44
|
},
|
|
34
45
|
"devDependencies": {
|
|
35
46
|
"@electric-sql/pglite": "^0.3.15",
|
package/query/CTENode.ts
CHANGED
|
@@ -13,49 +13,69 @@ export class CTENode extends QueryNode {
|
|
|
13
13
|
}
|
|
14
14
|
|
|
15
15
|
let cteSql = "WITH base_entities AS (\n";
|
|
16
|
-
cteSql += " SELECT DISTINCT ec.entity_id\n";
|
|
17
|
-
cteSql += " FROM entity_components ec\n";
|
|
18
|
-
cteSql += " WHERE ec.type_id IN (";
|
|
19
16
|
|
|
20
|
-
//
|
|
21
|
-
|
|
22
|
-
cteSql += typePlaceholders + ")\n";
|
|
23
|
-
cteSql += " AND ec.deleted_at IS NULL\n";
|
|
24
|
-
|
|
25
|
-
// Add cursor-based pagination filter in CTE (more efficient than OFFSET)
|
|
17
|
+
// Build cursor condition for reuse across INTERSECT queries
|
|
18
|
+
let cursorCondition = "";
|
|
26
19
|
if (context.cursorId !== null) {
|
|
27
20
|
const operator = context.cursorDirection === 'after' ? '>' : '<';
|
|
28
|
-
|
|
21
|
+
cursorCondition = ` AND ec.entity_id ${operator} $${context.addParam(context.cursorId)}`;
|
|
29
22
|
}
|
|
30
23
|
|
|
31
|
-
//
|
|
24
|
+
// Build exclusion condition for reuse across INTERSECT queries
|
|
25
|
+
let exclusionCondition = "";
|
|
32
26
|
if (excludedIds.length > 0) {
|
|
33
27
|
const excludedPlaceholders = excludedIds.map((id) => `$${context.addParam(id)}`).join(', ');
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
28
|
+
exclusionCondition = ` AND NOT EXISTS (
|
|
29
|
+
SELECT 1 FROM entity_components ec_ex
|
|
30
|
+
WHERE ec_ex.entity_id = ec.entity_id
|
|
31
|
+
AND ec_ex.type_id IN (${excludedPlaceholders})
|
|
32
|
+
AND ec_ex.deleted_at IS NULL
|
|
33
|
+
)`;
|
|
40
34
|
}
|
|
41
35
|
|
|
42
|
-
//
|
|
36
|
+
// Build entity exclusion condition for reuse
|
|
37
|
+
let entityExclusionCondition = "";
|
|
43
38
|
if (context.excludedEntityIds.size > 0) {
|
|
44
39
|
const entityExcludedIds = Array.from(context.excludedEntityIds);
|
|
45
40
|
const entityPlaceholders = entityExcludedIds.map((id) => `$${context.addParam(id)}`).join(', ');
|
|
46
|
-
|
|
41
|
+
entityExclusionCondition = ` AND ec.entity_id NOT IN (${entityPlaceholders})`;
|
|
47
42
|
}
|
|
48
43
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
44
|
+
if (componentIds.length === 1) {
|
|
45
|
+
// Single component - simple query, no INTERSECT needed
|
|
46
|
+
const paramIdx = context.addParam(componentIds[0]);
|
|
47
|
+
cteSql += ` SELECT DISTINCT ec.entity_id\n`;
|
|
48
|
+
cteSql += ` FROM entity_components ec\n`;
|
|
49
|
+
cteSql += ` WHERE ec.type_id = $${paramIdx}::text\n`;
|
|
50
|
+
cteSql += ` AND ec.deleted_at IS NULL\n`;
|
|
51
|
+
if (cursorCondition) cteSql += ` ${cursorCondition.trim()}\n`;
|
|
52
|
+
if (exclusionCondition) cteSql += ` ${exclusionCondition.trim()}\n`;
|
|
53
|
+
if (entityExclusionCondition) cteSql += ` ${entityExclusionCondition.trim()}\n`;
|
|
54
|
+
} else {
|
|
55
|
+
// Multiple components - use INTERSECT for much faster queries
|
|
56
|
+
// INTERSECT allows PostgreSQL to use index scans independently per component
|
|
57
|
+
// then efficiently merge results, avoiding Cartesian product explosion
|
|
58
|
+
const intersectQueries = componentIds.map((compId) => {
|
|
59
|
+
const paramIdx = context.addParam(compId);
|
|
60
|
+
let subquery = `SELECT ec.entity_id FROM entity_components ec WHERE ec.type_id = $${paramIdx}::text AND ec.deleted_at IS NULL`;
|
|
61
|
+
// Add cursor/exclusion conditions to each subquery for efficiency
|
|
62
|
+
if (cursorCondition) subquery += cursorCondition;
|
|
63
|
+
if (exclusionCondition) subquery += exclusionCondition;
|
|
64
|
+
if (entityExclusionCondition) subquery += entityExclusionCondition;
|
|
65
|
+
return `(${subquery})`;
|
|
66
|
+
});
|
|
67
|
+
cteSql += ` SELECT entity_id FROM (\n`;
|
|
68
|
+
cteSql += ` ${intersectQueries.join('\n INTERSECT\n ')}\n`;
|
|
69
|
+
cteSql += ` ) AS intersected\n`;
|
|
70
|
+
}
|
|
53
71
|
|
|
54
72
|
// Add ORDER BY for deterministic pagination results
|
|
55
73
|
// Must be before LIMIT/OFFSET for consistent page results
|
|
56
74
|
// Reverse order for 'before' cursor direction
|
|
57
75
|
const orderDirection = context.cursorDirection === 'before' ? 'DESC' : 'ASC';
|
|
58
|
-
|
|
76
|
+
// Use correct column reference based on query structure
|
|
77
|
+
const orderColumn = componentIds.length === 1 ? 'ec.entity_id' : 'entity_id';
|
|
78
|
+
cteSql += ` ORDER BY ${orderColumn} ${orderDirection}\n`;
|
|
59
79
|
|
|
60
80
|
// Check if there are component filters - if so, pagination must happen AFTER filtering
|
|
61
81
|
// Otherwise we'd limit results before applying filters, causing incorrect results
|