@tanstack/db 0.0.23 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/collection.cjs +60 -19
- package/dist/cjs/collection.cjs.map +1 -1
- package/dist/cjs/collection.d.cts +27 -6
- package/dist/cjs/local-only.cjs +2 -1
- package/dist/cjs/local-only.cjs.map +1 -1
- package/dist/cjs/local-storage.cjs +2 -1
- package/dist/cjs/local-storage.cjs.map +1 -1
- package/dist/cjs/proxy.cjs +105 -11
- package/dist/cjs/proxy.cjs.map +1 -1
- package/dist/cjs/proxy.d.cts +8 -0
- package/dist/cjs/query/builder/index.cjs +72 -0
- package/dist/cjs/query/builder/index.cjs.map +1 -1
- package/dist/cjs/query/builder/index.d.cts +64 -0
- package/dist/cjs/query/compiler/index.cjs +44 -8
- package/dist/cjs/query/compiler/index.cjs.map +1 -1
- package/dist/cjs/query/compiler/index.d.cts +4 -7
- package/dist/cjs/query/compiler/joins.cjs +14 -6
- package/dist/cjs/query/compiler/joins.cjs.map +1 -1
- package/dist/cjs/query/compiler/joins.d.cts +4 -8
- package/dist/cjs/query/compiler/types.d.cts +10 -0
- package/dist/cjs/query/live-query-collection.cjs +2 -1
- package/dist/cjs/query/live-query-collection.cjs.map +1 -1
- package/dist/cjs/query/optimizer.cjs +283 -0
- package/dist/cjs/query/optimizer.cjs.map +1 -0
- package/dist/cjs/query/optimizer.d.cts +42 -0
- package/dist/cjs/types.d.cts +1 -0
- package/dist/cjs/utils.cjs +42 -0
- package/dist/cjs/utils.cjs.map +1 -0
- package/dist/cjs/utils.d.cts +18 -0
- package/dist/esm/collection.d.ts +27 -6
- package/dist/esm/collection.js +60 -19
- package/dist/esm/collection.js.map +1 -1
- package/dist/esm/local-only.js +2 -1
- package/dist/esm/local-only.js.map +1 -1
- package/dist/esm/local-storage.js +2 -1
- package/dist/esm/local-storage.js.map +1 -1
- package/dist/esm/proxy.d.ts +8 -0
- package/dist/esm/proxy.js +105 -11
- package/dist/esm/proxy.js.map +1 -1
- package/dist/esm/query/builder/index.d.ts +64 -0
- package/dist/esm/query/builder/index.js +72 -0
- package/dist/esm/query/builder/index.js.map +1 -1
- package/dist/esm/query/compiler/index.d.ts +4 -7
- package/dist/esm/query/compiler/index.js +44 -8
- package/dist/esm/query/compiler/index.js.map +1 -1
- package/dist/esm/query/compiler/joins.d.ts +4 -8
- package/dist/esm/query/compiler/joins.js +14 -6
- package/dist/esm/query/compiler/joins.js.map +1 -1
- package/dist/esm/query/compiler/types.d.ts +10 -0
- package/dist/esm/query/live-query-collection.js +2 -1
- package/dist/esm/query/live-query-collection.js.map +1 -1
- package/dist/esm/query/optimizer.d.ts +42 -0
- package/dist/esm/query/optimizer.js +283 -0
- package/dist/esm/query/optimizer.js.map +1 -0
- package/dist/esm/types.d.ts +1 -0
- package/dist/esm/utils.d.ts +18 -0
- package/dist/esm/utils.js +42 -0
- package/dist/esm/utils.js.map +1 -0
- package/package.json +1 -1
- package/src/collection.ts +75 -26
- package/src/local-only.ts +4 -1
- package/src/local-storage.ts +4 -1
- package/src/proxy.ts +152 -24
- package/src/query/builder/index.ts +104 -0
- package/src/query/compiler/index.ts +85 -18
- package/src/query/compiler/joins.ts +21 -13
- package/src/query/compiler/types.ts +12 -0
- package/src/query/live-query-collection.ts +3 -1
- package/src/query/optimizer.ts +738 -0
- package/src/types.ts +1 -0
- package/src/utils.ts +86 -0
|
@@ -0,0 +1,738 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* # Query Optimizer
|
|
3
|
+
*
|
|
4
|
+
* The query optimizer improves query performance by implementing predicate pushdown optimization.
|
|
5
|
+
* It rewrites the intermediate representation (IR) to push WHERE clauses as close to the data
|
|
6
|
+
* source as possible, reducing the amount of data processed during joins.
|
|
7
|
+
*
|
|
8
|
+
* ## How It Works
|
|
9
|
+
*
|
|
10
|
+
* The optimizer follows a 4-step process:
|
|
11
|
+
*
|
|
12
|
+
* ### 1. AND Clause Splitting
|
|
13
|
+
* Splits AND clauses at the root level into separate WHERE clauses for granular optimization.
|
|
14
|
+
* ```javascript
|
|
15
|
+
* // Before: WHERE and(eq(users.department_id, 1), gt(users.age, 25))
|
|
16
|
+
* // After: WHERE eq(users.department_id, 1) + WHERE gt(users.age, 25)
|
|
17
|
+
* ```
|
|
18
|
+
*
|
|
19
|
+
* ### 2. Source Analysis
|
|
20
|
+
* Analyzes each WHERE clause to determine which table sources it references:
|
|
21
|
+
* - Single-source clauses: Touch only one table (e.g., `users.department_id = 1`)
|
|
22
|
+
* - Multi-source clauses: Touch multiple tables (e.g., `users.id = posts.user_id`)
|
|
23
|
+
*
|
|
24
|
+
* ### 3. Clause Grouping
|
|
25
|
+
* Groups WHERE clauses by the sources they touch:
|
|
26
|
+
* - Single-source clauses are grouped by their respective table
|
|
27
|
+
* - Multi-source clauses are combined for the main query
|
|
28
|
+
*
|
|
29
|
+
* ### 4. Subquery Creation
|
|
30
|
+
* Lifts single-source WHERE clauses into subqueries that wrap the original table references.
|
|
31
|
+
*
|
|
32
|
+
* ## Safety & Edge Cases
|
|
33
|
+
*
|
|
34
|
+
* The optimizer includes targeted safety checks to prevent predicate pushdown when it could
|
|
35
|
+
* break query semantics:
|
|
36
|
+
*
|
|
37
|
+
* ### Always Safe Operations
|
|
38
|
+
* - **Creating new subqueries**: Wrapping collection references in subqueries with WHERE clauses
|
|
39
|
+
* - **Main query optimizations**: Moving single-source WHERE clauses from main query to subqueries
|
|
40
|
+
* - **Queries with aggregates/ORDER BY/HAVING**: Can still create new filtered subqueries
|
|
41
|
+
*
|
|
42
|
+
* ### Unsafe Operations (blocked by safety checks)
|
|
43
|
+
* Pushing WHERE clauses **into existing subqueries** that have:
|
|
44
|
+
* - **Aggregates**: GROUP BY, HAVING, or aggregate functions in SELECT (would change aggregation)
|
|
45
|
+
* - **Ordering + Limits**: ORDER BY combined with LIMIT/OFFSET (would change result set)
|
|
46
|
+
* - **Functional Operations**: fnSelect, fnWhere, fnHaving (potential side effects)
|
|
47
|
+
*
|
|
48
|
+
* The optimizer tracks which clauses were actually optimized and only removes those from the
|
|
49
|
+
* main query. Subquery reuse is handled safely through immutable query copies.
|
|
50
|
+
*
|
|
51
|
+
* ## Example Optimizations
|
|
52
|
+
*
|
|
53
|
+
* ### Basic Query with Joins
|
|
54
|
+
* **Original Query:**
|
|
55
|
+
* ```javascript
|
|
56
|
+
* query
|
|
57
|
+
* .from({ users: usersCollection })
|
|
58
|
+
* .join({ posts: postsCollection }, ({users, posts}) => eq(users.id, posts.user_id))
|
|
59
|
+
* .where(({users}) => eq(users.department_id, 1))
|
|
60
|
+
* .where(({posts}) => gt(posts.views, 100))
|
|
61
|
+
* .where(({users, posts}) => eq(users.id, posts.author_id))
|
|
62
|
+
* ```
|
|
63
|
+
*
|
|
64
|
+
* **Optimized Query:**
|
|
65
|
+
* ```javascript
|
|
66
|
+
* query
|
|
67
|
+
* .from({
|
|
68
|
+
* users: subquery
|
|
69
|
+
* .from({ users: usersCollection })
|
|
70
|
+
* .where(({users}) => eq(users.department_id, 1))
|
|
71
|
+
* })
|
|
72
|
+
* .join({
|
|
73
|
+
* posts: subquery
|
|
74
|
+
* .from({ posts: postsCollection })
|
|
75
|
+
* .where(({posts}) => gt(posts.views, 100))
|
|
76
|
+
* }, ({users, posts}) => eq(users.id, posts.user_id))
|
|
77
|
+
* .where(({users, posts}) => eq(users.id, posts.author_id))
|
|
78
|
+
* ```
|
|
79
|
+
*
|
|
80
|
+
* ### Query with Aggregates (Now Optimizable!)
|
|
81
|
+
* **Original Query:**
|
|
82
|
+
* ```javascript
|
|
83
|
+
* query
|
|
84
|
+
* .from({ users: usersCollection })
|
|
85
|
+
* .join({ posts: postsCollection }, ({users, posts}) => eq(users.id, posts.user_id))
|
|
86
|
+
* .where(({users}) => eq(users.department_id, 1))
|
|
87
|
+
* .groupBy(['users.department_id'])
|
|
88
|
+
* .select({ count: agg('count', '*') })
|
|
89
|
+
* ```
|
|
90
|
+
*
|
|
91
|
+
* **Optimized Query:**
|
|
92
|
+
* ```javascript
|
|
93
|
+
* query
|
|
94
|
+
* .from({
|
|
95
|
+
* users: subquery
|
|
96
|
+
* .from({ users: usersCollection })
|
|
97
|
+
* .where(({users}) => eq(users.department_id, 1))
|
|
98
|
+
* })
|
|
99
|
+
* .join({ posts: postsCollection }, ({users, posts}) => eq(users.id, posts.user_id))
|
|
100
|
+
* .groupBy(['users.department_id'])
|
|
101
|
+
* .select({ count: agg('count', '*') })
|
|
102
|
+
* ```
|
|
103
|
+
*
|
|
104
|
+
* ## Benefits
|
|
105
|
+
*
|
|
106
|
+
* - **Reduced Data Processing**: Filters applied before joins reduce intermediate result size
|
|
107
|
+
* - **Better Performance**: Smaller datasets lead to faster query execution
|
|
108
|
+
* - **Automatic Optimization**: No manual query rewriting required
|
|
109
|
+
* - **Preserves Semantics**: Optimized queries return identical results
|
|
110
|
+
* - **Safe by Design**: Comprehensive checks prevent semantic-breaking optimizations
|
|
111
|
+
*
|
|
112
|
+
* ## Integration
|
|
113
|
+
*
|
|
114
|
+
* The optimizer is automatically called during query compilation before the IR is
|
|
115
|
+
* transformed into a D2Mini pipeline.
|
|
116
|
+
*/
|
|
117
|
+
|
|
118
|
+
import { deepEquals } from "../utils.js"
|
|
119
|
+
import {
|
|
120
|
+
CollectionRef as CollectionRefClass,
|
|
121
|
+
Func,
|
|
122
|
+
QueryRef as QueryRefClass,
|
|
123
|
+
} from "./ir.js"
|
|
124
|
+
import type { BasicExpression, From, QueryIR } from "./ir.js"
|
|
125
|
+
|
|
126
|
+
/**
 * Outcome of source analysis for a single WHERE predicate.
 */
export interface AnalyzedWhereClause {
  /** The predicate that was analysed, unchanged */
  expression: BasicExpression<boolean>
  /** Aliases of every table/source referenced somewhere inside `expression` */
  touchedSources: Set<string>
}
|
|
135
|
+
|
|
136
|
+
/**
 * WHERE predicates bucketed by how many sources they reference.
 */
export interface GroupedWhereClauses {
  /**
   * Predicates that reference exactly one source, keyed by that source's alias.
   * All predicates for the same alias are merged into one expression.
   */
  singleSource: Map<string, BasicExpression<boolean>>
  /**
   * Predicates that reference more than one source, merged into one expression.
   * Absent when there are none.
   */
  multiSource?: BasicExpression<boolean>
}
|
|
145
|
+
|
|
146
|
+
/**
 * Entry point of the optimizer: pushes WHERE predicates down towards the
 * data sources and then strips subqueries that no longer do anything.
 *
 * The pushdown pass (`applyRecursiveOptimization`) is repeated until one pass
 * yields a query that `deepEquals` the query it started from, or until
 * 10 passes have run, whichever comes first. The result of the last pass is
 * then handed to `removeRedundantSubqueries`.
 *
 * @param query - The QueryIR to optimize
 * @returns The QueryIR produced by the pushdown passes and the cleanup step
 *
 * @example
 * ```typescript
 * const originalQuery = {
 *   from: new CollectionRef(users, 'u'),
 *   join: [{ from: new CollectionRef(posts, 'p'), ... }],
 *   where: [eq(u.dept_id, 1), gt(p.views, 100)]
 * }
 *
 * const optimized = optimizeQuery(originalQuery)
 * // Single-source predicates now live in subqueries around `u` and `p`
 * ```
 */
export function optimizeQuery(query: QueryIR): QueryIR {
  const maxIterations = 10 // Hard cap so a non-converging rewrite cannot loop forever

  let current = query
  let previous: QueryIR | undefined

  for (let pass = 0; pass < maxIterations; pass++) {
    // Fixed point reached: the last pass changed nothing
    if (deepEquals(current, previous)) {
      break
    }
    previous = current
    current = applyRecursiveOptimization(current)
  }

  // Collapse wrappers that ended up without any clause of their own
  return removeRedundantSubqueries(current)
}
|
|
190
|
+
|
|
191
|
+
/**
 * Runs one pushdown pass over a query, innermost subqueries first.
 *
 * Every `queryRef` found in FROM or in a JOIN is optimized recursively and
 * re-wrapped under its original alias; collection references are passed
 * through untouched. The query itself is then handed to
 * `applySingleLevelOptimization`.
 *
 * @param query - The QueryIR to optimize
 * @returns The QueryIR after one bottom-up pushdown pass
 */
function applyRecursiveOptimization(query: QueryIR): QueryIR {
  // Optimize the subquery behind a source, if it has one
  const optimizeSource = (source: From): From =>
    source.type === `queryRef`
      ? new QueryRefClass(
          applyRecursiveOptimization(source.query),
          source.alias
        )
      : source

  const withOptimizedSubqueries = {
    ...query,
    from: optimizeSource(query.from),
    join: query.join?.map((joinClause) => ({
      ...joinClause,
      from: optimizeSource(joinClause.from),
    })),
  }

  return applySingleLevelOptimization(withOptimizedSubqueries)
}
|
|
223
|
+
|
|
224
|
+
/**
 * Applies predicate pushdown to one query level (no recursion into subqueries).
 *
 * The query is returned as-is (same object) when it has no WHERE clauses or
 * no JOINs. Otherwise the WHERE clauses go through four steps: split on AND,
 * analyse touched sources, group by source, rewrite the query.
 *
 * @param query - The QueryIR level to optimize
 * @returns The input query when nothing applies, otherwise the rewritten query
 */
function applySingleLevelOptimization(query: QueryIR): QueryIR {
  // Nothing to push down
  const whereClauses = query.where
  if (!whereClauses || whereClauses.length === 0) {
    return query
  }

  // Pushdown is only attempted for queries that join
  const joins = query.join
  if (!joins || joins.length === 0) {
    return query
  }

  // Steps 1-3: split on AND, find the sources of each piece, bucket by source
  const groupedClauses = groupWhereClauses(
    splitAndClauses(whereClauses).map((clause) => analyzeWhereClause(clause))
  )

  // Step 4: move the single-source buckets into subqueries
  return applyOptimizations(query, groupedClauses)
}
|
|
253
|
+
|
|
254
|
+
/**
 * Strips pass-through subqueries from a query's FROM and JOIN sources.
 *
 * Each source is run through `removeRedundantFromClause`; all other fields
 * of the query are carried over unchanged.
 *
 * @param query - The QueryIR to process
 * @returns A new QueryIR whose sources have had redundant wrappers removed
 */
function removeRedundantSubqueries(query: QueryIR): QueryIR {
  const from = removeRedundantFromClause(query.from)
  const join = query.join?.map((joinClause) => ({
    ...joinClause,
    from: removeRedundantFromClause(joinClause.from),
  }))

  return { ...query, from, join }
}
|
|
272
|
+
|
|
273
|
+
/**
 * Unwraps a source whose subquery adds nothing of its own.
 *
 * Collection references are returned unchanged. For a subquery, the nested
 * query is cleaned first; if it then turns out to be redundant
 * (see `isRedundantSubquery`), its own source is returned in its place,
 * re-labelled with the alias of the wrapper that was removed.
 *
 * @param from - The FROM/JOIN source to process
 * @returns The source with redundant subquery layers removed
 */
function removeRedundantFromClause(from: From): From {
  if (from.type === `collectionRef`) {
    return from
  }

  const cleanedQuery = removeRedundantSubqueries(from.query)

  // The subquery still does real work: keep it, with its cleaned body
  if (!isRedundantSubquery(cleanedQuery)) {
    return new QueryRefClass(cleanedQuery, from.alias)
  }

  // Pass-through wrapper: expose what it wrapped under the outer alias
  const innerSource = removeRedundantFromClause(cleanedQuery.from)
  return innerSource.type === `collectionRef`
    ? new CollectionRefClass(innerSource.collection, from.alias)
    : new QueryRefClass(innerSource.query, from.alias)
}
|
|
299
|
+
|
|
300
|
+
/**
 * Tells whether a query is a pure pass-through of its FROM source.
 *
 * That is the case when it has no SELECT / fnSelect, no LIMIT or OFFSET, and
 * every list-valued clause (WHERE, GROUP BY, HAVING, ORDER BY, JOIN, fnWhere,
 * fnHaving) is missing or empty.
 *
 * @param query - The query to check
 * @returns True if the query adds nothing on top of its source
 */
function isRedundantSubquery(query: QueryIR): boolean {
  const isEmpty = (list: ReadonlyArray<unknown> | undefined): boolean =>
    !list || list.length === 0

  // Any projection reshapes the rows
  if (query.select || query.fnSelect) {
    return false
  }

  // Any window restricts the rows
  if (query.limit !== undefined || query.offset !== undefined) {
    return false
  }

  return [
    query.where,
    query.groupBy,
    query.having,
    query.orderBy,
    query.join,
    query.fnWhere,
    query.fnHaving,
  ].every((list) => isEmpty(list))
}
|
|
321
|
+
|
|
322
|
+
/**
 * Step 1: flatten `and(...)` expressions into a list of individual predicates.
 *
 * An expression is split when it is a `func` named `and`; its arguments are
 * then examined the same way, so nested ANDs are flattened completely.
 * Anything else (including `or(...)`) is kept as one unit. The relative order
 * of the resulting predicates follows the order of the input.
 *
 * @param whereClauses - WHERE expressions to flatten
 * @returns The individual predicates, in input order
 *
 * @example
 * ```typescript
 * // Input:  [and(eq(a, 1), gt(b, 2)), eq(c, 3)]
 * // Output: [eq(a, 1), gt(b, 2), eq(c, 3)]
 * ```
 */
function splitAndClauses(
  whereClauses: Array<BasicExpression<boolean>>
): Array<BasicExpression<boolean>> {
  const flattened: Array<BasicExpression<boolean>> = []

  const visit = (clause: BasicExpression<boolean>): void => {
    if (clause.type === `func` && clause.name === `and`) {
      // Descend into the conjunction; its operands become separate predicates
      for (const operand of clause.args as Array<BasicExpression<boolean>>) {
        visit(operand)
      }
      return
    }
    // Not a conjunction: keep intact
    flattened.push(clause)
  }

  for (const clause of whereClauses) {
    visit(clause)
  }

  return flattened
}
|
|
357
|
+
|
|
358
|
+
/**
 * Step 2: find out which source aliases a WHERE predicate refers to.
 *
 * The expression tree is walked depth-first. For every `ref` node the first
 * element of its `path` is recorded (when present and truthy); `func` and
 * `agg` nodes are searched through their `args`; `val` nodes and any other
 * node type contribute nothing.
 *
 * @param clause - The WHERE expression to analyze
 * @returns The same expression together with the set of aliases found
 *
 * @example
 * ```typescript
 * // eq(users.department_id, 1)   -> touches ['users']
 * // eq(users.id, posts.user_id)  -> touches ['users', 'posts']
 * ```
 */
function analyzeWhereClause(
  clause: BasicExpression<boolean>
): AnalyzedWhereClause {
  const touchedSources = new Set<string>()

  const visit = (node: BasicExpression | any): void => {
    if (node.type === `ref`) {
      // The alias is the leading path segment
      const alias =
        node.path && node.path.length > 0 ? node.path[0] : undefined
      if (alias) {
        touchedSources.add(alias)
      }
      return
    }

    // Functions and aggregates can reference sources through their arguments
    if ((node.type === `func` || node.type === `agg`) && node.args) {
      node.args.forEach((arg: any) => visit(arg))
    }
  }

  visit(clause)

  return { expression: clause, touchedSources }
}
|
|
417
|
+
|
|
418
|
+
/**
 * Step 3: bucket the analysed WHERE predicates by the sources they touch.
 *
 * - A predicate touching exactly one source goes into that source's bucket
 *   and becomes a candidate for pushdown.
 * - Every other predicate stays with the main query and is collected in
 *   `multiSource`. This covers predicates spanning several sources and
 *   predicates that reference no source at all (e.g. a constant condition).
 *   The latter cannot be pushed anywhere, but must not be lost: the rewritten
 *   query's WHERE is rebuilt solely from the buckets returned here, so a
 *   predicate left out of both buckets would disappear from the query.
 *
 * Predicates within a bucket are merged with `combineWithAnd`.
 *
 * @param analyzedClauses - Predicates with their touched sources
 * @returns Per-source predicates plus the combined main-query predicate
 *          (`multiSource` is undefined when no predicate has to stay)
 */
function groupWhereClauses(
  analyzedClauses: Array<AnalyzedWhereClause>
): GroupedWhereClauses {
  const perSource = new Map<string, Array<BasicExpression<boolean>>>()
  const mainQueryClauses: Array<BasicExpression<boolean>> = []

  for (const { expression, touchedSources } of analyzedClauses) {
    if (touchedSources.size !== 1) {
      // Zero sources (constant) or several sources: evaluate in the main query
      mainQueryClauses.push(expression)
      continue
    }

    const alias = Array.from(touchedSources)[0]!
    const bucket = perSource.get(alias)
    if (bucket) {
      bucket.push(expression)
    } else {
      perSource.set(alias, [expression])
    }
  }

  // Collapse each per-source bucket into a single expression
  const singleSource = new Map<string, BasicExpression<boolean>>()
  for (const [alias, clauses] of perSource) {
    singleSource.set(alias, combineWithAnd(clauses))
  }

  return {
    singleSource,
    multiSource:
      mainQueryClauses.length > 0
        ? combineWithAnd(mainQueryClauses)
        : undefined,
  }
}
|
|
464
|
+
|
|
465
|
+
/**
 * Step 4: rewrite a query so that single-source predicates live next to
 * their source.
 *
 * The FROM source and every JOIN source are passed through
 * `optimizeFromWithTracking`, which records the aliases it actually managed
 * to push a predicate into. The WHERE list of the returned query then holds
 * the combined multi-source predicate (if any) followed by the single-source
 * predicates that could not be pushed. All remaining clauses are carried
 * over, with list-valued ones shallow-copied.
 *
 * NOTE(review): predicates are pushed into every matching source regardless
 * of `joinClause.type`. If outer joins are supported, filtering the
 * null-supplying side before the join is generally not equivalent to
 * filtering after it - confirm against the join compiler.
 *
 * @param query - QueryIR level being rewritten
 * @param groupedClauses - Predicates bucketed by `groupWhereClauses`
 * @returns A new QueryIR with the rewritten sources and the leftover WHERE
 */
function applyOptimizations(
  query: QueryIR,
  groupedClauses: GroupedWhereClauses
): QueryIR {
  const { singleSource, multiSource } = groupedClauses
  const copyList = <T>(list: Array<T> | undefined): Array<T> | undefined =>
    list ? [...list] : undefined

  // Aliases whose predicate really ended up inside a subquery
  const pushedDown = new Set<string>()

  const from = optimizeFromWithTracking(query.from, singleSource, pushedDown)
  const join = query.join?.map((joinClause) => ({
    ...joinClause,
    from: optimizeFromWithTracking(joinClause.from, singleSource, pushedDown),
  }))

  // What has to remain on this level: multi-source first, then the leftovers
  const where: Array<BasicExpression<boolean>> = multiSource
    ? [multiSource]
    : []
  for (const [alias, clause] of singleSource) {
    if (pushedDown.has(alias)) {
      continue
    }
    where.push(clause)
  }

  return {
    select: query.select,
    groupBy: copyList(query.groupBy),
    having: copyList(query.having),
    orderBy: copyList(query.orderBy),
    limit: query.limit,
    offset: query.offset,
    fnSelect: query.fnSelect,
    fnWhere: copyList(query.fnWhere),
    fnHaving: copyList(query.fnHaving),
    from,
    join,
    where,
  }
}
|
|
540
|
+
|
|
541
|
+
/**
 * Produces a structural copy of a QueryIR.
 *
 * What is duplicated: the query object itself, every FROM/JOIN reference
 * (recursively for subqueries), each join clause object, and the arrays
 * holding WHERE, GROUP BY, HAVING, ORDER BY, fnWhere and fnHaving.
 * What is shared with the input: the underlying collections, `select`,
 * `fnSelect`, join `left`/`right` expressions and the elements of the
 * copied arrays.
 *
 * @param query - QueryIR to copy
 * @returns A new QueryIR with the structure described above
 */
function deepCopyQuery(query: QueryIR): QueryIR {
  const copySource = (source: From): From =>
    source.type === `collectionRef`
      ? new CollectionRefClass(source.collection, source.alias)
      : new QueryRefClass(deepCopyQuery(source.query), source.alias)

  const copyList = <T>(list: Array<T> | undefined): Array<T> | undefined =>
    list ? [...list] : undefined

  return {
    from: copySource(query.from),
    select: query.select,
    join: query.join?.map((joinClause) => ({
      type: joinClause.type,
      left: joinClause.left,
      right: joinClause.right,
      from: copySource(joinClause.from),
    })),
    where: copyList(query.where),
    groupBy: copyList(query.groupBy),
    having: copyList(query.having),
    orderBy: copyList(query.orderBy),
    limit: query.limit,
    offset: query.offset,
    fnSelect: query.fnSelect,
    fnWhere: copyList(query.fnWhere),
    fnHaving: copyList(query.fnHaving),
  }
}
|
|
588
|
+
|
|
589
|
+
/**
 * Rewrites one FROM/JOIN source, pushing in the predicate registered for its
 * alias when that is possible, and records the alias on success.
 *
 * - No predicate for the alias: a copy of the source is returned.
 * - Collection reference with a predicate: the collection is wrapped in a
 *   new subquery `{ from: collection, where: [predicate] }` under the same
 *   alias.
 * - Existing subquery with a predicate: the predicate is appended to a copy
 *   of the subquery's WHERE list, but only if
 *   `isSafeToPushIntoExistingSubquery` allows it; otherwise a plain copy is
 *   returned and the alias is NOT recorded, so the caller keeps the predicate.
 *
 * NOTE(review): the appended predicate still refers to `from.alias`;
 * presumably the subquery resolves that alias itself - verify for subqueries
 * whose own source uses a different alias or that project columns.
 *
 * @param from - Source to rewrite
 * @param singleSourceClauses - Pushdown candidates keyed by source alias
 * @param actuallyOptimized - Receives the alias when a predicate was pushed
 * @returns A new source object; the input is never returned itself
 */
function optimizeFromWithTracking(
  from: From,
  singleSourceClauses: Map<string, BasicExpression<boolean>>,
  actuallyOptimized: Set<string>
): From {
  const pushedClause = singleSourceClauses.get(from.alias)

  if (from.type === `collectionRef`) {
    const collectionCopy = new CollectionRefClass(from.collection, from.alias)
    if (!pushedClause) {
      return collectionCopy
    }

    // A fresh wrapper has no clauses of its own the filter could interfere with
    actuallyOptimized.add(from.alias)
    const wrapper: QueryIR = { from: collectionCopy, where: [pushedClause] }
    return new QueryRefClass(wrapper, from.alias)
  }

  // From here on `from` is a subquery reference
  const subqueryCopy = deepCopyQuery(from.query)

  if (!pushedClause || !isSafeToPushIntoExistingSubquery(from.query)) {
    // Nothing to push, or pushing would alter what the subquery computes
    return new QueryRefClass(subqueryCopy, from.alias)
  }

  actuallyOptimized.add(from.alias)
  const filteredSubquery: QueryIR = {
    ...subqueryCopy,
    where: [...(from.query.where || []), pushedClause],
  }
  return new QueryRefClass(filteredSubquery, from.alias)
}
|
|
645
|
+
|
|
646
|
+
/**
 * Decides whether an extra WHERE predicate may be added to an existing
 * subquery without changing what that subquery returns for the rows that
 * pass the predicate.
 *
 * Pushing is refused when the subquery has any of:
 *
 * 1. **Aggregates in SELECT** - at the top level of a selected expression or
 *    nested inside a function call; filtering first changes what is aggregated
 * 2. **GROUP BY** - same reason
 * 3. **HAVING** - operates on aggregated data
 * 4. **LIMIT or OFFSET** - the subquery yields a window of rows; filtering
 *    before the window is taken selects different rows than filtering after
 *    it. This holds with or without ORDER BY, so ORDER BY is not required
 *    for the refusal (ORDER BY alone is harmless).
 * 5. **Functional clauses** (fnSelect, fnWhere, fnHaving) - opaque callbacks
 *    that could have side effects
 *
 * This check is only relevant for existing subqueries; wrapping a plain
 * collection reference in a new filtered subquery does not go through it.
 *
 * @param query - The existing subquery a predicate would be added to
 * @returns True if adding a WHERE predicate is considered safe
 *
 * @example
 * ```typescript
 * // UNSAFE: has GROUP BY - pushing WHERE could change aggregation
 * { from: users, groupBy: [dept], select: { count: agg('count', '*') } }
 *
 * // UNSAFE: has LIMIT - pushing WHERE could change which 10 rows come back
 * { from: users, orderBy: [salary desc], limit: 10 }
 *
 * // SAFE: plain SELECT without aggregates/limits
 * { from: users, select: { id, name } }
 * ```
 */
function isSafeToPushIntoExistingSubquery(query: QueryIR): boolean {
  // True if the expression is an aggregate or has one anywhere in its arguments
  const containsAggregate = (expr: unknown): boolean => {
    if (typeof expr !== `object` || expr === null) {
      return false
    }
    const node = expr as { type?: unknown; args?: unknown }
    if (node.type === `agg`) {
      return true
    }
    return (
      node.type === `func` &&
      Array.isArray(node.args) &&
      node.args.some((arg) => containsAggregate(arg))
    )
  }

  if (
    query.select &&
    Object.values(query.select).some((expr) => containsAggregate(expr))
  ) {
    return false
  }

  if (query.groupBy && query.groupBy.length > 0) {
    return false
  }

  if (query.having && query.having.length > 0) {
    return false
  }

  // A row window makes filter-then-limit differ from limit-then-filter
  if (query.limit !== undefined || query.offset !== undefined) {
    return false
  }

  if (
    query.fnSelect ||
    (query.fnWhere && query.fnWhere.length > 0) ||
    (query.fnHaving && query.fnHaving.length > 0)
  ) {
    return false
  }

  return true
}
|
|
714
|
+
|
|
715
|
+
/**
 * Merges a non-empty list of boolean expressions into one.
 *
 * A single expression is handed back untouched; two or more are wrapped in
 * an `and` function node that takes the list as its arguments.
 *
 * @param expressions - Expressions to merge
 * @returns The sole expression, or an `and(...)` over all of them
 * @throws Error if `expressions` is empty
 */
function combineWithAnd(
  expressions: Array<BasicExpression<boolean>>
): BasicExpression<boolean> {
  switch (expressions.length) {
    case 0:
      throw new Error(`Cannot combine empty expression list`)
    case 1:
      // No wrapper needed around a lone predicate
      return expressions[0]!
    default:
      return new Func(`and`, expressions)
  }
}
|