@tanstack/db 0.0.22 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/dist/cjs/collection.cjs +14 -6
  2. package/dist/cjs/collection.cjs.map +1 -1
  3. package/dist/cjs/collection.d.cts +10 -9
  4. package/dist/cjs/local-storage.cjs +1 -1
  5. package/dist/cjs/local-storage.cjs.map +1 -1
  6. package/dist/cjs/proxy.cjs +21 -0
  7. package/dist/cjs/proxy.cjs.map +1 -1
  8. package/dist/cjs/query/builder/index.cjs +72 -0
  9. package/dist/cjs/query/builder/index.cjs.map +1 -1
  10. package/dist/cjs/query/builder/index.d.cts +64 -0
  11. package/dist/cjs/query/compiler/index.cjs +44 -8
  12. package/dist/cjs/query/compiler/index.cjs.map +1 -1
  13. package/dist/cjs/query/compiler/index.d.cts +4 -7
  14. package/dist/cjs/query/compiler/joins.cjs +14 -6
  15. package/dist/cjs/query/compiler/joins.cjs.map +1 -1
  16. package/dist/cjs/query/compiler/joins.d.cts +4 -8
  17. package/dist/cjs/query/compiler/types.d.cts +10 -0
  18. package/dist/cjs/query/optimizer.cjs +283 -0
  19. package/dist/cjs/query/optimizer.cjs.map +1 -0
  20. package/dist/cjs/query/optimizer.d.cts +42 -0
  21. package/dist/cjs/transactions.cjs.map +1 -1
  22. package/dist/cjs/transactions.d.cts +5 -5
  23. package/dist/cjs/types.d.cts +35 -10
  24. package/dist/cjs/utils.cjs +42 -0
  25. package/dist/cjs/utils.cjs.map +1 -0
  26. package/dist/cjs/utils.d.cts +18 -0
  27. package/dist/esm/collection.d.ts +10 -9
  28. package/dist/esm/collection.js +14 -6
  29. package/dist/esm/collection.js.map +1 -1
  30. package/dist/esm/local-storage.js +1 -1
  31. package/dist/esm/local-storage.js.map +1 -1
  32. package/dist/esm/proxy.js +21 -0
  33. package/dist/esm/proxy.js.map +1 -1
  34. package/dist/esm/query/builder/index.d.ts +64 -0
  35. package/dist/esm/query/builder/index.js +72 -0
  36. package/dist/esm/query/builder/index.js.map +1 -1
  37. package/dist/esm/query/compiler/index.d.ts +4 -7
  38. package/dist/esm/query/compiler/index.js +44 -8
  39. package/dist/esm/query/compiler/index.js.map +1 -1
  40. package/dist/esm/query/compiler/joins.d.ts +4 -8
  41. package/dist/esm/query/compiler/joins.js +14 -6
  42. package/dist/esm/query/compiler/joins.js.map +1 -1
  43. package/dist/esm/query/compiler/types.d.ts +10 -0
  44. package/dist/esm/query/optimizer.d.ts +42 -0
  45. package/dist/esm/query/optimizer.js +283 -0
  46. package/dist/esm/query/optimizer.js.map +1 -0
  47. package/dist/esm/transactions.d.ts +5 -5
  48. package/dist/esm/transactions.js.map +1 -1
  49. package/dist/esm/types.d.ts +35 -10
  50. package/dist/esm/utils.d.ts +18 -0
  51. package/dist/esm/utils.js +42 -0
  52. package/dist/esm/utils.js.map +1 -0
  53. package/package.json +1 -1
  54. package/src/collection.ts +62 -21
  55. package/src/local-storage.ts +2 -2
  56. package/src/proxy.ts +24 -0
  57. package/src/query/builder/index.ts +104 -0
  58. package/src/query/compiler/index.ts +85 -18
  59. package/src/query/compiler/joins.ts +21 -13
  60. package/src/query/compiler/types.ts +12 -0
  61. package/src/query/optimizer.ts +738 -0
  62. package/src/transactions.ts +8 -12
  63. package/src/types.ts +69 -14
  64. package/src/utils.ts +86 -0
@@ -0,0 +1,738 @@
1
+ /**
2
+ * # Query Optimizer
3
+ *
4
+ * The query optimizer improves query performance by implementing predicate pushdown optimization.
5
+ * It rewrites the intermediate representation (IR) to push WHERE clauses as close to the data
6
+ * source as possible, reducing the amount of data processed during joins.
7
+ *
8
+ * ## How It Works
9
+ *
10
+ * The optimizer follows a 4-step process:
11
+ *
12
+ * ### 1. AND Clause Splitting
13
+ * Splits AND clauses at the root level into separate WHERE clauses for granular optimization.
14
+ * ```javascript
15
+ * // Before: WHERE and(eq(users.department_id, 1), gt(users.age, 25))
16
+ * // After: WHERE eq(users.department_id, 1) + WHERE gt(users.age, 25)
17
+ * ```
18
+ *
19
+ * ### 2. Source Analysis
20
+ * Analyzes each WHERE clause to determine which table sources it references:
21
+ * - Single-source clauses: Touch only one table (e.g., `users.department_id = 1`)
22
+ * - Multi-source clauses: Touch multiple tables (e.g., `users.id = posts.user_id`)
23
+ *
24
+ * ### 3. Clause Grouping
25
+ * Groups WHERE clauses by the sources they touch:
26
+ * - Single-source clauses are grouped by their respective table
27
+ * - Multi-source clauses are combined for the main query
28
+ *
29
+ * ### 4. Subquery Creation
30
+ * Lifts single-source WHERE clauses into subqueries that wrap the original table references.
31
+ *
32
+ * ## Safety & Edge Cases
33
+ *
34
+ * The optimizer includes targeted safety checks to prevent predicate pushdown when it could
35
+ * break query semantics:
36
+ *
37
+ * ### Always Safe Operations
38
+ * - **Creating new subqueries**: Wrapping collection references in subqueries with WHERE clauses
39
+ * - **Main query optimizations**: Moving single-source WHERE clauses from main query to subqueries
40
+ * - **Queries with aggregates/ORDER BY/HAVING**: Can still create new filtered subqueries
41
+ *
42
+ * ### Unsafe Operations (blocked by safety checks)
43
+ * Pushing WHERE clauses **into existing subqueries** that have:
44
+ * - **Aggregates**: GROUP BY, HAVING, or aggregate functions in SELECT (would change aggregation)
45
+ * - **Ordering + Limits**: ORDER BY combined with LIMIT/OFFSET (would change result set)
46
+ * - **Functional Operations**: fnSelect, fnWhere, fnHaving (potential side effects)
47
+ *
48
+ * The optimizer tracks which clauses were actually optimized and only removes those from the
49
+ * main query. Subquery reuse is handled safely through immutable query copies.
50
+ *
51
+ * ## Example Optimizations
52
+ *
53
+ * ### Basic Query with Joins
54
+ * **Original Query:**
55
+ * ```javascript
56
+ * query
57
+ * .from({ users: usersCollection })
58
+ * .join({ posts: postsCollection }, ({users, posts}) => eq(users.id, posts.user_id))
59
+ * .where(({users}) => eq(users.department_id, 1))
60
+ * .where(({posts}) => gt(posts.views, 100))
61
+ * .where(({users, posts}) => eq(users.id, posts.author_id))
62
+ * ```
63
+ *
64
+ * **Optimized Query:**
65
+ * ```javascript
66
+ * query
67
+ * .from({
68
+ * users: subquery
69
+ * .from({ users: usersCollection })
70
+ * .where(({users}) => eq(users.department_id, 1))
71
+ * })
72
+ * .join({
73
+ * posts: subquery
74
+ * .from({ posts: postsCollection })
75
+ * .where(({posts}) => gt(posts.views, 100))
76
+ * }, ({users, posts}) => eq(users.id, posts.user_id))
77
+ * .where(({users, posts}) => eq(users.id, posts.author_id))
78
+ * ```
79
+ *
80
+ * ### Query with Aggregates (Now Optimizable!)
81
+ * **Original Query:**
82
+ * ```javascript
83
+ * query
84
+ * .from({ users: usersCollection })
85
+ * .join({ posts: postsCollection }, ({users, posts}) => eq(users.id, posts.user_id))
86
+ * .where(({users}) => eq(users.department_id, 1))
87
+ * .groupBy(['users.department_id'])
88
+ * .select({ count: agg('count', '*') })
89
+ * ```
90
+ *
91
+ * **Optimized Query:**
92
+ * ```javascript
93
+ * query
94
+ * .from({
95
+ * users: subquery
96
+ * .from({ users: usersCollection })
97
+ * .where(({users}) => eq(users.department_id, 1))
98
+ * })
99
+ * .join({ posts: postsCollection }, ({users, posts}) => eq(users.id, posts.user_id))
100
+ * .groupBy(['users.department_id'])
101
+ * .select({ count: agg('count', '*') })
102
+ * ```
103
+ *
104
+ * ## Benefits
105
+ *
106
+ * - **Reduced Data Processing**: Filters applied before joins reduce intermediate result size
107
+ * - **Better Performance**: Smaller datasets lead to faster query execution
108
+ * - **Automatic Optimization**: No manual query rewriting required
109
+ * - **Preserves Semantics**: Optimized queries return identical results
110
+ * - **Safe by Design**: Comprehensive checks prevent semantic-breaking optimizations
111
+ *
112
+ * ## Integration
113
+ *
114
+ * The optimizer is automatically called during query compilation before the IR is
115
+ * transformed into a D2Mini pipeline.
116
+ */
117
+
118
+ import { deepEquals } from "../utils.js"
119
+ import {
120
+ CollectionRef as CollectionRefClass,
121
+ Func,
122
+ QueryRef as QueryRefClass,
123
+ } from "./ir.js"
124
+ import type { BasicExpression, From, QueryIR } from "./ir.js"
125
+
126
/**
 * Outcome of inspecting one WHERE expression for the sources it references.
 */
export interface AnalyzedWhereClause {
  /** The inspected WHERE expression, carried along unchanged */
  expression: BasicExpression<boolean>
  /** Aliases of every table/source referenced somewhere inside the expression */
  touchedSources: Set<string>
}
135
+
136
/**
 * WHERE clauses bucketed by how many sources they reference.
 */
export interface GroupedWhereClauses {
  /** One combined expression per source alias, built from the clauses that touch only that source */
  singleSource: Map<string, BasicExpression<boolean>>
  /** Combined expression of the clauses spanning several sources; absent when there are none */
  multiSource?: BasicExpression<boolean>
}
145
+
146
/**
 * Entry point of the query optimizer.
 *
 * Repeatedly runs the recursive predicate-pushdown pass until a pass yields a
 * query that is deep-equal to its input (or the pass budget is exhausted), and
 * then strips subqueries that no longer contribute anything.
 *
 * @param query - The QueryIR to optimize
 * @returns The optimized QueryIR
 *
 * @example
 * ```typescript
 * const optimized = optimizeQuery({
 *   from: new CollectionRef(users, 'u'),
 *   join: [{ from: new CollectionRef(posts, 'p'), ... }],
 *   where: [eq(u.dept_id, 1), gt(p.views, 100)]
 * })
 * // Single-source clauses end up in subqueries wrapping `u` and `p`
 * ```
 */
export function optimizeQuery(query: QueryIR): QueryIR {
  // Hard cap on pushdown passes so a non-converging rewrite cannot loop forever
  const passBudget = 10

  let current = query
  let lastInput: QueryIR | undefined

  for (let pass = 0; pass < passBudget; pass++) {
    // Converged: the previous pass produced nothing new
    if (deepEquals(current, lastInput)) {
      break
    }
    lastInput = current
    current = applyRecursiveOptimization(current)
  }

  // Drop wrapper subqueries that add no clauses of their own
  return removeRedundantSubqueries(current)
}
190
+
191
/**
 * Runs predicate pushdown bottom-up: every subquery reachable through FROM or
 * JOIN is optimized first, then the single-level pass is applied to this query.
 *
 * @param query - The QueryIR to optimize
 * @returns A new QueryIR with optimizations applied
 */
function applyRecursiveOptimization(query: QueryIR): QueryIR {
  // Optimizes the query behind a queryRef; any other source is handed back as-is
  const optimizeNested = (source: From): From =>
    source.type === `queryRef`
      ? new QueryRefClass(
          applyRecursiveOptimization(source.query),
          source.alias
        )
      : source

  const nestedFrom = optimizeNested(query.from)
  const nestedJoins = query.join?.map((joinClause) => ({
    ...joinClause,
    from: optimizeNested(joinClause.from),
  }))

  // With the children done, optimize this level
  return applySingleLevelOptimization({
    ...query,
    from: nestedFrom,
    join: nestedJoins,
  })
}
223
+
224
/**
 * Applies predicate pushdown to one query level (nested subqueries are not visited).
 *
 * The query is returned untouched when it has no WHERE clauses or no joins;
 * otherwise its WHERE clauses are split, analyzed, grouped and lifted.
 */
function applySingleLevelOptimization(query: QueryIR): QueryIR {
  const whereClauses = query.where
  const joins = query.join

  // Nothing to push down, or nothing to push it past
  if (
    !whereClauses ||
    whereClauses.length === 0 ||
    !joins ||
    joins.length === 0
  ) {
    return query
  }

  // Steps 1-3: split root-level ANDs, find touched sources, bucket by source
  const grouped = groupWhereClauses(
    splitAndClauses(whereClauses).map((clause) => analyzeWhereClause(clause))
  )

  // Step 4: lift the single-source buckets into subqueries
  return applyOptimizations(query, grouped)
}
253
+
254
/**
 * Strips subqueries that merely wrap another source without contributing any
 * clause of their own, in both the FROM clause and every JOIN clause.
 *
 * @param query - The QueryIR to process
 * @returns A new QueryIR with redundant subqueries removed
 */
function removeRedundantSubqueries(query: QueryIR): QueryIR {
  const from = removeRedundantFromClause(query.from)
  const join = query.join?.map((joinClause) => ({
    ...joinClause,
    from: removeRedundantFromClause(joinClause.from),
  }))

  return { ...query, from, join }
}
272
+
273
/**
 * Unwraps a FROM clause whose subquery is redundant.
 *
 * Collection references are returned unchanged. For a subquery, the inner query
 * is cleaned first; if what remains is redundant, its own source is hoisted and
 * re-labelled with the outer alias, otherwise the cleaned subquery is kept.
 *
 * @param from - The FROM clause to process
 * @returns A FROM clause with redundant subqueries removed
 */
function removeRedundantFromClause(from: From): From {
  if (from.type === `collectionRef`) {
    return from
  }

  const cleanedInner = removeRedundantSubqueries(from.query)

  if (!isRedundantSubquery(cleanedInner)) {
    return new QueryRefClass(cleanedInner, from.alias)
  }

  // The wrapper adds nothing: expose its source directly under the outer alias
  const hoisted = removeRedundantFromClause(cleanedInner.from)
  return hoisted.type === `collectionRef`
    ? new CollectionRefClass(hoisted.collection, from.alias)
    : new QueryRefClass(hoisted.query, from.alias)
}
299
+
300
/**
 * Tells whether a subquery contributes nothing beyond its FROM clause.
 *
 * @param query - The query to check
 * @returns True when the query has no WHERE, SELECT, GROUP BY, HAVING, ORDER BY,
 *   JOIN, LIMIT, OFFSET or functional (fn*) clauses
 */
function isRedundantSubquery(query: QueryIR): boolean {
  const isMissingOrEmpty = (list: ReadonlyArray<unknown> | undefined): boolean =>
    !list || list.length === 0

  // Any projection makes the subquery meaningful
  if (query.select || query.fnSelect) {
    return false
  }

  // So does any paging
  if (query.limit !== undefined || query.offset !== undefined) {
    return false
  }

  return (
    isMissingOrEmpty(query.where) &&
    isMissingOrEmpty(query.groupBy) &&
    isMissingOrEmpty(query.having) &&
    isMissingOrEmpty(query.orderBy) &&
    isMissingOrEmpty(query.join) &&
    isMissingOrEmpty(query.fnWhere) &&
    isMissingOrEmpty(query.fnHaving)
  )
}
321
+
322
/**
 * Step 1: Flatten AND expressions into their individual conditions.
 *
 * Every `and(...)` found at the root of a clause is replaced by its arguments,
 * recursively, so each condition can be optimized on its own. Anything else
 * (including OR expressions) is kept whole, in its original position.
 *
 * @param whereClauses - Array of WHERE expressions to split
 * @returns Flattened array with AND clauses split into separate expressions
 *
 * @example
 * ```typescript
 * // Input:  [and(eq(a, 1), and(gt(b, 2), lt(b, 9))), eq(c, 3)]
 * // Output: [eq(a, 1), gt(b, 2), lt(b, 9), eq(c, 3)]
 * ```
 */
function splitAndClauses(
  whereClauses: Array<BasicExpression<boolean>>
): Array<BasicExpression<boolean>> {
  return whereClauses.flatMap((clause) =>
    clause.type === `func` && clause.name === `and`
      ? // An AND contributes its (recursively split) arguments
        splitAndClauses(clause.args as Array<BasicExpression<boolean>>)
      : // Everything else passes through untouched
        [clause]
  )
}
357
+
358
/**
 * Step 2: Work out which source aliases a WHERE clause references.
 *
 * The expression tree is walked depth-first; every `ref` contributes the first
 * element of its path, and `func`/`agg` nodes are searched through their args.
 * The result decides whether the clause can be pushed to a single source or has
 * to stay in the main query.
 *
 * @param clause - The WHERE expression to analyze
 * @returns The expression together with the set of aliases it touches
 *
 * @example
 * ```typescript
 * // eq(users.department_id, 1) -> touches ['users']
 * // eq(users.id, posts.user_id) -> touches ['users', 'posts']
 * ```
 */
function analyzeWhereClause(
  clause: BasicExpression<boolean>
): AnalyzedWhereClause {
  const aliases = new Set<string>()

  const visit = (node: BasicExpression | any): void => {
    const kind = node.type

    if (kind === `ref`) {
      // The alias is the leading path segment; empty or missing paths add nothing
      const path = node.path
      if (path && path.length > 0 && path[0]) {
        aliases.add(path[0])
      }
      return
    }

    // Functions and aggregates may reference sources through their arguments;
    // values (and any other node kind) reference nothing
    if ((kind === `func` || kind === `agg`) && node.args) {
      node.args.forEach(visit)
    }
  }

  visit(clause)

  return { expression: clause, touchedSources: aliases }
}
417
+
418
/**
 * Step 3: Group WHERE clauses by the sources they touch.
 *
 * - Clauses touching exactly one source are collected per alias; these are the
 *   candidates for pushdown into a subquery.
 * - Every other clause stays with the main query and ends up in `multiSource`.
 *   That covers clauses spanning several sources as well as clauses touching
 *   no source at all (constant predicates). The latter used to be discarded
 *   here, which silently removed them from the rewritten query; a constant
 *   predicate still filters rows, so it has to be preserved.
 *
 * Clauses in each group are combined into a single expression with AND.
 *
 * @param analyzedClauses - Array of analyzed WHERE clauses
 * @returns Grouped clauses ready for optimization
 */
function groupWhereClauses(
  analyzedClauses: Array<AnalyzedWhereClause>
): GroupedWhereClauses {
  const perSource = new Map<string, Array<BasicExpression<boolean>>>()
  const keptInMainQuery: Array<BasicExpression<boolean>> = []

  for (const { expression, touchedSources } of analyzedClauses) {
    if (touchedSources.size !== 1) {
      // Zero or several sources: cannot be attributed to one table
      keptInMainQuery.push(expression)
      continue
    }

    // Exactly one source: bucket under its alias
    const source = Array.from(touchedSources)[0]!
    const bucket = perSource.get(source)
    if (bucket) {
      bucket.push(expression)
    } else {
      perSource.set(source, [expression])
    }
  }

  // Collapse each per-source bucket into one AND-ed expression
  const singleSource = new Map<string, BasicExpression<boolean>>()
  for (const [source, clauses] of perSource) {
    singleSource.set(source, combineWithAnd(clauses))
  }

  // combineWithAnd rejects empty lists, so only call it when something was kept
  const multiSource =
    keptInMainQuery.length > 0 ? combineWithAnd(keptInMainQuery) : undefined

  return { singleSource, multiSource }
}
464
+
465
/**
 * Step 4: Build the rewritten query with single-source clauses lifted into subqueries.
 *
 * The FROM clause and every JOIN clause are offered the single-source clause
 * registered under their alias. Clauses that were accepted disappear from the
 * main WHERE; the multi-source expression and every single-source clause that
 * was not accepted remain there. The result is a freshly built object whose
 * array-valued fields are shallow copies of the originals.
 *
 * NOTE(review): pushdown is attempted for every join clause without looking at
 * `joinClause.type` — confirm this is semantically safe for outer joins.
 *
 * @param query - Original QueryIR to optimize
 * @param groupedClauses - WHERE clauses grouped by optimization strategy
 * @returns New QueryIR with optimizations applied
 */
function applyOptimizations(
  query: QueryIR,
  groupedClauses: GroupedWhereClauses
): QueryIR {
  const { singleSource, multiSource } = groupedClauses

  // Aliases whose clause really ended up inside a subquery
  const pushedDown = new Set<string>()
  const pushInto = (source: From): From =>
    optimizeFromWithTracking(source, singleSource, pushedDown)
  const cloneList = <T>(list: Array<T> | undefined): Array<T> | undefined =>
    list ? [...list] : undefined

  const from = pushInto(query.from)
  const join = query.join
    ? query.join.map((joinClause) => ({
        ...joinClause,
        from: pushInto(joinClause.from),
      }))
    : undefined

  // Main-query WHERE: the multi-source expression first, then whatever
  // single-source clauses could not be pushed
  const where: Array<BasicExpression<boolean>> = multiSource
    ? [multiSource]
    : []
  for (const [source, clause] of singleSource) {
    if (!pushedDown.has(source)) {
      where.push(clause)
    }
  }

  const rewritten: QueryIR = {
    // Fields the optimizer does not touch
    select: query.select,
    groupBy: cloneList(query.groupBy),
    having: cloneList(query.having),
    orderBy: cloneList(query.orderBy),
    limit: query.limit,
    offset: query.offset,
    fnSelect: query.fnSelect,
    fnWhere: cloneList(query.fnWhere),
    fnHaving: cloneList(query.fnHaving),

    // Rewritten sources
    from,
    join,

    // Always an array, possibly empty
    where,
  }

  return rewritten
}
540
+
541
/**
 * Produces a structural copy of a QueryIR.
 *
 * FROM and JOIN sources are rebuilt as new ref objects (recursing into
 * subqueries), and array-valued clauses are shallow-copied into new arrays.
 * Expressions, `select`, `fnSelect` and the collections themselves are shared
 * with the input. Copied join clauses carry only `type`, `left`, `right` and
 * `from`.
 *
 * @param query - QueryIR to copy
 * @returns New QueryIR object
 */
function deepCopyQuery(query: QueryIR): QueryIR {
  const cloneSource = (source: From): From =>
    source.type === `collectionRef`
      ? new CollectionRefClass(source.collection, source.alias)
      : new QueryRefClass(deepCopyQuery(source.query), source.alias)

  const cloneList = <T>(list: Array<T> | undefined): Array<T> | undefined =>
    list ? [...list] : undefined

  const from = cloneSource(query.from)
  const join = query.join
    ? query.join.map((joinClause) => ({
        type: joinClause.type,
        left: joinClause.left,
        right: joinClause.right,
        from: cloneSource(joinClause.from),
      }))
    : undefined

  return {
    from,
    select: query.select,
    join,
    where: cloneList(query.where),
    groupBy: cloneList(query.groupBy),
    having: cloneList(query.having),
    orderBy: cloneList(query.orderBy),
    limit: query.limit,
    offset: query.offset,
    fnSelect: query.fnSelect,
    fnWhere: cloneList(query.fnWhere),
    fnHaving: cloneList(query.fnHaving),
  }
}
588
+
589
/**
 * Offers a FROM/JOIN source the single-source WHERE clause registered under its
 * alias and records whether the clause was actually absorbed.
 *
 * - No clause for this alias: a fresh copy of the source is returned.
 * - Collection reference: wrapped in a new subquery carrying the clause.
 * - Existing subquery: the clause is appended to the subquery's WHERE only when
 *   that is both resolvable and safe (see below); otherwise a copy is returned
 *   and the alias is NOT marked, so the caller keeps the clause in the main query.
 *
 * The clause is written against the OUTER alias (`from.alias`) and is appended
 * to the subquery unchanged. Inside the subquery it can only resolve when the
 * subquery's own FROM uses that same alias, the subquery does not project
 * through `select` (outer refs would name projected fields, not source fields),
 * and the subquery has no joins (its rows would no longer be a single source).
 * Subqueries created by this function always satisfy those conditions.
 *
 * @param from - FROM clause to optimize
 * @param singleSourceClauses - Map of source aliases to their WHERE clauses
 * @param actuallyOptimized - Set that receives the alias when the clause was pushed down
 * @returns New FROM clause, potentially wrapped in a subquery
 */
function optimizeFromWithTracking(
  from: From,
  singleSourceClauses: Map<string, BasicExpression<boolean>>,
  actuallyOptimized: Set<string>
): From {
  const whereClause = singleSourceClauses.get(from.alias)

  if (from.type === `collectionRef`) {
    if (!whereClause) {
      // Nothing to push; still hand back a copy to keep the result immutable
      return new CollectionRefClass(from.collection, from.alias)
    }

    // Wrapping a bare collection in a filtered subquery is always safe
    const subQuery: QueryIR = {
      from: new CollectionRefClass(from.collection, from.alias),
      where: [whereClause],
    }
    actuallyOptimized.add(from.alias)
    return new QueryRefClass(subQuery, from.alias)
  }

  // From here on `from` is a queryRef
  const subquery = from.query

  // The clause references `from.alias`; make sure that name still denotes the
  // same rows inside the subquery before moving the clause there
  const clauseResolvesInside =
    subquery.from.alias === from.alias &&
    !subquery.select &&
    (!subquery.join || subquery.join.length === 0)

  if (
    !whereClause ||
    !clauseResolvesInside ||
    !isSafeToPushIntoExistingSubquery(subquery)
  ) {
    // Not pushed: return an untouched copy and leave the alias unmarked
    return new QueryRefClass(deepCopyQuery(subquery), from.alias)
  }

  // Append the clause to whatever the subquery already filters on
  const optimizedSubQuery: QueryIR = {
    ...deepCopyQuery(subquery),
    where: [...(subquery.where ?? []), whereClause],
  }
  actuallyOptimized.add(from.alias)
  return new QueryRefClass(optimizedSubQuery, from.alias)
}
645
+
646
/**
 * Determines if it's safe to push WHERE clauses into an existing subquery.
 *
 * Adding a filter inside a subquery changes its result in these cases, so the
 * function reports them as unsafe:
 *
 * 1. **Aggregates in SELECT** - including aggregates nested inside function
 *    expressions; filtering first changes what gets aggregated
 * 2. **GROUP BY / HAVING** - same reason, the groups would be built from fewer rows
 * 3. **LIMIT or OFFSET** - with or without ORDER BY, filtering before the cut
 *    selects a different window of rows than filtering after it
 * 4. **Functional operations** - fnSelect, fnWhere, fnHaving are opaque callbacks
 *
 * ORDER BY on its own is fine: filtering and sorting commute.
 *
 * Note: This check only applies when pushing WHERE clauses into existing
 * subqueries; it is not needed when a new subquery is created around a
 * collection reference.
 *
 * @param query - The existing subquery to check for safety
 * @returns True if it's safe to push WHERE clauses into this subquery, false otherwise
 *
 * @example
 * ```typescript
 * // UNSAFE: has GROUP BY - pushing WHERE could change aggregation
 * { from: users, groupBy: [dept], select: { count: agg('count', '*') } }
 *
 * // UNSAFE: has LIMIT - pushing WHERE could change which rows make the cut
 * { from: users, orderBy: [salary desc], limit: 10 }
 *
 * // SAFE: plain SELECT without aggregates/limits
 * { from: users, select: { id, name } }
 * ```
 */
function isSafeToPushIntoExistingSubquery(query: QueryIR): boolean {
  // True when the expression is an aggregate or contains one in its arguments
  const containsAggregate = (expr: unknown): boolean => {
    if (typeof expr !== `object` || expr === null) {
      return false
    }
    const node = expr as { type?: unknown; args?: unknown }
    if (node.type === `agg`) {
      return true
    }
    return (
      node.type === `func` &&
      Array.isArray(node.args) &&
      node.args.some((arg) => containsAggregate(arg))
    )
  }

  // Aggregates anywhere in the SELECT clause
  if (
    query.select &&
    Object.values(query.select).some((expr) => containsAggregate(expr))
  ) {
    return false
  }

  // GROUP BY clause
  if (query.groupBy && query.groupBy.length > 0) {
    return false
  }

  // HAVING clause
  if (query.having && query.having.length > 0) {
    return false
  }

  // Any LIMIT/OFFSET: the filter must see the rows after the cut, not before
  if (query.limit !== undefined || query.offset !== undefined) {
    return false
  }

  // Functional variants are opaque to the optimizer
  if (
    query.fnSelect ||
    (query.fnWhere && query.fnWhere.length > 0) ||
    (query.fnHaving && query.fnHaving.length > 0)
  ) {
    return false
  }

  // None of the unsafe conditions are present
  return true
}
714
+
715
/**
 * Folds a list of boolean expressions into one.
 *
 * A single expression is handed back untouched; two or more are wrapped in an
 * `and` function node that receives the list as its arguments.
 *
 * @param expressions - Array of expressions to combine
 * @returns Single expression representing the AND combination
 * @throws Error if the expressions array is empty
 */
function combineWithAnd(
  expressions: Array<BasicExpression<boolean>>
): BasicExpression<boolean> {
  switch (expressions.length) {
    case 0:
      throw new Error(`Cannot combine empty expression list`)
    case 1:
      // Nothing to combine
      return expressions[0]!
    default:
      return new Func(`and`, expressions)
  }
}