@tanstack/db 0.5.26 → 0.5.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -219,8 +219,11 @@ export function optimizeQuery(query: QueryIR): OptimizationResult {
219
219
 
220
220
  /**
221
221
  * Extracts collection-specific WHERE clauses from a query for index optimization.
222
- * This analyzes the original query to identify WHERE clauses that can be pushed down
223
- * to specific collections, but only for simple queries without joins.
222
+ * This analyzes the original query to identify single-source WHERE clauses that
223
+ * reference collection sources (not subqueries), including joined collections.
224
+ *
225
+ * For outer joins, clauses referencing the nullable side are excluded because
226
+ * using them to pre-filter collection data would change join semantics.
224
227
  *
225
228
  * @param query - The original QueryIR to analyze
226
229
  * @returns Map of source aliases to their WHERE clauses
@@ -246,10 +249,19 @@ function extractSourceWhereClauses(
246
249
  // Group clauses by single-source vs multi-source
247
250
  const groupedClauses = groupWhereClauses(analyzedClauses)
248
251
 
252
+ // Determine which source aliases are on the nullable side of outer joins.
253
+ // WHERE clauses for these sources must not be used for index optimization
254
+ // because they should filter the final joined result, not the input data.
255
+ const nullableSources = getNullableJoinSources(query)
256
+
249
257
  // Only include single-source clauses that reference collections directly
258
+ // and are not on the nullable side of an outer join
250
259
  for (const [sourceAlias, whereClause] of groupedClauses.singleSource) {
251
260
  // Check if this source alias corresponds to a collection reference
252
- if (isCollectionReference(query, sourceAlias)) {
261
+ if (
262
+ isCollectionReference(query, sourceAlias) &&
263
+ !nullableSources.has(sourceAlias)
264
+ ) {
253
265
  sourceWhereClauses.set(sourceAlias, whereClause)
254
266
  }
255
267
  }
@@ -283,6 +295,36 @@ function isCollectionReference(query: QueryIR, sourceAlias: string): boolean {
283
295
  return false
284
296
  }
285
297
 
298
/**
 * Returns the set of source aliases that are on the nullable side of outer joins.
 *
 * Joins are walked in declaration order. The main `from` source together with
 * every source joined so far is treated as the left side of the next join:
 *
 * - LEFT join: the newly joined (right) source is nullable.
 * - RIGHT join: every source already on the left side is nullable.
 * - FULL join: both of the above.
 * - Any other join type adds nothing to the set.
 *
 * WHERE clauses that reference only a nullable source must not be pushed down
 * into that source's subquery or used for index optimization, because doing so
 * changes the join semantics: rows that should be excluded by the WHERE become
 * unmatched outer-join rows (with the nullable side set to undefined) and
 * incorrectly survive residual filtering.
 *
 * NOTE(review): marking previously joined sources (not just the main source)
 * on a RIGHT/FULL join assumes joins are applied sequentially, so that e.g.
 * `A inner B right C` preserves C against the combined `A ⋈ B` — confirm
 * against the join compiler. The assumption is conservative: a source listed
 * here only loses an optimization, it never changes query results.
 *
 * @param query - The QueryIR whose `from` and `join` clauses are inspected
 * @returns Aliases whose single-source WHERE clauses must not be pushed down;
 *          empty when the query has no outer joins
 */
function getNullableJoinSources(query: QueryIR): Set<string> {
  const nullable = new Set<string>()
  if (!query.join) {
    return nullable
  }

  // Aliases that make up the left side of the join currently being examined.
  const leftSideAliases: Array<string> = [query.from.alias]

  for (const join of query.join) {
    const joinedAlias = join.from.alias

    if (join.type === `right` || join.type === `full`) {
      // The right side is preserved, so everything accumulated on the left
      // can come back as undefined for unmatched rows.
      for (const alias of leftSideAliases) {
        nullable.add(alias)
      }
    }
    if (join.type === `left` || join.type === `full`) {
      // The left side is preserved, so the newly joined source can be undefined.
      nullable.add(joinedAlias)
    }

    // The joined source belongs to the left side of any subsequent join.
    leftSideAliases.push(joinedAlias)
  }

  return nullable
}
327
+
286
328
  /**
287
329
  * Applies recursive predicate pushdown optimization.
288
330
  *
@@ -635,10 +677,25 @@ function applyOptimizations(
635
677
  // Track which single-source clauses were actually optimized
636
678
  const actuallyOptimized = new Set<string>()
637
679
 
680
+ // Determine which source aliases are on the nullable side of outer joins.
681
+ const nullableSources = getNullableJoinSources(query)
682
+
683
+ // Build a filtered copy of singleSource that excludes nullable-side clauses.
684
+ // Pushing a WHERE clause into the nullable side's subquery pre-filters the
685
+ // data before the join, converting "matched but WHERE-excluded" rows into
686
+ // "unmatched" outer-join rows. These are indistinguishable from genuinely
687
+ // unmatched rows, so the residual WHERE cannot correct the result.
688
+ const pushableSingleSource = new Map<string, BasicExpression<boolean>>()
689
+ for (const [source, clause] of groupedClauses.singleSource) {
690
+ if (!nullableSources.has(source)) {
691
+ pushableSingleSource.set(source, clause)
692
+ }
693
+ }
694
+
638
695
  // Optimize the main FROM clause and track what was optimized
639
696
  const optimizedFrom = optimizeFromWithTracking(
640
697
  query.from,
641
- groupedClauses.singleSource,
698
+ pushableSingleSource,
642
699
  actuallyOptimized,
643
700
  )
644
701
 
@@ -648,7 +705,7 @@ function applyOptimizations(
648
705
  ...joinClause,
649
706
  from: optimizeFromWithTracking(
650
707
  joinClause.from,
651
- groupedClauses.singleSource,
708
+ pushableSingleSource,
652
709
  actuallyOptimized,
653
710
  ),
654
711
  }))
@@ -663,12 +720,7 @@ function applyOptimizations(
663
720
  }
664
721
 
665
722
  // Determine if we need residual clauses (when query has outer JOINs)
666
- const hasOuterJoins =
667
- query.join &&
668
- query.join.some(
669
- (join) =>
670
- join.type === `left` || join.type === `right` || join.type === `full`,
671
- )
723
+ const hasOuterJoins = nullableSources.size > 0
672
724
 
673
725
  // Add single-source clauses
674
726
  for (const [source, clause] of groupedClauses.singleSource) {