squirreling 0.7.10 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,35 +1,27 @@
1
- import { missingClauseError } from '../parseErrors.js'
2
- import { unsupportedOperationError } from '../executionErrors.js'
3
- import { generatorSource, memorySource } from '../backend/dataSource.js'
1
+ import { memorySource } from '../backend/dataSource.js'
2
+ import { tableNotFoundError } from '../executionErrors.js'
3
+ import { evaluateExpr } from '../expression/evaluate.js'
4
4
  import { parseSql } from '../parse/parse.js'
5
- import { containsAggregate, extractColumns } from './columns.js'
6
- import { evaluateExpr } from './expression.js'
7
- import { evaluateHavingExpr } from './having.js'
8
- import { executeJoins } from './join.js'
9
- import { resolveTableSource } from './tableSource.js'
10
- import { compareForTerm, defaultDerivedAlias, stringify } from './utils.js'
5
+ import { planSql } from '../plan/plan.js'
6
+ import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
7
+ import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
8
+ import { executeSort } from './sort.js'
9
+ import { defaultDerivedAlias, stableRowKey } from './utils.js'
11
10
 
12
11
  /**
13
- * @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteSqlOptions, ExprNode, OrderByItem, QueryHints, SelectColumn, SelectStatement, SqlPrimitive, UserDefinedFunction, WithClause } from '../types.js'
12
+ * @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteContext, ExecuteSqlOptions, ExprNode, SelectStatement } from '../types.js'
13
+ * @import { DistinctNode, FilterNode, LimitNode, ProjectNode, QueryPlan, ScanNode } from '../plan/types.js'
14
14
  */
15
15
 
16
16
  /**
17
- * Executes a SQL SELECT query against named data sources
17
+ * Executes a SQL SELECT query against tables
18
18
  *
19
- * @param {ExecuteSqlOptions} options - the execution options
20
- * @yields {AsyncRow} async generator yielding result rows
19
+ * @param {ExecuteSqlOptions} options
20
+ * @yields {AsyncRow}
21
21
  */
22
22
  export async function* executeSql({ tables, query, functions, signal }) {
23
23
  const select = typeof query === 'string' ? parseSql({ query, functions }) : query
24
24
 
25
- // Check for unsupported operations
26
- if (!select.from) {
27
- throw missingClauseError({
28
- missing: 'FROM clause',
29
- context: 'SELECT statement',
30
- })
31
- }
32
-
33
25
  // Normalize tables: convert arrays to AsyncDataSource
34
26
  /** @type {Record<string, AsyncDataSource>} */
35
27
  const normalizedTables = {}
@@ -41,7 +33,7 @@ export async function* executeSql({ tables, query, functions, signal }) {
41
33
  }
42
34
  }
43
35
 
44
- yield* executeSelect({ select, tables: normalizedTables, withClause: select.with, functions, signal })
36
+ yield* executeSelect({ select, context: { tables: normalizedTables, functions, signal } })
45
37
  }
46
38
 
47
39
  /**
@@ -49,261 +41,166 @@ export async function* executeSql({ tables, query, functions, signal }) {
49
41
  *
50
42
  * @param {Object} options
51
43
  * @param {SelectStatement} options.select
52
- * @param {Record<string, AsyncDataSource>} options.tables
53
- * @param {WithClause} [options.withClause] - WITH clause containing CTE definitions
54
- * @param {Record<string, UserDefinedFunction>} [options.functions]
55
- * @param {AbortSignal} [options.signal]
44
+ * @param {ExecuteContext} options.context
56
45
  * @yields {AsyncRow}
57
46
  */
58
- export async function* executeSelect({ select, tables, withClause, functions, signal }) {
59
- /** @type {AsyncDataSource} */
60
- let dataSource
61
- /** @type {string} */
62
- let leftTable
63
-
64
- if (select.from.kind === 'table') {
65
- const tableName = select.from.table
66
- leftTable = select.from.alias ?? tableName
67
- dataSource = resolveTableSource(tableName, tables, withClause, executeSelect, functions, signal)
68
- } else {
69
- // Nested subquery - recursively resolve
70
- leftTable = select.from.alias
71
- dataSource = generatorSource(executeSelect({
72
- select: select.from.query,
73
- tables,
74
- withClause,
75
- functions,
76
- signal,
77
- }))
78
- }
79
-
80
- // Execute JOINs if present
81
- if (select.joins.length) {
82
- dataSource = await executeJoins({
83
- leftSource: dataSource,
84
- joins: select.joins,
85
- leftTable,
86
- tables,
87
- withClause,
88
- functions,
89
- executeSelectFn: executeSelect,
90
- signal,
91
- })
92
- }
93
-
94
- yield* evaluateSelectAst({ select, dataSource, tables, functions, signal })
47
+ export async function* executeSelect({ select, context }) {
48
+ const plan = planSql({ query: select, functions: context.functions })
49
+ yield* executePlan({ plan, context })
95
50
  }
96
51
 
97
52
  /**
98
- * Creates a stable string key for a row to enable deduplication
53
+ * Executes a query plan and yields result rows
99
54
  *
100
- * @param {AsyncCells} cells
101
- * @returns {Promise<string>} a stable string representation of the row
55
+ * @param {Object} options
56
+ * @param {QueryPlan} options.plan - the query plan to execute
57
+ * @param {ExecuteContext} options.context - execution context
58
+ * @returns {AsyncGenerator<AsyncRow>}
102
59
  */
103
- async function stableRowKey(cells) {
104
- const keys = Object.keys(cells).sort()
105
- /** @type {string[]} */
106
- const parts = []
107
- for (const k of keys) {
108
- const v = await cells[k]()
109
- parts.push(k + ':' + stringify(v))
60
+ export async function* executePlan({ plan, context }) {
61
+ if (plan.type === 'Scan') {
62
+ yield* executeScan(plan, context)
63
+ } else if (plan.type === 'Filter') {
64
+ yield* executeFilter(plan, context)
65
+ } else if (plan.type === 'Project') {
66
+ yield* executeProject(plan, context)
67
+ } else if (plan.type === 'HashJoin') {
68
+ yield* executeHashJoin(plan, context)
69
+ } else if (plan.type === 'NestedLoopJoin') {
70
+ yield* executeNestedLoopJoin(plan, context)
71
+ } else if (plan.type === 'PositionalJoin') {
72
+ yield* executePositionalJoin(plan, context)
73
+ } else if (plan.type === 'HashAggregate') {
74
+ yield* executeHashAggregate(plan, context)
75
+ } else if (plan.type === 'ScalarAggregate') {
76
+ yield* executeScalarAggregate(plan, context)
77
+ } else if (plan.type === 'Sort') {
78
+ yield* executeSort(plan, context)
79
+ } else if (plan.type === 'Distinct') {
80
+ yield* executeDistinct(plan, context)
81
+ } else if (plan.type === 'Limit') {
82
+ yield* executeLimit(plan, context)
110
83
  }
111
- return parts.join('|')
112
84
  }
113
85
 
114
86
  /**
115
- * Applies DISTINCT filtering to remove duplicate rows
87
+ * Executes a table scan
116
88
  *
117
- * @param {AsyncRow[]} rows - the input rows
118
- * @param {boolean} distinct - whether to apply deduplication
119
- * @returns {Promise<AsyncRow[]>} the deduplicated rows
89
+ * @param {ScanNode} plan
90
+ * @param {ExecuteContext} context
91
+ * @yields {AsyncRow}
120
92
  */
121
- async function applyDistinct(rows, distinct) {
122
- if (!distinct) return rows
123
- /** @type {Set<string>} */
124
- const seen = new Set()
125
- /** @type {AsyncRow[]} */
126
- const result = []
127
- for (const row of rows) {
128
- const key = await stableRowKey(row.cells)
129
- if (seen.has(key)) continue
130
- seen.add(key)
131
- result.push(row)
93
+ async function* executeScan(plan, context) {
94
+ const { tables, signal } = context
95
+ const dataSource = tables[plan.table]
96
+ if (dataSource === undefined) {
97
+ throw tableNotFoundError({ tableName: plan.table })
132
98
  }
133
- return result
134
- }
135
-
136
- /**
137
- * Applies ORDER BY sorting to rows using multi-pass lazy evaluation.
138
- * Secondary ORDER BY columns are only evaluated for rows that tie on
139
- * previous columns, reducing expensive cell evaluations.
140
- *
141
- * @param {Object} options
142
- * @param {AsyncRow[]} options.rows - the input rows
143
- * @param {OrderByItem[]} options.orderBy - the sort specifications
144
- * @param {Record<string, AsyncDataSource>} options.tables
145
- * @param {Record<string, UserDefinedFunction>} [options.functions]
146
- * @param {Map<string, ExprNode>} [options.aliases] - SELECT column aliases for ORDER BY resolution
147
- * @returns {Promise<AsyncRow[]>} the sorted rows
148
- */
149
- async function sortRows({ rows, orderBy, tables, functions, aliases }) {
150
- if (!orderBy.length) return rows
151
99
 
152
- // Cache for evaluated values: evaluatedValues[rowIdx][colIdx]
153
- /** @type {(SqlPrimitive | undefined)[][]} */
154
- const evaluatedValues = rows.map(() => Array(orderBy.length))
100
+ const scanResult = dataSource.scan({ ...plan.hints, signal })
101
+ if (!scanResult.rows) {
102
+ throw new Error(`Data source "${plan.table}" scan() must return a ScanResults object with { rows, appliedWhere, appliedLimitOffset }`)
103
+ }
104
+ const { rows, appliedWhere, appliedLimitOffset } = scanResult
155
105
 
156
- // Start with all indices in one group
157
- /** @type {number[][]} */
158
- let groups = [rows.map((_, i) => i)]
106
+ // Applied limit/offset without applied where is invalid
107
+ const hasLimitOffset = plan.hints?.limit !== undefined || plan.hints?.offset // 0 offset is noop
108
+ if (!appliedWhere && appliedLimitOffset && plan.hints?.where && hasLimitOffset) {
109
+ throw new Error(`Data source "${plan.table}" applied limit/offset without applying where`)
110
+ }
159
111
 
160
- // Process each ORDER BY column incrementally
161
- for (let orderByIdx = 0; orderByIdx < orderBy.length; orderByIdx++) {
162
- const term = orderBy[orderByIdx]
163
- /** @type {number[][]} */
164
- const nextGroups = []
112
+ let result = rows
165
113
 
166
- for (const group of groups) {
167
- // Single-element groups don't need sorting or evaluation
168
- if (group.length <= 1) {
169
- nextGroups.push(group)
170
- continue
171
- }
114
+ // Apply WHERE if data source did not
115
+ if (!appliedWhere && plan.hints?.where) {
116
+ result = filterRows(result, plan.hints.where, context)
117
+ }
172
118
 
173
- // Evaluate this column for all rows in the group
174
- for (const idx of group) {
175
- if (evaluatedValues[idx][orderByIdx] === undefined) {
176
- evaluatedValues[idx][orderByIdx] = await evaluateExpr({
177
- node: term.expr,
178
- row: rows[idx],
179
- tables,
180
- functions,
181
- aliases,
182
- })
183
- }
184
- }
119
+ // Apply LIMIT/OFFSET if data source did not
120
+ if (!appliedLimitOffset && hasLimitOffset) {
121
+ result = limitRows(result, plan.hints.limit, plan.hints.offset, signal)
122
+ }
185
123
 
186
- // Sort the group by this column
187
- group.sort((aIdx, bIdx) => {
188
- const av = evaluatedValues[aIdx][orderByIdx]
189
- const bv = evaluatedValues[bIdx][orderByIdx]
190
- return compareForTerm(av, bv, term)
191
- })
124
+ yield* result
125
+ }
192
126
 
193
- // Split into sub-groups based on ties (for next column)
194
- if (orderByIdx < orderBy.length - 1) {
195
- /** @type {number[]} */
196
- let currentSubGroup = [group[0]]
197
- for (let i = 1; i < group.length; i++) {
198
- const prevIdx = group[i - 1]
199
- const currIdx = group[i]
200
- const prevVal = evaluatedValues[prevIdx][orderByIdx]
201
- const currVal = evaluatedValues[currIdx][orderByIdx]
127
+ /**
128
+ * Filters rows by a condition
129
+ *
130
+ * @param {AsyncIterable<AsyncRow>} rows
131
+ * @param {ExprNode} condition
132
+ * @param {ExecuteContext} context
133
+ * @yields {AsyncRow}
134
+ */
135
+ async function* filterRows(rows, condition, context) {
136
+ let rowIndex = 0
137
+ for await (const row of rows) {
138
+ if (context.signal?.aborted) return
139
+ rowIndex++
140
+ const pass = await evaluateExpr({ node: condition, row, rowIndex, context })
141
+ if (pass) yield row
142
+ }
143
+ }
202
144
 
203
- if (compareForTerm(prevVal, currVal, term) === 0) {
204
- // Same value, extend current sub-group
205
- currentSubGroup.push(currIdx)
206
- } else {
207
- // Different value, start new sub-group
208
- nextGroups.push(currentSubGroup)
209
- currentSubGroup = [currIdx]
210
- }
211
- }
212
- nextGroups.push(currentSubGroup)
213
- } else {
214
- // Last column, no need to split
215
- nextGroups.push(group)
216
- }
145
+ /**
146
+ * Skips the first `offset` rows, then yields at most `limit` rows
147
+ *
148
+ * @param {AsyncIterable<AsyncRow>} rows
149
+ * @param {number} [limit]
150
+ * @param {number} [offset]
151
+ * @param {AbortSignal} [signal]
152
+ * @yields {AsyncRow}
153
+ */
154
+ async function* limitRows(rows, limit, offset, signal) {
155
+ const skip = offset ?? 0
156
+ const max = limit ?? Infinity
157
+ if (max <= 0) return
158
+ let skipped = 0
159
+ let yielded = 0
160
+ for await (const row of rows) {
161
+ if (signal?.aborted) return
162
+ if (skipped < skip) {
163
+ skipped++
164
+ continue
217
165
  }
218
-
219
- groups = nextGroups
166
+ yield row
167
+ yielded++
168
+ if (yielded >= max) return
220
169
  }
221
-
222
- // Flatten groups to get final sorted indices
223
- return groups.flat().map(i => rows[i])
224
170
  }
225
171
 
226
172
  /**
227
- * Evaluates a select with a resolved FROM data source
173
+ * Executes a filter operation (WHERE clause)
228
174
  *
229
- * @param {Object} options
230
- * @param {SelectStatement} options.select
231
- * @param {AsyncDataSource} options.dataSource
232
- * @param {Record<string, AsyncDataSource>} options.tables
233
- * @param {Record<string, UserDefinedFunction>} [options.functions]
234
- * @param {AbortSignal} [options.signal]
175
+ * @param {FilterNode} plan
176
+ * @param {ExecuteContext} context
235
177
  * @yields {AsyncRow}
236
178
  */
237
- async function* evaluateSelectAst({ select, dataSource, tables, functions, signal }) {
238
- // SQL priority: from, where, group by, having, select, order by, offset, limit
239
-
240
- const hasAggregate = select.columns.some(col => col.kind === 'derived' && containsAggregate(col.expr))
241
- const useGrouping = hasAggregate || select.groupBy.length > 0
242
- const needsBuffering = useGrouping || select.orderBy.length > 0
243
-
244
- if (needsBuffering) {
245
- // BUFFERING PATH: Collect all rows, process, then yield
246
- yield* evaluateBuffered({ select, dataSource, tables, functions, hasAggregate, useGrouping, signal })
247
- } else {
248
- // STREAMING PATH: Yield rows one by one
249
- yield* evaluateStreaming({ select, dataSource, tables, functions, signal })
250
- }
179
+ async function* executeFilter(plan, context) {
180
+ yield* filterRows(executePlan({ plan: plan.child, context }), plan.condition, context)
251
181
  }
252
182
 
253
183
  /**
254
- * Streaming evaluation for simple queries (no ORDER BY or GROUP BY)
255
- * Supports DISTINCT by tracking seen row keys without buffering full rows
184
+ * Executes a projection operation (SELECT columns)
256
185
  *
257
- * @param {Object} options
258
- * @param {SelectStatement} options.select
259
- * @param {AsyncDataSource} options.dataSource
260
- * @param {Record<string, AsyncDataSource>} options.tables
261
- * @param {Record<string, UserDefinedFunction>} [options.functions]
262
- * @param {AbortSignal} [options.signal]
186
+ * @param {ProjectNode} plan
187
+ * @param {ExecuteContext} context
263
188
  * @yields {AsyncRow}
264
189
  */
265
- async function* evaluateStreaming({ select, dataSource, tables, functions, signal }) {
266
- let rowsYielded = 0
267
- let rowsSkipped = 0
190
+ async function* executeProject(plan, context) {
268
191
  let rowIndex = 0
269
- const offset = select.offset ?? 0
270
- const limit = select.limit ?? Infinity
271
- if (limit <= 0) return
272
192
 
273
- // For DISTINCT, track seen row keys
274
- /** @type {Set<string> | undefined} */
275
- const seen = select.distinct ? new Set() : undefined
276
-
277
- // hints for data source optimization
278
- /** @type {QueryHints} */
279
- const hints = {
280
- columns: extractColumns(select),
281
- where: select.where,
282
- limit: select.limit,
283
- offset: select.offset,
284
- }
285
-
286
- for await (const row of dataSource.scan({ hints, signal })) {
193
+ for await (const row of executePlan({ plan: plan.child, context })) {
194
+ if (context.signal?.aborted) return
287
195
  rowIndex++
288
- // WHERE filter
289
- if (select.where) {
290
- const pass = await evaluateExpr({ node: select.where, row, tables, functions, rowIndex })
291
- if (!pass) continue
292
- }
293
-
294
- // For non-DISTINCT queries, we can skip rows before projection (optimization)
295
- if (!seen && rowsSkipped < offset) {
296
- rowsSkipped++
297
- continue
298
- }
196
+ const currentRowIndex = rowIndex
299
197
 
300
- // SELECT projection
301
198
  /** @type {string[]} */
302
199
  const columns = []
303
200
  /** @type {AsyncCells} */
304
201
  const cells = {}
305
- const currentRowIndex = rowIndex
306
- for (const col of select.columns) {
202
+
203
+ for (const col of plan.columns) {
307
204
  if (col.kind === 'star') {
308
205
  for (const key of row.columns) {
309
206
  columns.push(key)
@@ -312,217 +209,50 @@ async function* evaluateStreaming({ select, dataSource, tables, functions, signa
312
209
  } else if (col.kind === 'derived') {
313
210
  const alias = col.alias ?? defaultDerivedAlias(col.expr)
314
211
  columns.push(alias)
315
- cells[alias] = () => evaluateExpr({ node: col.expr, row, tables, functions, rowIndex: currentRowIndex })
316
- }
317
- }
318
-
319
- // DISTINCT: skip duplicate rows
320
- if (seen) {
321
- const key = await stableRowKey(cells)
322
- if (seen.has(key)) continue
323
- seen.add(key)
324
- // OFFSET applies to distinct rows
325
- if (rowsSkipped < offset) {
326
- rowsSkipped++
327
- continue
212
+ cells[alias] = () => evaluateExpr({
213
+ node: col.expr,
214
+ row,
215
+ rowIndex: currentRowIndex,
216
+ context,
217
+ })
328
218
  }
329
219
  }
330
220
 
331
221
  yield { columns, cells }
332
- rowsYielded++
333
- if (rowsYielded >= limit) {
334
- break
335
- }
336
222
  }
337
223
  }
338
224
 
339
225
  /**
340
- * Buffered evaluation for complex queries (with ORDER BY or GROUP BY)
226
+ * Executes a distinct operation
341
227
  *
342
- * @param {Object} options
343
- * @param {SelectStatement} options.select
344
- * @param {AsyncDataSource} options.dataSource
345
- * @param {Record<string, AsyncDataSource>} options.tables
346
- * @param {Record<string, UserDefinedFunction>} [options.functions]
347
- * @param {boolean} options.hasAggregate
348
- * @param {boolean} options.useGrouping
349
- * @param {AbortSignal} [options.signal]
228
+ * @param {DistinctNode} plan
229
+ * @param {ExecuteContext} context
350
230
  * @yields {AsyncRow}
351
231
  */
352
- async function* evaluateBuffered({ select, dataSource, tables, functions, hasAggregate, useGrouping, signal }) {
353
- // Build hints for data source optimization
354
- // Note: limit/offset not passed here since buffering needs all rows for sorting/grouping
355
- /** @type {QueryHints} */
356
- const hints = {
357
- where: select.where,
358
- columns: extractColumns(select),
359
- }
360
-
361
- // Step 1: Collect all rows from data source
362
- /** @type {AsyncRow[]} */
363
- const working = []
364
- for await (const row of dataSource.scan({ hints, signal })) {
365
- working.push(row)
366
- }
367
-
368
- // Step 2: WHERE clause filtering
369
- /** @type {AsyncRow[]} */
370
- const filtered = []
371
-
372
- for (let i = 0; i < working.length; i++) {
373
- const row = working[i]
374
- const rowIndex = i + 1 // 1-based
375
- if (select.where) {
376
- const passes = await evaluateExpr({ node: select.where, row, tables, functions, rowIndex })
377
-
378
- if (!passes) {
379
- continue
380
- }
381
- }
382
- filtered.push(row)
383
- }
384
-
385
- // Step 3: Projection (grouping vs non-grouping)
386
- /** @type {AsyncRow[]} */
387
- let projected = []
388
-
389
- if (useGrouping) {
390
- // Grouping due to GROUP BY or aggregate functions
391
- /** @type {AsyncRow[][]} */
392
- const groups = []
393
-
394
- if (select.groupBy.length) {
395
- /** @type {Map<string, AsyncRow[]>} */
396
- const map = new Map()
397
- for (const row of filtered) {
398
- /** @type {string[]} */
399
- const keyParts = []
400
- for (const expr of select.groupBy) {
401
- const v = await evaluateExpr({ node: expr, row, tables, functions })
402
- keyParts.push(stringify(v))
403
- }
404
- const key = keyParts.join('|')
405
- let group = map.get(key)
406
- if (!group) {
407
- group = []
408
- map.set(key, group)
409
- groups.push(group)
410
- }
411
- group.push(row)
412
- }
413
- } else {
414
- groups.push(filtered)
415
- }
416
-
417
- const hasStar = select.columns.some(col => col.kind === 'star')
418
- if (hasStar && hasAggregate) {
419
- throw unsupportedOperationError({
420
- operation: 'SELECT * with aggregate functions is not supported',
421
- hint: 'Replace * with specific column names when using aggregate functions.',
422
- })
423
- }
424
-
425
- for (const group of groups) {
426
- const columns = []
427
- /** @type {AsyncCells} */
428
- const cells = {}
429
- for (const col of select.columns) {
430
- if (col.kind === 'star') {
431
- const firstRow = group[0]
432
- if (firstRow) {
433
- for (const key of firstRow.columns) {
434
- columns.push(key)
435
- cells[key] = firstRow.cells[key]
436
- }
437
- }
438
- continue
439
- }
440
-
441
- if (col.kind === 'derived') {
442
- const alias = col.alias ?? defaultDerivedAlias(col.expr)
443
- columns.push(alias)
444
- // Pass group to evaluateExpr so it can handle aggregate functions within expressions
445
- // For empty groups, still provide an empty row context for aggregates to return appropriate values
446
- cells[alias] = () => evaluateExpr({ node: col.expr, row: group[0] ?? { columns: [], cells: {} }, tables, functions, rows: group })
447
- continue
448
- }
449
- }
450
- const asyncRow = { columns, cells }
451
-
452
- // Apply HAVING filter before adding to projected results
453
- if (select.having) {
454
- if (!await evaluateHavingExpr({ expr: select.having, row: asyncRow, group, tables, functions })) {
455
- continue
456
- }
457
- }
458
-
459
- projected.push(asyncRow)
460
- }
461
- } else {
462
- // No grouping, simple projection
463
- // Sort before projection so ORDER BY can access columns not in SELECT
464
-
465
- // Pass aliases so ORDER BY can reference SELECT column aliases
466
- /** @type {Map<string, ExprNode>} */
467
- const aliases = new Map()
468
- for (const col of select.columns) {
469
- if (col.kind === 'derived' && col.alias) {
470
- aliases.set(col.alias, col.expr)
471
- }
472
- }
473
- const sorted = await sortRows({ rows: filtered, orderBy: select.orderBy, tables, functions, aliases })
474
-
475
- // OPTIMIZATION: For non-DISTINCT queries, apply OFFSET/LIMIT before projection
476
- // to avoid reading expensive cells for rows that won't be in the final result
477
- let rowsToProject = sorted
478
- if (!select.distinct) {
479
- const start = select.offset ?? 0
480
- const end = select.limit ? start + select.limit : sorted.length
481
- rowsToProject = sorted.slice(start, end)
482
- }
483
-
484
- for (const row of rowsToProject) {
485
- const columns = []
486
- /** @type {AsyncCells} */
487
- const cells = {}
488
- for (const col of select.columns) {
489
- if (col.kind === 'star') {
490
- for (const key of row.columns) {
491
- columns.push(key)
492
- cells[key] = row.cells[key]
493
- }
494
- } else if (col.kind === 'derived') {
495
- const alias = col.alias ?? defaultDerivedAlias(col.expr)
496
- columns.push(alias)
497
- cells[alias] = () => evaluateExpr({ node: col.expr, row, tables, functions })
498
- }
499
- }
500
- projected.push({ columns, cells })
501
- }
502
- }
232
+ async function* executeDistinct(plan, context) {
233
+ const { signal } = context
503
234
 
504
- // Step 4: DISTINCT
505
- projected = await applyDistinct(projected, select.distinct)
506
-
507
- // Step 5: ORDER BY (final sort for grouped queries)
508
- if (useGrouping) {
509
- projected = await sortRows({ rows: projected, orderBy: select.orderBy, tables, functions })
510
- }
235
+ /** @type {Set<string>} */
236
+ const seen = new Set()
511
237
 
512
- // Step 6: OFFSET and LIMIT
513
- // For non-DISTINCT, non-grouping queries, OFFSET/LIMIT was already applied before projection
514
- if (select.distinct || useGrouping) {
515
- const start = select.offset ?? 0
516
- const end = select.limit ? start + select.limit : projected.length
238
+ for await (const row of executePlan({ plan: plan.child, context })) {
239
+ if (signal?.aborted) return
517
240
 
518
- // Step 7: Yield results
519
- for (let i = start; i < end && i < projected.length; i++) {
520
- yield projected[i]
521
- }
522
- } else {
523
- // Already limited, yield all projected rows
524
- for (const row of projected) {
241
+ const key = await stableRowKey(row.cells)
242
+ if (!seen.has(key)) {
243
+ seen.add(key)
525
244
  yield row
526
245
  }
527
246
  }
528
247
  }
248
+
249
+ /**
250
+ * Executes a limit operation (LIMIT/OFFSET)
251
+ *
252
+ * @param {LimitNode} plan
253
+ * @param {ExecuteContext} context
254
+ * @yields {AsyncRow}
255
+ */
256
+ async function* executeLimit(plan, context) {
257
+ yield* limitRows(executePlan({ plan: plan.child, context }), plan.limit, plan.offset, context.signal)
258
+ }