squirreling 0.7.10 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -0
- package/package.json +4 -4
- package/src/backend/dataSource.js +52 -47
- package/src/execute/aggregates.js +150 -0
- package/src/execute/columns.js +0 -39
- package/src/execute/execute.js +158 -415
- package/src/execute/join.js +179 -333
- package/src/execute/sort.js +99 -0
- package/src/execute/utils.js +18 -49
- package/src/executionErrors.js +10 -10
- package/src/expression/binary.js +51 -0
- package/src/{execute → expression}/date.js +18 -18
- package/src/{execute/expression.js → expression/evaluate.js} +56 -64
- package/src/{execute → expression}/math.js +46 -81
- package/src/{execute → expression}/regexp.js +7 -7
- package/src/{execute → expression}/strings.js +33 -45
- package/src/index.d.ts +2 -1
- package/src/parse/expression.js +42 -50
- package/src/parse/joins.js +7 -2
- package/src/parse/parse.js +14 -3
- package/src/parse/state.js +2 -1
- package/src/parse/types.d.ts +30 -0
- package/src/plan/plan.js +234 -0
- package/src/plan/types.d.ts +101 -0
- package/src/types.d.ts +19 -39
- package/src/validation.js +64 -1
- package/src/validationErrors.js +7 -7
- package/src/execute/having.js +0 -202
- package/src/execute/tableSource.js +0 -63
package/src/execute/execute.js
CHANGED
|
@@ -1,16 +1,17 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
1
|
+
import { memorySource } from '../backend/dataSource.js'
|
|
2
|
+
import { tableNotFoundError } from '../executionErrors.js'
|
|
3
|
+
import { evaluateExpr } from '../expression/evaluate.js'
|
|
4
4
|
import { parseSql } from '../parse/parse.js'
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
5
|
+
import { missingClauseError } from '../parseErrors.js'
|
|
6
|
+
import { queryPlan } from '../plan/plan.js'
|
|
7
|
+
import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
|
|
8
|
+
import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
|
|
9
|
+
import { executeSort } from './sort.js'
|
|
10
|
+
import { defaultDerivedAlias, stableRowKey } from './utils.js'
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
|
-
* @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteSqlOptions, ExprNode,
|
|
13
|
+
* @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteSqlOptions, ExprNode, SelectStatement, UserDefinedFunction } from '../types.js'
|
|
14
|
+
* @import { DistinctNode, ExecuteContext, FilterNode, LimitNode, ProjectNode, QueryPlan, ScanNode } from '../plan/types.js'
|
|
14
15
|
*/
|
|
15
16
|
|
|
16
17
|
/**
|
|
@@ -41,7 +42,7 @@ export async function* executeSql({ tables, query, functions, signal }) {
|
|
|
41
42
|
}
|
|
42
43
|
}
|
|
43
44
|
|
|
44
|
-
yield* executeSelect({ select, tables: normalizedTables,
|
|
45
|
+
yield* executeSelect({ select, tables: normalizedTables, functions, signal })
|
|
45
46
|
}
|
|
46
47
|
|
|
47
48
|
/**
|
|
@@ -50,260 +51,167 @@ export async function* executeSql({ tables, query, functions, signal }) {
|
|
|
50
51
|
* @param {Object} options
|
|
51
52
|
* @param {SelectStatement} options.select
|
|
52
53
|
* @param {Record<string, AsyncDataSource>} options.tables
|
|
53
|
-
* @param {WithClause} [options.withClause] - WITH clause containing CTE definitions
|
|
54
54
|
* @param {Record<string, UserDefinedFunction>} [options.functions]
|
|
55
55
|
* @param {AbortSignal} [options.signal]
|
|
56
56
|
* @yields {AsyncRow}
|
|
57
57
|
*/
|
|
58
|
-
export async function* executeSelect({ select, tables,
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
/** @type {string} */
|
|
62
|
-
let leftTable
|
|
63
|
-
|
|
64
|
-
if (select.from.kind === 'table') {
|
|
65
|
-
const tableName = select.from.table
|
|
66
|
-
leftTable = select.from.alias ?? tableName
|
|
67
|
-
dataSource = resolveTableSource(tableName, tables, withClause, executeSelect, functions, signal)
|
|
68
|
-
} else {
|
|
69
|
-
// Nested subquery - recursively resolve
|
|
70
|
-
leftTable = select.from.alias
|
|
71
|
-
dataSource = generatorSource(executeSelect({
|
|
72
|
-
select: select.from.query,
|
|
73
|
-
tables,
|
|
74
|
-
withClause,
|
|
75
|
-
functions,
|
|
76
|
-
signal,
|
|
77
|
-
}))
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
// Execute JOINs if present
|
|
81
|
-
if (select.joins.length) {
|
|
82
|
-
dataSource = await executeJoins({
|
|
83
|
-
leftSource: dataSource,
|
|
84
|
-
joins: select.joins,
|
|
85
|
-
leftTable,
|
|
86
|
-
tables,
|
|
87
|
-
withClause,
|
|
88
|
-
functions,
|
|
89
|
-
executeSelectFn: executeSelect,
|
|
90
|
-
signal,
|
|
91
|
-
})
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
yield* evaluateSelectAst({ select, dataSource, tables, functions, signal })
|
|
58
|
+
export async function* executeSelect({ select, tables, functions, signal }) {
|
|
59
|
+
const plan = queryPlan(select)
|
|
60
|
+
yield* executePlan(plan, { tables, functions, signal })
|
|
95
61
|
}
|
|
96
62
|
|
|
97
63
|
/**
|
|
98
|
-
*
|
|
64
|
+
* Executes a query plan and yields result rows
|
|
99
65
|
*
|
|
100
|
-
* @param {
|
|
101
|
-
* @
|
|
66
|
+
* @param {QueryPlan} plan - the query plan to execute
|
|
67
|
+
* @param {ExecuteContext} context - execution context
|
|
68
|
+
* @returns {AsyncGenerator<AsyncRow>}
|
|
102
69
|
*/
|
|
103
|
-
async function
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
70
|
+
export async function* executePlan(plan, context) {
|
|
71
|
+
if (plan.type === 'Scan') {
|
|
72
|
+
yield* executeScan(plan, context)
|
|
73
|
+
} else if (plan.type === 'Filter') {
|
|
74
|
+
yield* executeFilter(plan, context)
|
|
75
|
+
} else if (plan.type === 'Project') {
|
|
76
|
+
yield* executeProject(plan, context)
|
|
77
|
+
} else if (plan.type === 'HashJoin') {
|
|
78
|
+
yield* executeHashJoin(plan, context)
|
|
79
|
+
} else if (plan.type === 'NestedLoopJoin') {
|
|
80
|
+
yield* executeNestedLoopJoin(plan, context)
|
|
81
|
+
} else if (plan.type === 'PositionalJoin') {
|
|
82
|
+
yield* executePositionalJoin(plan, context)
|
|
83
|
+
} else if (plan.type === 'HashAggregate') {
|
|
84
|
+
yield* executeHashAggregate(plan, context)
|
|
85
|
+
} else if (plan.type === 'ScalarAggregate') {
|
|
86
|
+
yield* executeScalarAggregate(plan, context)
|
|
87
|
+
} else if (plan.type === 'Sort') {
|
|
88
|
+
yield* executeSort(plan, context)
|
|
89
|
+
} else if (plan.type === 'Distinct') {
|
|
90
|
+
yield* executeDistinct(plan, context)
|
|
91
|
+
} else if (plan.type === 'Limit') {
|
|
92
|
+
yield* executeLimit(plan, context)
|
|
110
93
|
}
|
|
111
|
-
return parts.join('|')
|
|
112
94
|
}
|
|
113
95
|
|
|
114
96
|
/**
|
|
115
|
-
*
|
|
97
|
+
* Executes a table scan
|
|
116
98
|
*
|
|
117
|
-
* @param {
|
|
118
|
-
* @param {
|
|
119
|
-
* @
|
|
99
|
+
* @param {ScanNode} plan
|
|
100
|
+
* @param {ExecuteContext} context
|
|
101
|
+
* @yields {AsyncRow}
|
|
120
102
|
*/
|
|
121
|
-
async function
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
const result = []
|
|
127
|
-
for (const row of rows) {
|
|
128
|
-
const key = await stableRowKey(row.cells)
|
|
129
|
-
if (seen.has(key)) continue
|
|
130
|
-
seen.add(key)
|
|
131
|
-
result.push(row)
|
|
103
|
+
async function* executeScan(plan, context) {
|
|
104
|
+
const { tables, signal } = context
|
|
105
|
+
const dataSource = tables[plan.table]
|
|
106
|
+
if (dataSource === undefined) {
|
|
107
|
+
throw tableNotFoundError({ tableName: plan.table })
|
|
132
108
|
}
|
|
133
|
-
return result
|
|
134
|
-
}
|
|
135
109
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
* @param {Object} options
|
|
142
|
-
* @param {AsyncRow[]} options.rows - the input rows
|
|
143
|
-
* @param {OrderByItem[]} options.orderBy - the sort specifications
|
|
144
|
-
* @param {Record<string, AsyncDataSource>} options.tables
|
|
145
|
-
* @param {Record<string, UserDefinedFunction>} [options.functions]
|
|
146
|
-
* @param {Map<string, ExprNode>} [options.aliases] - SELECT column aliases for ORDER BY resolution
|
|
147
|
-
* @returns {Promise<AsyncRow[]>} the sorted rows
|
|
148
|
-
*/
|
|
149
|
-
async function sortRows({ rows, orderBy, tables, functions, aliases }) {
|
|
150
|
-
if (!orderBy.length) return rows
|
|
151
|
-
|
|
152
|
-
// Cache for evaluated values: evaluatedValues[rowIdx][colIdx]
|
|
153
|
-
/** @type {(SqlPrimitive | undefined)[][]} */
|
|
154
|
-
const evaluatedValues = rows.map(() => Array(orderBy.length))
|
|
110
|
+
const scanResult = dataSource.scan({ ...plan.hints, signal })
|
|
111
|
+
if (!scanResult.rows) {
|
|
112
|
+
throw new Error(`Data source "${plan.table}" scan() must return a ScanResults object with { rows, appliedWhere, appliedLimitOffset }`)
|
|
113
|
+
}
|
|
114
|
+
const { rows, appliedWhere, appliedLimitOffset } = scanResult
|
|
155
115
|
|
|
156
|
-
//
|
|
157
|
-
|
|
158
|
-
|
|
116
|
+
// Applied limit/offset without applied where is invalid
|
|
117
|
+
const hasLimitOffset = plan.hints?.limit !== undefined || plan.hints?.offset // 0 offset is noop
|
|
118
|
+
if (!appliedWhere && appliedLimitOffset && plan.hints?.where && hasLimitOffset) {
|
|
119
|
+
throw new Error(`Data source "${plan.table}" applied limit/offset without applying where`)
|
|
120
|
+
}
|
|
159
121
|
|
|
160
|
-
|
|
161
|
-
for (let orderByIdx = 0; orderByIdx < orderBy.length; orderByIdx++) {
|
|
162
|
-
const term = orderBy[orderByIdx]
|
|
163
|
-
/** @type {number[][]} */
|
|
164
|
-
const nextGroups = []
|
|
122
|
+
let result = rows
|
|
165
123
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
continue
|
|
171
|
-
}
|
|
124
|
+
// Apply WHERE if data source did not
|
|
125
|
+
if (!appliedWhere && plan.hints?.where) {
|
|
126
|
+
result = filterRows(result, plan.hints.where, context)
|
|
127
|
+
}
|
|
172
128
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
node: term.expr,
|
|
178
|
-
row: rows[idx],
|
|
179
|
-
tables,
|
|
180
|
-
functions,
|
|
181
|
-
aliases,
|
|
182
|
-
})
|
|
183
|
-
}
|
|
184
|
-
}
|
|
129
|
+
// Apply LIMIT/OFFSET if data source did not
|
|
130
|
+
if (!appliedLimitOffset && hasLimitOffset) {
|
|
131
|
+
result = limitRows(result, plan.hints.limit, plan.hints.offset, signal)
|
|
132
|
+
}
|
|
185
133
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
const av = evaluatedValues[aIdx][orderByIdx]
|
|
189
|
-
const bv = evaluatedValues[bIdx][orderByIdx]
|
|
190
|
-
return compareForTerm(av, bv, term)
|
|
191
|
-
})
|
|
134
|
+
yield* result
|
|
135
|
+
}
|
|
192
136
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
137
|
+
/**
|
|
138
|
+
* Filters rows by a condition
|
|
139
|
+
*
|
|
140
|
+
* @param {AsyncIterable<AsyncRow>} rows
|
|
141
|
+
* @param {ExprNode} condition
|
|
142
|
+
* @param {ExecuteContext} context
|
|
143
|
+
* @yields {AsyncRow}
|
|
144
|
+
*/
|
|
145
|
+
async function* filterRows(rows, condition, context) {
|
|
146
|
+
let rowIndex = 0
|
|
147
|
+
for await (const row of rows) {
|
|
148
|
+
if (context.signal?.aborted) return
|
|
149
|
+
rowIndex++
|
|
150
|
+
const pass = await evaluateExpr({ node: condition, row, rowIndex, ...context })
|
|
151
|
+
if (pass) yield row
|
|
152
|
+
}
|
|
153
|
+
}
|
|
202
154
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
155
|
+
/**
|
|
156
|
+
* Skips the first `offset` rows, then yields at most `limit` rows
|
|
157
|
+
*
|
|
158
|
+
* @param {AsyncIterable<AsyncRow>} rows
|
|
159
|
+
* @param {number} [limit]
|
|
160
|
+
* @param {number} [offset]
|
|
161
|
+
* @param {AbortSignal} [signal]
|
|
162
|
+
* @yields {AsyncRow}
|
|
163
|
+
*/
|
|
164
|
+
async function* limitRows(rows, limit, offset, signal) {
|
|
165
|
+
const skip = offset ?? 0
|
|
166
|
+
const max = limit ?? Infinity
|
|
167
|
+
if (max <= 0) return
|
|
168
|
+
let skipped = 0
|
|
169
|
+
let yielded = 0
|
|
170
|
+
for await (const row of rows) {
|
|
171
|
+
if (signal?.aborted) return
|
|
172
|
+
if (skipped < skip) {
|
|
173
|
+
skipped++
|
|
174
|
+
continue
|
|
217
175
|
}
|
|
218
|
-
|
|
219
|
-
|
|
176
|
+
yield row
|
|
177
|
+
yielded++
|
|
178
|
+
if (yielded >= max) return
|
|
220
179
|
}
|
|
221
|
-
|
|
222
|
-
// Flatten groups to get final sorted indices
|
|
223
|
-
return groups.flat().map(i => rows[i])
|
|
224
180
|
}
|
|
225
181
|
|
|
226
182
|
/**
|
|
227
|
-
*
|
|
183
|
+
* Executes a filter operation (WHERE clause)
|
|
228
184
|
*
|
|
229
|
-
* @param {
|
|
230
|
-
* @param {
|
|
231
|
-
* @param {AsyncDataSource} options.dataSource
|
|
232
|
-
* @param {Record<string, AsyncDataSource>} options.tables
|
|
233
|
-
* @param {Record<string, UserDefinedFunction>} [options.functions]
|
|
234
|
-
* @param {AbortSignal} [options.signal]
|
|
185
|
+
* @param {FilterNode} plan
|
|
186
|
+
* @param {ExecuteContext} context
|
|
235
187
|
* @yields {AsyncRow}
|
|
236
188
|
*/
|
|
237
|
-
async function*
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
const hasAggregate = select.columns.some(col => col.kind === 'derived' && containsAggregate(col.expr))
|
|
241
|
-
const useGrouping = hasAggregate || select.groupBy.length > 0
|
|
242
|
-
const needsBuffering = useGrouping || select.orderBy.length > 0
|
|
243
|
-
|
|
244
|
-
if (needsBuffering) {
|
|
245
|
-
// BUFFERING PATH: Collect all rows, process, then yield
|
|
246
|
-
yield* evaluateBuffered({ select, dataSource, tables, functions, hasAggregate, useGrouping, signal })
|
|
247
|
-
} else {
|
|
248
|
-
// STREAMING PATH: Yield rows one by one
|
|
249
|
-
yield* evaluateStreaming({ select, dataSource, tables, functions, signal })
|
|
250
|
-
}
|
|
189
|
+
async function* executeFilter(plan, context) {
|
|
190
|
+
yield* filterRows(executePlan(plan.child, context), plan.condition, context)
|
|
251
191
|
}
|
|
252
192
|
|
|
253
193
|
/**
|
|
254
|
-
*
|
|
255
|
-
* Supports DISTINCT by tracking seen row keys without buffering full rows
|
|
194
|
+
* Executes a projection operation (SELECT columns)
|
|
256
195
|
*
|
|
257
|
-
* @param {
|
|
258
|
-
* @param {
|
|
259
|
-
* @param {AsyncDataSource} options.dataSource
|
|
260
|
-
* @param {Record<string, AsyncDataSource>} options.tables
|
|
261
|
-
* @param {Record<string, UserDefinedFunction>} [options.functions]
|
|
262
|
-
* @param {AbortSignal} [options.signal]
|
|
196
|
+
* @param {ProjectNode} plan
|
|
197
|
+
* @param {ExecuteContext} context
|
|
263
198
|
* @yields {AsyncRow}
|
|
264
199
|
*/
|
|
265
|
-
async function*
|
|
266
|
-
|
|
267
|
-
let rowsSkipped = 0
|
|
200
|
+
async function* executeProject(plan, context) {
|
|
201
|
+
const { tables, functions, signal } = context
|
|
268
202
|
let rowIndex = 0
|
|
269
|
-
const offset = select.offset ?? 0
|
|
270
|
-
const limit = select.limit ?? Infinity
|
|
271
|
-
if (limit <= 0) return
|
|
272
|
-
|
|
273
|
-
// For DISTINCT, track seen row keys
|
|
274
|
-
/** @type {Set<string> | undefined} */
|
|
275
|
-
const seen = select.distinct ? new Set() : undefined
|
|
276
|
-
|
|
277
|
-
// hints for data source optimization
|
|
278
|
-
/** @type {QueryHints} */
|
|
279
|
-
const hints = {
|
|
280
|
-
columns: extractColumns(select),
|
|
281
|
-
where: select.where,
|
|
282
|
-
limit: select.limit,
|
|
283
|
-
offset: select.offset,
|
|
284
|
-
}
|
|
285
203
|
|
|
286
|
-
for await (const row of
|
|
204
|
+
for await (const row of executePlan(plan.child, context)) {
|
|
205
|
+
if (signal?.aborted) return
|
|
287
206
|
rowIndex++
|
|
288
|
-
|
|
289
|
-
if (select.where) {
|
|
290
|
-
const pass = await evaluateExpr({ node: select.where, row, tables, functions, rowIndex })
|
|
291
|
-
if (!pass) continue
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
// For non-DISTINCT queries, we can skip rows before projection (optimization)
|
|
295
|
-
if (!seen && rowsSkipped < offset) {
|
|
296
|
-
rowsSkipped++
|
|
297
|
-
continue
|
|
298
|
-
}
|
|
207
|
+
const currentRowIndex = rowIndex
|
|
299
208
|
|
|
300
|
-
// SELECT projection
|
|
301
209
|
/** @type {string[]} */
|
|
302
210
|
const columns = []
|
|
303
211
|
/** @type {AsyncCells} */
|
|
304
212
|
const cells = {}
|
|
305
|
-
|
|
306
|
-
for (const col of
|
|
213
|
+
|
|
214
|
+
for (const col of plan.columns) {
|
|
307
215
|
if (col.kind === 'star') {
|
|
308
216
|
for (const key of row.columns) {
|
|
309
217
|
columns.push(key)
|
|
@@ -312,217 +220,52 @@ async function* evaluateStreaming({ select, dataSource, tables, functions, signa
|
|
|
312
220
|
} else if (col.kind === 'derived') {
|
|
313
221
|
const alias = col.alias ?? defaultDerivedAlias(col.expr)
|
|
314
222
|
columns.push(alias)
|
|
315
|
-
cells[alias] = () => evaluateExpr({
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
seen.add(key)
|
|
324
|
-
// OFFSET applies to distinct rows
|
|
325
|
-
if (rowsSkipped < offset) {
|
|
326
|
-
rowsSkipped++
|
|
327
|
-
continue
|
|
223
|
+
cells[alias] = () => evaluateExpr({
|
|
224
|
+
node: col.expr,
|
|
225
|
+
row,
|
|
226
|
+
tables,
|
|
227
|
+
functions,
|
|
228
|
+
rowIndex: currentRowIndex,
|
|
229
|
+
signal,
|
|
230
|
+
})
|
|
328
231
|
}
|
|
329
232
|
}
|
|
330
233
|
|
|
331
234
|
yield { columns, cells }
|
|
332
|
-
rowsYielded++
|
|
333
|
-
if (rowsYielded >= limit) {
|
|
334
|
-
break
|
|
335
|
-
}
|
|
336
235
|
}
|
|
337
236
|
}
|
|
338
237
|
|
|
339
238
|
/**
|
|
340
|
-
*
|
|
239
|
+
* Executes a distinct operation
|
|
341
240
|
*
|
|
342
|
-
* @param {
|
|
343
|
-
* @param {
|
|
344
|
-
* @param {AsyncDataSource} options.dataSource
|
|
345
|
-
* @param {Record<string, AsyncDataSource>} options.tables
|
|
346
|
-
* @param {Record<string, UserDefinedFunction>} [options.functions]
|
|
347
|
-
* @param {boolean} options.hasAggregate
|
|
348
|
-
* @param {boolean} options.useGrouping
|
|
349
|
-
* @param {AbortSignal} [options.signal]
|
|
241
|
+
* @param {DistinctNode} plan
|
|
242
|
+
* @param {ExecuteContext} context
|
|
350
243
|
* @yields {AsyncRow}
|
|
351
244
|
*/
|
|
352
|
-
async function*
|
|
353
|
-
|
|
354
|
-
// Note: limit/offset not passed here since buffering needs all rows for sorting/grouping
|
|
355
|
-
/** @type {QueryHints} */
|
|
356
|
-
const hints = {
|
|
357
|
-
where: select.where,
|
|
358
|
-
columns: extractColumns(select),
|
|
359
|
-
}
|
|
360
|
-
|
|
361
|
-
// Step 1: Collect all rows from data source
|
|
362
|
-
/** @type {AsyncRow[]} */
|
|
363
|
-
const working = []
|
|
364
|
-
for await (const row of dataSource.scan({ hints, signal })) {
|
|
365
|
-
working.push(row)
|
|
366
|
-
}
|
|
367
|
-
|
|
368
|
-
// Step 2: WHERE clause filtering
|
|
369
|
-
/** @type {AsyncRow[]} */
|
|
370
|
-
const filtered = []
|
|
245
|
+
async function* executeDistinct(plan, context) {
|
|
246
|
+
const { signal } = context
|
|
371
247
|
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
const rowIndex = i + 1 // 1-based
|
|
375
|
-
if (select.where) {
|
|
376
|
-
const passes = await evaluateExpr({ node: select.where, row, tables, functions, rowIndex })
|
|
377
|
-
|
|
378
|
-
if (!passes) {
|
|
379
|
-
continue
|
|
380
|
-
}
|
|
381
|
-
}
|
|
382
|
-
filtered.push(row)
|
|
383
|
-
}
|
|
384
|
-
|
|
385
|
-
// Step 3: Projection (grouping vs non-grouping)
|
|
386
|
-
/** @type {AsyncRow[]} */
|
|
387
|
-
let projected = []
|
|
388
|
-
|
|
389
|
-
if (useGrouping) {
|
|
390
|
-
// Grouping due to GROUP BY or aggregate functions
|
|
391
|
-
/** @type {AsyncRow[][]} */
|
|
392
|
-
const groups = []
|
|
393
|
-
|
|
394
|
-
if (select.groupBy.length) {
|
|
395
|
-
/** @type {Map<string, AsyncRow[]>} */
|
|
396
|
-
const map = new Map()
|
|
397
|
-
for (const row of filtered) {
|
|
398
|
-
/** @type {string[]} */
|
|
399
|
-
const keyParts = []
|
|
400
|
-
for (const expr of select.groupBy) {
|
|
401
|
-
const v = await evaluateExpr({ node: expr, row, tables, functions })
|
|
402
|
-
keyParts.push(stringify(v))
|
|
403
|
-
}
|
|
404
|
-
const key = keyParts.join('|')
|
|
405
|
-
let group = map.get(key)
|
|
406
|
-
if (!group) {
|
|
407
|
-
group = []
|
|
408
|
-
map.set(key, group)
|
|
409
|
-
groups.push(group)
|
|
410
|
-
}
|
|
411
|
-
group.push(row)
|
|
412
|
-
}
|
|
413
|
-
} else {
|
|
414
|
-
groups.push(filtered)
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
const hasStar = select.columns.some(col => col.kind === 'star')
|
|
418
|
-
if (hasStar && hasAggregate) {
|
|
419
|
-
throw unsupportedOperationError({
|
|
420
|
-
operation: 'SELECT * with aggregate functions is not supported',
|
|
421
|
-
hint: 'Replace * with specific column names when using aggregate functions.',
|
|
422
|
-
})
|
|
423
|
-
}
|
|
424
|
-
|
|
425
|
-
for (const group of groups) {
|
|
426
|
-
const columns = []
|
|
427
|
-
/** @type {AsyncCells} */
|
|
428
|
-
const cells = {}
|
|
429
|
-
for (const col of select.columns) {
|
|
430
|
-
if (col.kind === 'star') {
|
|
431
|
-
const firstRow = group[0]
|
|
432
|
-
if (firstRow) {
|
|
433
|
-
for (const key of firstRow.columns) {
|
|
434
|
-
columns.push(key)
|
|
435
|
-
cells[key] = firstRow.cells[key]
|
|
436
|
-
}
|
|
437
|
-
}
|
|
438
|
-
continue
|
|
439
|
-
}
|
|
440
|
-
|
|
441
|
-
if (col.kind === 'derived') {
|
|
442
|
-
const alias = col.alias ?? defaultDerivedAlias(col.expr)
|
|
443
|
-
columns.push(alias)
|
|
444
|
-
// Pass group to evaluateExpr so it can handle aggregate functions within expressions
|
|
445
|
-
// For empty groups, still provide an empty row context for aggregates to return appropriate values
|
|
446
|
-
cells[alias] = () => evaluateExpr({ node: col.expr, row: group[0] ?? { columns: [], cells: {} }, tables, functions, rows: group })
|
|
447
|
-
continue
|
|
448
|
-
}
|
|
449
|
-
}
|
|
450
|
-
const asyncRow = { columns, cells }
|
|
451
|
-
|
|
452
|
-
// Apply HAVING filter before adding to projected results
|
|
453
|
-
if (select.having) {
|
|
454
|
-
if (!await evaluateHavingExpr({ expr: select.having, row: asyncRow, group, tables, functions })) {
|
|
455
|
-
continue
|
|
456
|
-
}
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
projected.push(asyncRow)
|
|
460
|
-
}
|
|
461
|
-
} else {
|
|
462
|
-
// No grouping, simple projection
|
|
463
|
-
// Sort before projection so ORDER BY can access columns not in SELECT
|
|
464
|
-
|
|
465
|
-
// Pass aliases so ORDER BY can reference SELECT column aliases
|
|
466
|
-
/** @type {Map<string, ExprNode>} */
|
|
467
|
-
const aliases = new Map()
|
|
468
|
-
for (const col of select.columns) {
|
|
469
|
-
if (col.kind === 'derived' && col.alias) {
|
|
470
|
-
aliases.set(col.alias, col.expr)
|
|
471
|
-
}
|
|
472
|
-
}
|
|
473
|
-
const sorted = await sortRows({ rows: filtered, orderBy: select.orderBy, tables, functions, aliases })
|
|
474
|
-
|
|
475
|
-
// OPTIMIZATION: For non-DISTINCT queries, apply OFFSET/LIMIT before projection
|
|
476
|
-
// to avoid reading expensive cells for rows that won't be in the final result
|
|
477
|
-
let rowsToProject = sorted
|
|
478
|
-
if (!select.distinct) {
|
|
479
|
-
const start = select.offset ?? 0
|
|
480
|
-
const end = select.limit ? start + select.limit : sorted.length
|
|
481
|
-
rowsToProject = sorted.slice(start, end)
|
|
482
|
-
}
|
|
483
|
-
|
|
484
|
-
for (const row of rowsToProject) {
|
|
485
|
-
const columns = []
|
|
486
|
-
/** @type {AsyncCells} */
|
|
487
|
-
const cells = {}
|
|
488
|
-
for (const col of select.columns) {
|
|
489
|
-
if (col.kind === 'star') {
|
|
490
|
-
for (const key of row.columns) {
|
|
491
|
-
columns.push(key)
|
|
492
|
-
cells[key] = row.cells[key]
|
|
493
|
-
}
|
|
494
|
-
} else if (col.kind === 'derived') {
|
|
495
|
-
const alias = col.alias ?? defaultDerivedAlias(col.expr)
|
|
496
|
-
columns.push(alias)
|
|
497
|
-
cells[alias] = () => evaluateExpr({ node: col.expr, row, tables, functions })
|
|
498
|
-
}
|
|
499
|
-
}
|
|
500
|
-
projected.push({ columns, cells })
|
|
501
|
-
}
|
|
502
|
-
}
|
|
503
|
-
|
|
504
|
-
// Step 4: DISTINCT
|
|
505
|
-
projected = await applyDistinct(projected, select.distinct)
|
|
506
|
-
|
|
507
|
-
// Step 5: ORDER BY (final sort for grouped queries)
|
|
508
|
-
if (useGrouping) {
|
|
509
|
-
projected = await sortRows({ rows: projected, orderBy: select.orderBy, tables, functions })
|
|
510
|
-
}
|
|
248
|
+
/** @type {Set<string>} */
|
|
249
|
+
const seen = new Set()
|
|
511
250
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
if (select.distinct || useGrouping) {
|
|
515
|
-
const start = select.offset ?? 0
|
|
516
|
-
const end = select.limit ? start + select.limit : projected.length
|
|
251
|
+
for await (const row of executePlan(plan.child, context)) {
|
|
252
|
+
if (signal?.aborted) return
|
|
517
253
|
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
}
|
|
522
|
-
} else {
|
|
523
|
-
// Already limited, yield all projected rows
|
|
524
|
-
for (const row of projected) {
|
|
254
|
+
const key = await stableRowKey(row.cells)
|
|
255
|
+
if (!seen.has(key)) {
|
|
256
|
+
seen.add(key)
|
|
525
257
|
yield row
|
|
526
258
|
}
|
|
527
259
|
}
|
|
528
260
|
}
|
|
261
|
+
|
|
262
|
+
/**
|
|
263
|
+
* Executes a limit operation (LIMIT/OFFSET)
|
|
264
|
+
*
|
|
265
|
+
* @param {LimitNode} plan
|
|
266
|
+
* @param {ExecuteContext} context
|
|
267
|
+
* @yields {AsyncRow}
|
|
268
|
+
*/
|
|
269
|
+
async function* executeLimit(plan, context) {
|
|
270
|
+
yield* limitRows(executePlan(plan.child, context), plan.limit, plan.offset, context.signal)
|
|
271
|
+
}
|