squirreling 0.7.10 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -0
- package/package.json +4 -4
- package/src/backend/dataSource.js +52 -47
- package/src/execute/aggregates.js +138 -0
- package/src/execute/execute.js +159 -429
- package/src/execute/join.js +168 -338
- package/src/execute/sort.js +94 -0
- package/src/execute/utils.js +18 -49
- package/src/executionErrors.js +10 -10
- package/src/expression/binary.js +51 -0
- package/src/{execute → expression}/date.js +18 -18
- package/src/{execute/expression.js → expression/evaluate.js} +77 -89
- package/src/{execute → expression}/math.js +46 -81
- package/src/{execute → expression}/regexp.js +7 -7
- package/src/{execute → expression}/strings.js +33 -45
- package/src/index.d.ts +26 -3
- package/src/index.js +2 -2
- package/src/parse/comparison.js +7 -7
- package/src/parse/expression.js +55 -63
- package/src/parse/functions.js +23 -6
- package/src/parse/joins.js +7 -2
- package/src/parse/parse.js +68 -70
- package/src/parse/state.js +2 -10
- package/src/parse/tokenize.js +2 -2
- package/src/parse/types.d.ts +30 -0
- package/src/parseErrors.js +5 -4
- package/src/plan/columns.js +149 -0
- package/src/plan/plan.js +304 -0
- package/src/plan/types.d.ts +98 -0
- package/src/types.d.ts +35 -36
- package/src/validation.js +64 -1
- package/src/validationErrors.js +16 -11
- package/src/execute/columns.js +0 -141
- package/src/execute/having.js +0 -202
- package/src/execute/tableSource.js +0 -63
package/src/execute/execute.js
CHANGED
|
@@ -1,35 +1,27 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
1
|
+
import { memorySource } from '../backend/dataSource.js'
|
|
2
|
+
import { tableNotFoundError } from '../executionErrors.js'
|
|
3
|
+
import { evaluateExpr } from '../expression/evaluate.js'
|
|
4
4
|
import { parseSql } from '../parse/parse.js'
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import { compareForTerm, defaultDerivedAlias, stringify } from './utils.js'
|
|
5
|
+
import { planSql } from '../plan/plan.js'
|
|
6
|
+
import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
|
|
7
|
+
import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
|
|
8
|
+
import { executeSort } from './sort.js'
|
|
9
|
+
import { defaultDerivedAlias, stableRowKey } from './utils.js'
|
|
11
10
|
|
|
12
11
|
/**
|
|
13
|
-
* @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteSqlOptions, ExprNode,
|
|
12
|
+
* @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteContext, ExecuteSqlOptions, ExprNode, SelectStatement } from '../types.js'
|
|
13
|
+
* @import { DistinctNode, FilterNode, LimitNode, ProjectNode, QueryPlan, ScanNode } from '../plan/types.js'
|
|
14
14
|
*/
|
|
15
15
|
|
|
16
16
|
/**
|
|
17
|
-
* Executes a SQL SELECT query against
|
|
17
|
+
* Executes a SQL SELECT query against tables
|
|
18
18
|
*
|
|
19
|
-
* @param {ExecuteSqlOptions} options
|
|
20
|
-
* @yields {AsyncRow}
|
|
19
|
+
* @param {ExecuteSqlOptions} options
|
|
20
|
+
* @yields {AsyncRow}
|
|
21
21
|
*/
|
|
22
22
|
export async function* executeSql({ tables, query, functions, signal }) {
|
|
23
23
|
const select = typeof query === 'string' ? parseSql({ query, functions }) : query
|
|
24
24
|
|
|
25
|
-
// Check for unsupported operations
|
|
26
|
-
if (!select.from) {
|
|
27
|
-
throw missingClauseError({
|
|
28
|
-
missing: 'FROM clause',
|
|
29
|
-
context: 'SELECT statement',
|
|
30
|
-
})
|
|
31
|
-
}
|
|
32
|
-
|
|
33
25
|
// Normalize tables: convert arrays to AsyncDataSource
|
|
34
26
|
/** @type {Record<string, AsyncDataSource>} */
|
|
35
27
|
const normalizedTables = {}
|
|
@@ -41,7 +33,7 @@ export async function* executeSql({ tables, query, functions, signal }) {
|
|
|
41
33
|
}
|
|
42
34
|
}
|
|
43
35
|
|
|
44
|
-
yield* executeSelect({ select,
|
|
36
|
+
yield* executeSelect({ select, context: { tables: normalizedTables, functions, signal } })
|
|
45
37
|
}
|
|
46
38
|
|
|
47
39
|
/**
|
|
@@ -49,261 +41,166 @@ export async function* executeSql({ tables, query, functions, signal }) {
|
|
|
49
41
|
*
|
|
50
42
|
* @param {Object} options
|
|
51
43
|
* @param {SelectStatement} options.select
|
|
52
|
-
* @param {
|
|
53
|
-
* @param {WithClause} [options.withClause] - WITH clause containing CTE definitions
|
|
54
|
-
* @param {Record<string, UserDefinedFunction>} [options.functions]
|
|
55
|
-
* @param {AbortSignal} [options.signal]
|
|
44
|
+
* @param {ExecuteContext} options.context
|
|
56
45
|
* @yields {AsyncRow}
|
|
57
46
|
*/
|
|
58
|
-
export async function* executeSelect({ select,
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
/** @type {string} */
|
|
62
|
-
let leftTable
|
|
63
|
-
|
|
64
|
-
if (select.from.kind === 'table') {
|
|
65
|
-
const tableName = select.from.table
|
|
66
|
-
leftTable = select.from.alias ?? tableName
|
|
67
|
-
dataSource = resolveTableSource(tableName, tables, withClause, executeSelect, functions, signal)
|
|
68
|
-
} else {
|
|
69
|
-
// Nested subquery - recursively resolve
|
|
70
|
-
leftTable = select.from.alias
|
|
71
|
-
dataSource = generatorSource(executeSelect({
|
|
72
|
-
select: select.from.query,
|
|
73
|
-
tables,
|
|
74
|
-
withClause,
|
|
75
|
-
functions,
|
|
76
|
-
signal,
|
|
77
|
-
}))
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
// Execute JOINs if present
|
|
81
|
-
if (select.joins.length) {
|
|
82
|
-
dataSource = await executeJoins({
|
|
83
|
-
leftSource: dataSource,
|
|
84
|
-
joins: select.joins,
|
|
85
|
-
leftTable,
|
|
86
|
-
tables,
|
|
87
|
-
withClause,
|
|
88
|
-
functions,
|
|
89
|
-
executeSelectFn: executeSelect,
|
|
90
|
-
signal,
|
|
91
|
-
})
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
yield* evaluateSelectAst({ select, dataSource, tables, functions, signal })
|
|
47
|
+
export async function* executeSelect({ select, context }) {
|
|
48
|
+
const plan = planSql({ query: select, functions: context.functions })
|
|
49
|
+
yield* executePlan({ plan, context })
|
|
95
50
|
}
|
|
96
51
|
|
|
97
52
|
/**
|
|
98
|
-
*
|
|
53
|
+
* Executes a query plan and yields result rows
|
|
99
54
|
*
|
|
100
|
-
* @param {
|
|
101
|
-
* @
|
|
55
|
+
* @param {Object} options
|
|
56
|
+
* @param {QueryPlan} options.plan - the query plan to execute
|
|
57
|
+
* @param {ExecuteContext} options.context - execution context
|
|
58
|
+
* @returns {AsyncGenerator<AsyncRow>}
|
|
102
59
|
*/
|
|
103
|
-
async function
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
60
|
+
export async function* executePlan({ plan, context }) {
|
|
61
|
+
if (plan.type === 'Scan') {
|
|
62
|
+
yield* executeScan(plan, context)
|
|
63
|
+
} else if (plan.type === 'Filter') {
|
|
64
|
+
yield* executeFilter(plan, context)
|
|
65
|
+
} else if (plan.type === 'Project') {
|
|
66
|
+
yield* executeProject(plan, context)
|
|
67
|
+
} else if (plan.type === 'HashJoin') {
|
|
68
|
+
yield* executeHashJoin(plan, context)
|
|
69
|
+
} else if (plan.type === 'NestedLoopJoin') {
|
|
70
|
+
yield* executeNestedLoopJoin(plan, context)
|
|
71
|
+
} else if (plan.type === 'PositionalJoin') {
|
|
72
|
+
yield* executePositionalJoin(plan, context)
|
|
73
|
+
} else if (plan.type === 'HashAggregate') {
|
|
74
|
+
yield* executeHashAggregate(plan, context)
|
|
75
|
+
} else if (plan.type === 'ScalarAggregate') {
|
|
76
|
+
yield* executeScalarAggregate(plan, context)
|
|
77
|
+
} else if (plan.type === 'Sort') {
|
|
78
|
+
yield* executeSort(plan, context)
|
|
79
|
+
} else if (plan.type === 'Distinct') {
|
|
80
|
+
yield* executeDistinct(plan, context)
|
|
81
|
+
} else if (plan.type === 'Limit') {
|
|
82
|
+
yield* executeLimit(plan, context)
|
|
110
83
|
}
|
|
111
|
-
return parts.join('|')
|
|
112
84
|
}
|
|
113
85
|
|
|
114
86
|
/**
|
|
115
|
-
*
|
|
87
|
+
* Executes a table scan
|
|
116
88
|
*
|
|
117
|
-
* @param {
|
|
118
|
-
* @param {
|
|
119
|
-
* @
|
|
89
|
+
* @param {ScanNode} plan
|
|
90
|
+
* @param {ExecuteContext} context
|
|
91
|
+
* @yields {AsyncRow}
|
|
120
92
|
*/
|
|
121
|
-
async function
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
const result = []
|
|
127
|
-
for (const row of rows) {
|
|
128
|
-
const key = await stableRowKey(row.cells)
|
|
129
|
-
if (seen.has(key)) continue
|
|
130
|
-
seen.add(key)
|
|
131
|
-
result.push(row)
|
|
93
|
+
async function* executeScan(plan, context) {
|
|
94
|
+
const { tables, signal } = context
|
|
95
|
+
const dataSource = tables[plan.table]
|
|
96
|
+
if (dataSource === undefined) {
|
|
97
|
+
throw tableNotFoundError({ tableName: plan.table })
|
|
132
98
|
}
|
|
133
|
-
return result
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
/**
|
|
137
|
-
* Applies ORDER BY sorting to rows using multi-pass lazy evaluation.
|
|
138
|
-
* Secondary ORDER BY columns are only evaluated for rows that tie on
|
|
139
|
-
* previous columns, reducing expensive cell evaluations.
|
|
140
|
-
*
|
|
141
|
-
* @param {Object} options
|
|
142
|
-
* @param {AsyncRow[]} options.rows - the input rows
|
|
143
|
-
* @param {OrderByItem[]} options.orderBy - the sort specifications
|
|
144
|
-
* @param {Record<string, AsyncDataSource>} options.tables
|
|
145
|
-
* @param {Record<string, UserDefinedFunction>} [options.functions]
|
|
146
|
-
* @param {Map<string, ExprNode>} [options.aliases] - SELECT column aliases for ORDER BY resolution
|
|
147
|
-
* @returns {Promise<AsyncRow[]>} the sorted rows
|
|
148
|
-
*/
|
|
149
|
-
async function sortRows({ rows, orderBy, tables, functions, aliases }) {
|
|
150
|
-
if (!orderBy.length) return rows
|
|
151
99
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
100
|
+
const scanResult = dataSource.scan({ ...plan.hints, signal })
|
|
101
|
+
if (!scanResult.rows) {
|
|
102
|
+
throw new Error(`Data source "${plan.table}" scan() must return a ScanResults object with { rows, appliedWhere, appliedLimitOffset }`)
|
|
103
|
+
}
|
|
104
|
+
const { rows, appliedWhere, appliedLimitOffset } = scanResult
|
|
155
105
|
|
|
156
|
-
//
|
|
157
|
-
|
|
158
|
-
|
|
106
|
+
// Applied limit/offset without applied where is invalid
|
|
107
|
+
const hasLimitOffset = plan.hints?.limit !== undefined || plan.hints?.offset // 0 offset is noop
|
|
108
|
+
if (!appliedWhere && appliedLimitOffset && plan.hints?.where && hasLimitOffset) {
|
|
109
|
+
throw new Error(`Data source "${plan.table}" applied limit/offset without applying where`)
|
|
110
|
+
}
|
|
159
111
|
|
|
160
|
-
|
|
161
|
-
for (let orderByIdx = 0; orderByIdx < orderBy.length; orderByIdx++) {
|
|
162
|
-
const term = orderBy[orderByIdx]
|
|
163
|
-
/** @type {number[][]} */
|
|
164
|
-
const nextGroups = []
|
|
112
|
+
let result = rows
|
|
165
113
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
continue
|
|
171
|
-
}
|
|
114
|
+
// Apply WHERE if data source did not
|
|
115
|
+
if (!appliedWhere && plan.hints?.where) {
|
|
116
|
+
result = filterRows(result, plan.hints.where, context)
|
|
117
|
+
}
|
|
172
118
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
node: term.expr,
|
|
178
|
-
row: rows[idx],
|
|
179
|
-
tables,
|
|
180
|
-
functions,
|
|
181
|
-
aliases,
|
|
182
|
-
})
|
|
183
|
-
}
|
|
184
|
-
}
|
|
119
|
+
// Apply LIMIT/OFFSET if data source did not
|
|
120
|
+
if (!appliedLimitOffset && hasLimitOffset) {
|
|
121
|
+
result = limitRows(result, plan.hints.limit, plan.hints.offset, signal)
|
|
122
|
+
}
|
|
185
123
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
const av = evaluatedValues[aIdx][orderByIdx]
|
|
189
|
-
const bv = evaluatedValues[bIdx][orderByIdx]
|
|
190
|
-
return compareForTerm(av, bv, term)
|
|
191
|
-
})
|
|
124
|
+
yield* result
|
|
125
|
+
}
|
|
192
126
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
127
|
+
/**
|
|
128
|
+
* Filters rows by a condition
|
|
129
|
+
*
|
|
130
|
+
* @param {AsyncIterable<AsyncRow>} rows
|
|
131
|
+
* @param {ExprNode} condition
|
|
132
|
+
* @param {ExecuteContext} context
|
|
133
|
+
* @yields {AsyncRow}
|
|
134
|
+
*/
|
|
135
|
+
async function* filterRows(rows, condition, context) {
|
|
136
|
+
let rowIndex = 0
|
|
137
|
+
for await (const row of rows) {
|
|
138
|
+
if (context.signal?.aborted) return
|
|
139
|
+
rowIndex++
|
|
140
|
+
const pass = await evaluateExpr({ node: condition, row, rowIndex, context })
|
|
141
|
+
if (pass) yield row
|
|
142
|
+
}
|
|
143
|
+
}
|
|
202
144
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
145
|
+
/**
|
|
146
|
+
* Skips the first `offset` rows, then yields at most `limit` rows
|
|
147
|
+
*
|
|
148
|
+
* @param {AsyncIterable<AsyncRow>} rows
|
|
149
|
+
* @param {number} [limit]
|
|
150
|
+
* @param {number} [offset]
|
|
151
|
+
* @param {AbortSignal} [signal]
|
|
152
|
+
* @yields {AsyncRow}
|
|
153
|
+
*/
|
|
154
|
+
async function* limitRows(rows, limit, offset, signal) {
|
|
155
|
+
const skip = offset ?? 0
|
|
156
|
+
const max = limit ?? Infinity
|
|
157
|
+
if (max <= 0) return
|
|
158
|
+
let skipped = 0
|
|
159
|
+
let yielded = 0
|
|
160
|
+
for await (const row of rows) {
|
|
161
|
+
if (signal?.aborted) return
|
|
162
|
+
if (skipped < skip) {
|
|
163
|
+
skipped++
|
|
164
|
+
continue
|
|
217
165
|
}
|
|
218
|
-
|
|
219
|
-
|
|
166
|
+
yield row
|
|
167
|
+
yielded++
|
|
168
|
+
if (yielded >= max) return
|
|
220
169
|
}
|
|
221
|
-
|
|
222
|
-
// Flatten groups to get final sorted indices
|
|
223
|
-
return groups.flat().map(i => rows[i])
|
|
224
170
|
}
|
|
225
171
|
|
|
226
172
|
/**
|
|
227
|
-
*
|
|
173
|
+
* Executes a filter operation (WHERE clause)
|
|
228
174
|
*
|
|
229
|
-
* @param {
|
|
230
|
-
* @param {
|
|
231
|
-
* @param {AsyncDataSource} options.dataSource
|
|
232
|
-
* @param {Record<string, AsyncDataSource>} options.tables
|
|
233
|
-
* @param {Record<string, UserDefinedFunction>} [options.functions]
|
|
234
|
-
* @param {AbortSignal} [options.signal]
|
|
175
|
+
* @param {FilterNode} plan
|
|
176
|
+
* @param {ExecuteContext} context
|
|
235
177
|
* @yields {AsyncRow}
|
|
236
178
|
*/
|
|
237
|
-
async function*
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
const hasAggregate = select.columns.some(col => col.kind === 'derived' && containsAggregate(col.expr))
|
|
241
|
-
const useGrouping = hasAggregate || select.groupBy.length > 0
|
|
242
|
-
const needsBuffering = useGrouping || select.orderBy.length > 0
|
|
243
|
-
|
|
244
|
-
if (needsBuffering) {
|
|
245
|
-
// BUFFERING PATH: Collect all rows, process, then yield
|
|
246
|
-
yield* evaluateBuffered({ select, dataSource, tables, functions, hasAggregate, useGrouping, signal })
|
|
247
|
-
} else {
|
|
248
|
-
// STREAMING PATH: Yield rows one by one
|
|
249
|
-
yield* evaluateStreaming({ select, dataSource, tables, functions, signal })
|
|
250
|
-
}
|
|
179
|
+
async function* executeFilter(plan, context) {
|
|
180
|
+
yield* filterRows(executePlan({ plan: plan.child, context }), plan.condition, context)
|
|
251
181
|
}
|
|
252
182
|
|
|
253
183
|
/**
|
|
254
|
-
*
|
|
255
|
-
* Supports DISTINCT by tracking seen row keys without buffering full rows
|
|
184
|
+
* Executes a projection operation (SELECT columns)
|
|
256
185
|
*
|
|
257
|
-
* @param {
|
|
258
|
-
* @param {
|
|
259
|
-
* @param {AsyncDataSource} options.dataSource
|
|
260
|
-
* @param {Record<string, AsyncDataSource>} options.tables
|
|
261
|
-
* @param {Record<string, UserDefinedFunction>} [options.functions]
|
|
262
|
-
* @param {AbortSignal} [options.signal]
|
|
186
|
+
* @param {ProjectNode} plan
|
|
187
|
+
* @param {ExecuteContext} context
|
|
263
188
|
* @yields {AsyncRow}
|
|
264
189
|
*/
|
|
265
|
-
async function*
|
|
266
|
-
let rowsYielded = 0
|
|
267
|
-
let rowsSkipped = 0
|
|
190
|
+
async function* executeProject(plan, context) {
|
|
268
191
|
let rowIndex = 0
|
|
269
|
-
const offset = select.offset ?? 0
|
|
270
|
-
const limit = select.limit ?? Infinity
|
|
271
|
-
if (limit <= 0) return
|
|
272
192
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
const seen = select.distinct ? new Set() : undefined
|
|
276
|
-
|
|
277
|
-
// hints for data source optimization
|
|
278
|
-
/** @type {QueryHints} */
|
|
279
|
-
const hints = {
|
|
280
|
-
columns: extractColumns(select),
|
|
281
|
-
where: select.where,
|
|
282
|
-
limit: select.limit,
|
|
283
|
-
offset: select.offset,
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
for await (const row of dataSource.scan({ hints, signal })) {
|
|
193
|
+
for await (const row of executePlan({ plan: plan.child, context })) {
|
|
194
|
+
if (context.signal?.aborted) return
|
|
287
195
|
rowIndex++
|
|
288
|
-
|
|
289
|
-
if (select.where) {
|
|
290
|
-
const pass = await evaluateExpr({ node: select.where, row, tables, functions, rowIndex })
|
|
291
|
-
if (!pass) continue
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
// For non-DISTINCT queries, we can skip rows before projection (optimization)
|
|
295
|
-
if (!seen && rowsSkipped < offset) {
|
|
296
|
-
rowsSkipped++
|
|
297
|
-
continue
|
|
298
|
-
}
|
|
196
|
+
const currentRowIndex = rowIndex
|
|
299
197
|
|
|
300
|
-
// SELECT projection
|
|
301
198
|
/** @type {string[]} */
|
|
302
199
|
const columns = []
|
|
303
200
|
/** @type {AsyncCells} */
|
|
304
201
|
const cells = {}
|
|
305
|
-
|
|
306
|
-
for (const col of
|
|
202
|
+
|
|
203
|
+
for (const col of plan.columns) {
|
|
307
204
|
if (col.kind === 'star') {
|
|
308
205
|
for (const key of row.columns) {
|
|
309
206
|
columns.push(key)
|
|
@@ -312,217 +209,50 @@ async function* evaluateStreaming({ select, dataSource, tables, functions, signa
|
|
|
312
209
|
} else if (col.kind === 'derived') {
|
|
313
210
|
const alias = col.alias ?? defaultDerivedAlias(col.expr)
|
|
314
211
|
columns.push(alias)
|
|
315
|
-
cells[alias] = () => evaluateExpr({
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
const key = await stableRowKey(cells)
|
|
322
|
-
if (seen.has(key)) continue
|
|
323
|
-
seen.add(key)
|
|
324
|
-
// OFFSET applies to distinct rows
|
|
325
|
-
if (rowsSkipped < offset) {
|
|
326
|
-
rowsSkipped++
|
|
327
|
-
continue
|
|
212
|
+
cells[alias] = () => evaluateExpr({
|
|
213
|
+
node: col.expr,
|
|
214
|
+
row,
|
|
215
|
+
rowIndex: currentRowIndex,
|
|
216
|
+
context,
|
|
217
|
+
})
|
|
328
218
|
}
|
|
329
219
|
}
|
|
330
220
|
|
|
331
221
|
yield { columns, cells }
|
|
332
|
-
rowsYielded++
|
|
333
|
-
if (rowsYielded >= limit) {
|
|
334
|
-
break
|
|
335
|
-
}
|
|
336
222
|
}
|
|
337
223
|
}
|
|
338
224
|
|
|
339
225
|
/**
|
|
340
|
-
*
|
|
226
|
+
* Executes a distinct operation
|
|
341
227
|
*
|
|
342
|
-
* @param {
|
|
343
|
-
* @param {
|
|
344
|
-
* @param {AsyncDataSource} options.dataSource
|
|
345
|
-
* @param {Record<string, AsyncDataSource>} options.tables
|
|
346
|
-
* @param {Record<string, UserDefinedFunction>} [options.functions]
|
|
347
|
-
* @param {boolean} options.hasAggregate
|
|
348
|
-
* @param {boolean} options.useGrouping
|
|
349
|
-
* @param {AbortSignal} [options.signal]
|
|
228
|
+
* @param {DistinctNode} plan
|
|
229
|
+
* @param {ExecuteContext} context
|
|
350
230
|
* @yields {AsyncRow}
|
|
351
231
|
*/
|
|
352
|
-
async function*
|
|
353
|
-
|
|
354
|
-
// Note: limit/offset not passed here since buffering needs all rows for sorting/grouping
|
|
355
|
-
/** @type {QueryHints} */
|
|
356
|
-
const hints = {
|
|
357
|
-
where: select.where,
|
|
358
|
-
columns: extractColumns(select),
|
|
359
|
-
}
|
|
360
|
-
|
|
361
|
-
// Step 1: Collect all rows from data source
|
|
362
|
-
/** @type {AsyncRow[]} */
|
|
363
|
-
const working = []
|
|
364
|
-
for await (const row of dataSource.scan({ hints, signal })) {
|
|
365
|
-
working.push(row)
|
|
366
|
-
}
|
|
367
|
-
|
|
368
|
-
// Step 2: WHERE clause filtering
|
|
369
|
-
/** @type {AsyncRow[]} */
|
|
370
|
-
const filtered = []
|
|
371
|
-
|
|
372
|
-
for (let i = 0; i < working.length; i++) {
|
|
373
|
-
const row = working[i]
|
|
374
|
-
const rowIndex = i + 1 // 1-based
|
|
375
|
-
if (select.where) {
|
|
376
|
-
const passes = await evaluateExpr({ node: select.where, row, tables, functions, rowIndex })
|
|
377
|
-
|
|
378
|
-
if (!passes) {
|
|
379
|
-
continue
|
|
380
|
-
}
|
|
381
|
-
}
|
|
382
|
-
filtered.push(row)
|
|
383
|
-
}
|
|
384
|
-
|
|
385
|
-
// Step 3: Projection (grouping vs non-grouping)
|
|
386
|
-
/** @type {AsyncRow[]} */
|
|
387
|
-
let projected = []
|
|
388
|
-
|
|
389
|
-
if (useGrouping) {
|
|
390
|
-
// Grouping due to GROUP BY or aggregate functions
|
|
391
|
-
/** @type {AsyncRow[][]} */
|
|
392
|
-
const groups = []
|
|
393
|
-
|
|
394
|
-
if (select.groupBy.length) {
|
|
395
|
-
/** @type {Map<string, AsyncRow[]>} */
|
|
396
|
-
const map = new Map()
|
|
397
|
-
for (const row of filtered) {
|
|
398
|
-
/** @type {string[]} */
|
|
399
|
-
const keyParts = []
|
|
400
|
-
for (const expr of select.groupBy) {
|
|
401
|
-
const v = await evaluateExpr({ node: expr, row, tables, functions })
|
|
402
|
-
keyParts.push(stringify(v))
|
|
403
|
-
}
|
|
404
|
-
const key = keyParts.join('|')
|
|
405
|
-
let group = map.get(key)
|
|
406
|
-
if (!group) {
|
|
407
|
-
group = []
|
|
408
|
-
map.set(key, group)
|
|
409
|
-
groups.push(group)
|
|
410
|
-
}
|
|
411
|
-
group.push(row)
|
|
412
|
-
}
|
|
413
|
-
} else {
|
|
414
|
-
groups.push(filtered)
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
const hasStar = select.columns.some(col => col.kind === 'star')
|
|
418
|
-
if (hasStar && hasAggregate) {
|
|
419
|
-
throw unsupportedOperationError({
|
|
420
|
-
operation: 'SELECT * with aggregate functions is not supported',
|
|
421
|
-
hint: 'Replace * with specific column names when using aggregate functions.',
|
|
422
|
-
})
|
|
423
|
-
}
|
|
424
|
-
|
|
425
|
-
for (const group of groups) {
|
|
426
|
-
const columns = []
|
|
427
|
-
/** @type {AsyncCells} */
|
|
428
|
-
const cells = {}
|
|
429
|
-
for (const col of select.columns) {
|
|
430
|
-
if (col.kind === 'star') {
|
|
431
|
-
const firstRow = group[0]
|
|
432
|
-
if (firstRow) {
|
|
433
|
-
for (const key of firstRow.columns) {
|
|
434
|
-
columns.push(key)
|
|
435
|
-
cells[key] = firstRow.cells[key]
|
|
436
|
-
}
|
|
437
|
-
}
|
|
438
|
-
continue
|
|
439
|
-
}
|
|
440
|
-
|
|
441
|
-
if (col.kind === 'derived') {
|
|
442
|
-
const alias = col.alias ?? defaultDerivedAlias(col.expr)
|
|
443
|
-
columns.push(alias)
|
|
444
|
-
// Pass group to evaluateExpr so it can handle aggregate functions within expressions
|
|
445
|
-
// For empty groups, still provide an empty row context for aggregates to return appropriate values
|
|
446
|
-
cells[alias] = () => evaluateExpr({ node: col.expr, row: group[0] ?? { columns: [], cells: {} }, tables, functions, rows: group })
|
|
447
|
-
continue
|
|
448
|
-
}
|
|
449
|
-
}
|
|
450
|
-
const asyncRow = { columns, cells }
|
|
451
|
-
|
|
452
|
-
// Apply HAVING filter before adding to projected results
|
|
453
|
-
if (select.having) {
|
|
454
|
-
if (!await evaluateHavingExpr({ expr: select.having, row: asyncRow, group, tables, functions })) {
|
|
455
|
-
continue
|
|
456
|
-
}
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
projected.push(asyncRow)
|
|
460
|
-
}
|
|
461
|
-
} else {
|
|
462
|
-
// No grouping, simple projection
|
|
463
|
-
// Sort before projection so ORDER BY can access columns not in SELECT
|
|
464
|
-
|
|
465
|
-
// Pass aliases so ORDER BY can reference SELECT column aliases
|
|
466
|
-
/** @type {Map<string, ExprNode>} */
|
|
467
|
-
const aliases = new Map()
|
|
468
|
-
for (const col of select.columns) {
|
|
469
|
-
if (col.kind === 'derived' && col.alias) {
|
|
470
|
-
aliases.set(col.alias, col.expr)
|
|
471
|
-
}
|
|
472
|
-
}
|
|
473
|
-
const sorted = await sortRows({ rows: filtered, orderBy: select.orderBy, tables, functions, aliases })
|
|
474
|
-
|
|
475
|
-
// OPTIMIZATION: For non-DISTINCT queries, apply OFFSET/LIMIT before projection
|
|
476
|
-
// to avoid reading expensive cells for rows that won't be in the final result
|
|
477
|
-
let rowsToProject = sorted
|
|
478
|
-
if (!select.distinct) {
|
|
479
|
-
const start = select.offset ?? 0
|
|
480
|
-
const end = select.limit ? start + select.limit : sorted.length
|
|
481
|
-
rowsToProject = sorted.slice(start, end)
|
|
482
|
-
}
|
|
483
|
-
|
|
484
|
-
for (const row of rowsToProject) {
|
|
485
|
-
const columns = []
|
|
486
|
-
/** @type {AsyncCells} */
|
|
487
|
-
const cells = {}
|
|
488
|
-
for (const col of select.columns) {
|
|
489
|
-
if (col.kind === 'star') {
|
|
490
|
-
for (const key of row.columns) {
|
|
491
|
-
columns.push(key)
|
|
492
|
-
cells[key] = row.cells[key]
|
|
493
|
-
}
|
|
494
|
-
} else if (col.kind === 'derived') {
|
|
495
|
-
const alias = col.alias ?? defaultDerivedAlias(col.expr)
|
|
496
|
-
columns.push(alias)
|
|
497
|
-
cells[alias] = () => evaluateExpr({ node: col.expr, row, tables, functions })
|
|
498
|
-
}
|
|
499
|
-
}
|
|
500
|
-
projected.push({ columns, cells })
|
|
501
|
-
}
|
|
502
|
-
}
|
|
232
|
+
async function* executeDistinct(plan, context) {
|
|
233
|
+
const { signal } = context
|
|
503
234
|
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
// Step 5: ORDER BY (final sort for grouped queries)
|
|
508
|
-
if (useGrouping) {
|
|
509
|
-
projected = await sortRows({ rows: projected, orderBy: select.orderBy, tables, functions })
|
|
510
|
-
}
|
|
235
|
+
/** @type {Set<string>} */
|
|
236
|
+
const seen = new Set()
|
|
511
237
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
if (select.distinct || useGrouping) {
|
|
515
|
-
const start = select.offset ?? 0
|
|
516
|
-
const end = select.limit ? start + select.limit : projected.length
|
|
238
|
+
for await (const row of executePlan({ plan: plan.child, context })) {
|
|
239
|
+
if (signal?.aborted) return
|
|
517
240
|
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
}
|
|
522
|
-
} else {
|
|
523
|
-
// Already limited, yield all projected rows
|
|
524
|
-
for (const row of projected) {
|
|
241
|
+
const key = await stableRowKey(row.cells)
|
|
242
|
+
if (!seen.has(key)) {
|
|
243
|
+
seen.add(key)
|
|
525
244
|
yield row
|
|
526
245
|
}
|
|
527
246
|
}
|
|
528
247
|
}
|
|
248
|
+
|
|
249
|
+
/**
|
|
250
|
+
* Executes a limit operation (LIMIT/OFFSET)
|
|
251
|
+
*
|
|
252
|
+
* @param {LimitNode} plan
|
|
253
|
+
* @param {ExecuteContext} context
|
|
254
|
+
* @yields {AsyncRow}
|
|
255
|
+
*/
|
|
256
|
+
async function* executeLimit(plan, context) {
|
|
257
|
+
yield* limitRows(executePlan({ plan: plan.child, context }), plan.limit, plan.offset, context.signal)
|
|
258
|
+
}
|