squirreling 0.10.2 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/package.json +5 -5
- package/src/ast.d.ts +32 -15
- package/src/backend/dataSource.js +4 -3
- package/src/execute/aggregates.js +160 -19
- package/src/execute/execute.js +129 -23
- package/src/execute/join.js +20 -21
- package/src/execute/utils.js +19 -7
- package/src/expression/alias.js +3 -2
- package/src/expression/evaluate.js +87 -61
- package/src/expression/math.js +2 -0
- package/src/expression/regexp.js +11 -9
- package/src/expression/strings.js +11 -9
- package/src/index.d.ts +10 -5
- package/src/index.js +1 -1
- package/src/parse/expression.js +187 -351
- package/src/parse/functions.js +63 -51
- package/src/parse/joins.js +24 -38
- package/src/parse/parse.js +244 -200
- package/src/parse/primary.js +281 -0
- package/src/parse/state.js +11 -25
- package/src/parse/tokenize.js +77 -196
- package/src/plan/columns.js +115 -17
- package/src/plan/plan.js +121 -44
- package/src/plan/types.d.ts +11 -1
- package/src/spatial/bbox.js +3 -3
- package/src/spatial/geometry.d.ts +1 -1
- package/src/spatial/index.d.ts +6 -0
- package/src/spatial/index.js +3 -0
- package/src/spatial/spatial.js +19 -53
- package/src/types.d.ts +17 -5
- package/src/validation/executionErrors.js +20 -12
- package/src/validation/functions.js +28 -53
- package/src/validation/keywords.js +35 -0
- package/src/validation/parseErrors.js +101 -82
- package/src/validation/planErrors.js +41 -33
- package/src/parse/comparison.js +0 -233
- package/src/validation/expressionErrors.js +0 -57
package/README.md
CHANGED
|
@@ -137,6 +137,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
|
|
|
137
137
|
- Subqueries in `SELECT`, `FROM`, and `WHERE` clauses
|
|
138
138
|
- `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`
|
|
139
139
|
- `GROUP BY` and `HAVING` clauses
|
|
140
|
+
- Set operations: `UNION`, `UNION ALL`, `INTERSECT`, `INTERSECT ALL`, `EXCEPT`, `EXCEPT ALL`
|
|
140
141
|
- Expressions: `CASE`, `CAST`, `BETWEEN`, `IN`, `LIKE`, `IS NULL`, `IS NOT NULL`
|
|
141
142
|
|
|
142
143
|
### Quoting
|
|
@@ -147,14 +148,14 @@ Squirreling mostly follows the SQL standard. The following features are supporte
|
|
|
147
148
|
|
|
148
149
|
### Functions
|
|
149
150
|
|
|
150
|
-
- Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `STDDEV_POP`, `STDDEV_SAMP`, `JSON_ARRAYAGG`
|
|
151
|
-
- String: `CONCAT`, `SUBSTRING`, `REPLACE`, `LENGTH`, `UPPER`, `LOWER`, `TRIM`, `LEFT`, `RIGHT`, `INSTR`
|
|
151
|
+
- Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `MEDIAN`, `PERCENTILE_CONT`, `APPROX_QUANTILE`, `STDDEV_POP`, `STDDEV_SAMP`, `JSON_ARRAYAGG`
|
|
152
|
+
- String: `CONCAT`, `SUBSTRING`, `REPLACE`, `LENGTH`, `UPPER`, `LOWER`, `TRIM`, `LEFT`, `RIGHT`, `INSTR`, `POSITION`, `STRPOS`
|
|
152
153
|
- Math: `ABS`, `SIGN`, `CEIL`, `FLOOR`, `ROUND`, `MOD`, `RAND`, `RANDOM`, `LN`, `LOG10`, `EXP`, `POWER`, `SQRT`
|
|
153
154
|
- Trig: `SIN`, `COS`, `TAN`, `COT`, `ASIN`, `ACOS`, `ATAN`, `ATAN2`, `DEGREES`, `RADIANS`, `PI`
|
|
154
155
|
- Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_PART`, `DATE_TRUNC`, `EXTRACT`, `INTERVAL`
|
|
155
|
-
- Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_OBJECT`
|
|
156
|
+
- Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`
|
|
156
157
|
- Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_SORT`, `CARDINALITY`
|
|
157
|
-
- Regex: `REGEXP_SUBSTR`, `REGEXP_REPLACE`
|
|
158
|
+
- Regex: `REGEXP_SUBSTR`, `REGEXP_EXTRACT`, `REGEXP_REPLACE`
|
|
158
159
|
- Spatial: `ST_GeomFromText`, `ST_MakeEnvelope`, `ST_AsText`, `ST_Intersects`, `ST_Contains`, `ST_ContainsProperly`, `ST_Within`, `ST_Overlaps`, `ST_Touches`, `ST_Equals`, `ST_Crosses`, `ST_Covers`, `ST_CoveredBy`, `ST_DWithin`
|
|
159
160
|
- Conditional: `COALESCE`, `NULLIF`
|
|
160
161
|
- User-defined functions (UDFs)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "squirreling",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.11.0",
|
|
4
4
|
"description": "Squirreling Async SQL Engine",
|
|
5
5
|
"author": "Hyperparam",
|
|
6
6
|
"homepage": "https://hyperparam.app",
|
|
@@ -40,10 +40,10 @@
|
|
|
40
40
|
},
|
|
41
41
|
"devDependencies": {
|
|
42
42
|
"@types/node": "25.5.0",
|
|
43
|
-
"@vitest/coverage-v8": "4.1.
|
|
43
|
+
"@vitest/coverage-v8": "4.1.2",
|
|
44
44
|
"eslint": "9.39.2",
|
|
45
|
-
"eslint-plugin-jsdoc": "62.8.
|
|
46
|
-
"typescript": "
|
|
47
|
-
"vitest": "4.1.
|
|
45
|
+
"eslint-plugin-jsdoc": "62.8.1",
|
|
46
|
+
"typescript": "6.0.2",
|
|
47
|
+
"vitest": "4.1.2"
|
|
48
48
|
}
|
|
49
49
|
}
|
package/src/ast.d.ts
CHANGED
|
@@ -8,8 +8,8 @@ export type SqlPrimitive =
|
|
|
8
8
|
| SqlPrimitive[]
|
|
9
9
|
| Record<string, any>
|
|
10
10
|
|
|
11
|
-
export interface SelectStatement {
|
|
12
|
-
|
|
11
|
+
export interface SelectStatement extends AstBase {
|
|
12
|
+
type: 'select'
|
|
13
13
|
distinct: boolean
|
|
14
14
|
columns: SelectColumn[]
|
|
15
15
|
from: FromTable | FromSubquery
|
|
@@ -22,25 +22,42 @@ export interface SelectStatement {
|
|
|
22
22
|
offset?: number
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
export
|
|
25
|
+
export type SetOperator = 'UNION' | 'INTERSECT' | 'EXCEPT'
|
|
26
|
+
|
|
27
|
+
export interface SetOperationStatement extends AstBase {
|
|
28
|
+
type: 'compound'
|
|
29
|
+
operator: SetOperator
|
|
30
|
+
all: boolean
|
|
31
|
+
left: Statement
|
|
32
|
+
right: Statement
|
|
33
|
+
orderBy: OrderByItem[]
|
|
34
|
+
limit?: number
|
|
35
|
+
offset?: number
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
export interface WithStatement extends AstBase {
|
|
39
|
+
type: 'with'
|
|
26
40
|
ctes: CTEDefinition[]
|
|
41
|
+
query: Statement
|
|
27
42
|
}
|
|
28
43
|
|
|
29
|
-
export
|
|
44
|
+
export type Statement = SelectStatement | SetOperationStatement | WithStatement
|
|
45
|
+
|
|
46
|
+
export interface CTEDefinition extends AstBase {
|
|
30
47
|
name: string
|
|
31
|
-
query:
|
|
48
|
+
query: Statement
|
|
32
49
|
}
|
|
33
50
|
|
|
34
51
|
export interface FromTable extends AstBase {
|
|
35
|
-
|
|
52
|
+
type: 'table'
|
|
36
53
|
table: string
|
|
37
54
|
alias?: string
|
|
38
55
|
}
|
|
39
56
|
|
|
40
|
-
export interface FromSubquery {
|
|
41
|
-
|
|
42
|
-
query:
|
|
43
|
-
alias
|
|
57
|
+
export interface FromSubquery extends AstBase {
|
|
58
|
+
type: 'subquery'
|
|
59
|
+
query: Statement
|
|
60
|
+
alias?: string
|
|
44
61
|
}
|
|
45
62
|
|
|
46
63
|
export type ArithmeticOp = '+' | '-' | '*' | '/' | '%'
|
|
@@ -91,7 +108,7 @@ export interface CastNode extends AstBase {
|
|
|
91
108
|
export interface InSubqueryNode extends AstBase {
|
|
92
109
|
type: 'in'
|
|
93
110
|
expr: ExprNode
|
|
94
|
-
subquery:
|
|
111
|
+
subquery: Statement
|
|
95
112
|
}
|
|
96
113
|
|
|
97
114
|
export interface InValuesNode extends AstBase {
|
|
@@ -102,7 +119,7 @@ export interface InValuesNode extends AstBase {
|
|
|
102
119
|
|
|
103
120
|
export interface ExistsNode extends AstBase {
|
|
104
121
|
type: 'exists' | 'not exists'
|
|
105
|
-
subquery:
|
|
122
|
+
subquery: Statement
|
|
106
123
|
}
|
|
107
124
|
|
|
108
125
|
export interface WhenClause extends AstBase {
|
|
@@ -119,7 +136,7 @@ export interface CaseNode extends AstBase {
|
|
|
119
136
|
|
|
120
137
|
export interface SubqueryNode extends AstBase {
|
|
121
138
|
type: 'subquery'
|
|
122
|
-
subquery:
|
|
139
|
+
subquery: Statement
|
|
123
140
|
}
|
|
124
141
|
|
|
125
142
|
export type IntervalUnit = 'DAY' | 'MONTH' | 'YEAR' | 'HOUR' | 'MINUTE' | 'SECOND'
|
|
@@ -150,12 +167,12 @@ export type ExprNode =
|
|
|
150
167
|
| StarNode
|
|
151
168
|
|
|
152
169
|
export interface StarColumn {
|
|
153
|
-
|
|
170
|
+
type: 'star'
|
|
154
171
|
table?: string
|
|
155
172
|
}
|
|
156
173
|
|
|
157
174
|
export interface DerivedColumn {
|
|
158
|
-
|
|
175
|
+
type: 'derived'
|
|
159
176
|
expr: ExprNode
|
|
160
177
|
alias?: string
|
|
161
178
|
}
|
|
@@ -32,19 +32,20 @@ export function memorySource({ data, columns }) {
|
|
|
32
32
|
if (!data.length) {
|
|
33
33
|
throw new Error('Unknown columns: data is empty and no columns provided')
|
|
34
34
|
}
|
|
35
|
-
|
|
35
|
+
const firstColumns = Object.keys(data[0])
|
|
36
36
|
// Check first 1000 rows for consistent columns
|
|
37
37
|
for (let i = 1; i < data.length && i < 1000; i++) {
|
|
38
38
|
const rowColumns = Object.keys(data[i])
|
|
39
|
-
const missing =
|
|
39
|
+
const missing = firstColumns.find(col => !rowColumns.includes(col))
|
|
40
40
|
if (missing) {
|
|
41
41
|
throw new Error(`Inconsistent data, column "${missing}" not found in row ${i}`)
|
|
42
42
|
}
|
|
43
|
-
const extra = rowColumns.find(col => !
|
|
43
|
+
const extra = rowColumns.find(col => !firstColumns.includes(col))
|
|
44
44
|
if (extra) {
|
|
45
45
|
throw new Error(`Inconsistent data, unexpected column "${extra}" found in row ${i}`)
|
|
46
46
|
}
|
|
47
47
|
}
|
|
48
|
+
columns = firstColumns
|
|
48
49
|
}
|
|
49
50
|
return {
|
|
50
51
|
numRows: data.length,
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { derivedAlias } from '../expression/alias.js'
|
|
2
2
|
import { evaluateExpr } from '../expression/evaluate.js'
|
|
3
3
|
import { executePlan } from './execute.js'
|
|
4
|
-
import {
|
|
4
|
+
import { keyify } from './utils.js'
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
* @import { AsyncCells, AsyncRow, ExecuteContext, SelectColumn } from '../types.js'
|
|
7
|
+
* @import { AsyncCells, AsyncDataSource, AsyncRow, DerivedColumn, ExecuteContext, SelectColumn, SqlPrimitive } from '../types.js'
|
|
8
8
|
* @import { HashAggregateNode, ScalarAggregateNode } from '../plan/types.js'
|
|
9
9
|
*/
|
|
10
10
|
|
|
@@ -23,7 +23,7 @@ function projectAggregateColumns(selectColumns, group, context) {
|
|
|
23
23
|
const cells = {}
|
|
24
24
|
|
|
25
25
|
for (const col of selectColumns) {
|
|
26
|
-
if (col.
|
|
26
|
+
if (col.type === 'star') {
|
|
27
27
|
const firstRow = group[0]
|
|
28
28
|
if (firstRow) {
|
|
29
29
|
for (const key of firstRow.columns) {
|
|
@@ -31,7 +31,7 @@ function projectAggregateColumns(selectColumns, group, context) {
|
|
|
31
31
|
cells[key] = firstRow.cells[key]
|
|
32
32
|
}
|
|
33
33
|
}
|
|
34
|
-
} else
|
|
34
|
+
} else {
|
|
35
35
|
const alias = col.alias ?? derivedAlias(col.expr)
|
|
36
36
|
columns.push(alias)
|
|
37
37
|
cells[alias] = () => evaluateExpr({
|
|
@@ -63,30 +63,21 @@ export async function* executeHashAggregate(plan, context) {
|
|
|
63
63
|
}
|
|
64
64
|
|
|
65
65
|
// Group rows by GROUP BY keys
|
|
66
|
-
/** @type {Map<
|
|
67
|
-
const
|
|
68
|
-
/** @type {AsyncRow[][]} */
|
|
69
|
-
const groups = []
|
|
66
|
+
/** @type {Map<any, AsyncRow[]>} */
|
|
67
|
+
const groups = new Map()
|
|
70
68
|
|
|
71
69
|
for (const row of allRows) {
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
for (const expr of plan.groupBy) {
|
|
75
|
-
const v = await evaluateExpr({ node: expr, row, context })
|
|
76
|
-
keyParts.push(stringify(v))
|
|
77
|
-
}
|
|
78
|
-
const key = keyParts.join('|')
|
|
79
|
-
let group = groupMap.get(key)
|
|
70
|
+
const key = keyify(...await Promise.all(plan.groupBy.map(expr => evaluateExpr({ node: expr, row, context }))))
|
|
71
|
+
let group = groups.get(key)
|
|
80
72
|
if (!group) {
|
|
81
73
|
group = []
|
|
82
|
-
|
|
83
|
-
groups.push(group)
|
|
74
|
+
groups.set(key, group)
|
|
84
75
|
}
|
|
85
76
|
group.push(row)
|
|
86
77
|
}
|
|
87
78
|
|
|
88
79
|
// Yield one row per group
|
|
89
|
-
for (const group of groups) {
|
|
80
|
+
for (const group of groups.values()) {
|
|
90
81
|
const asyncRow = projectAggregateColumns(plan.columns, group, context)
|
|
91
82
|
|
|
92
83
|
// Apply HAVING filter
|
|
@@ -117,6 +108,13 @@ export async function* executeHashAggregate(plan, context) {
|
|
|
117
108
|
* @yields {AsyncRow}
|
|
118
109
|
*/
|
|
119
110
|
export async function* executeScalarAggregate(plan, context) {
|
|
111
|
+
// Fast path: use scanColumn when available
|
|
112
|
+
const fast = tryColumnScanAggregate(plan, context)
|
|
113
|
+
if (fast) {
|
|
114
|
+
yield* fast
|
|
115
|
+
return
|
|
116
|
+
}
|
|
117
|
+
|
|
120
118
|
// Collect all rows into single group
|
|
121
119
|
/** @type {AsyncRow[]} */
|
|
122
120
|
const group = []
|
|
@@ -145,3 +143,146 @@ export async function* executeScalarAggregate(plan, context) {
|
|
|
145
143
|
|
|
146
144
|
yield asyncRow
|
|
147
145
|
}
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* @typedef {{
|
|
149
|
+
* funcName: string,
|
|
150
|
+
* column: string,
|
|
151
|
+
* alias: string,
|
|
152
|
+
* distinct?: boolean,
|
|
153
|
+
* }} ColumnAggSpec
|
|
154
|
+
*/
|
|
155
|
+
|
|
156
|
+
/**
|
|
157
|
+
* Checks if a scalar aggregate can use the scanColumn fast path.
|
|
158
|
+
* Returns an async generator if so, undefined otherwise.
|
|
159
|
+
*
|
|
160
|
+
* @param {ScalarAggregateNode} plan
|
|
161
|
+
* @param {ExecuteContext} context
|
|
162
|
+
* @returns {AsyncGenerator<AsyncRow> | undefined}
|
|
163
|
+
*/
|
|
164
|
+
function tryColumnScanAggregate(plan, context) {
|
|
165
|
+
// No HAVING support in fast path
|
|
166
|
+
if (plan.having) return
|
|
167
|
+
// Child must be a direct table scan
|
|
168
|
+
if (plan.child.type !== 'Scan') return
|
|
169
|
+
const scanNode = plan.child
|
|
170
|
+
// No WHERE in scan (scanColumn doesn't support filtering)
|
|
171
|
+
if (scanNode.hints.where) return
|
|
172
|
+
|
|
173
|
+
const table = context.tables[scanNode.table]
|
|
174
|
+
if (!table?.scanColumn) return
|
|
175
|
+
|
|
176
|
+
// All columns must be simple aggregates on plain identifiers
|
|
177
|
+
/** @type {ColumnAggSpec[]} */
|
|
178
|
+
const specs = []
|
|
179
|
+
for (const col of plan.columns) {
|
|
180
|
+
if (col.type !== 'derived') return
|
|
181
|
+
const spec = extractColumnAggSpec(col)
|
|
182
|
+
if (!spec) return
|
|
183
|
+
specs.push(spec)
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return (async function* () {
|
|
187
|
+
/** @type {string[]} */
|
|
188
|
+
const columns = []
|
|
189
|
+
/** @type {AsyncCells} */
|
|
190
|
+
const cells = {}
|
|
191
|
+
|
|
192
|
+
for (const spec of specs) {
|
|
193
|
+
const value = await scanColumnAggregate(table, spec, context.signal)
|
|
194
|
+
columns.push(spec.alias)
|
|
195
|
+
cells[spec.alias] = () => Promise.resolve(value)
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
yield { columns, cells }
|
|
199
|
+
})()
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
/**
|
|
203
|
+
* Extracts aggregate spec from a simple aggregate expression node.
|
|
204
|
+
* Returns undefined if the expression is not a supported simple aggregate.
|
|
205
|
+
*
|
|
206
|
+
* @param {DerivedColumn} col
|
|
207
|
+
* @returns {ColumnAggSpec | undefined}
|
|
208
|
+
*/
|
|
209
|
+
function extractColumnAggSpec({ expr, alias }) {
|
|
210
|
+
if (expr.type !== 'function') return
|
|
211
|
+
if (expr.filter) return // FILTER not supported in fast path
|
|
212
|
+
const funcName = expr.funcName.toUpperCase()
|
|
213
|
+
if (!['COUNT', 'SUM', 'AVG', 'MIN', 'MAX'].includes(funcName)) return
|
|
214
|
+
|
|
215
|
+
// Argument must be a plain column identifier
|
|
216
|
+
const arg = expr.args[0]
|
|
217
|
+
if (arg.type !== 'identifier') return
|
|
218
|
+
return {
|
|
219
|
+
funcName,
|
|
220
|
+
column: derivedAlias(arg),
|
|
221
|
+
alias: alias ?? derivedAlias(expr),
|
|
222
|
+
distinct: expr.distinct,
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/**
|
|
227
|
+
* Scans a single column and computes an aggregate value.
|
|
228
|
+
*
|
|
229
|
+
* @param {AsyncDataSource} table
|
|
230
|
+
* @param {ColumnAggSpec} spec
|
|
231
|
+
* @param {AbortSignal} [signal]
|
|
232
|
+
* @returns {Promise<SqlPrimitive>}
|
|
233
|
+
*/
|
|
234
|
+
async function scanColumnAggregate(table, spec, signal) {
|
|
235
|
+
const values = table.scanColumn({ column: spec.column, signal })
|
|
236
|
+
|
|
237
|
+
if (spec.funcName === 'COUNT' && spec.distinct) {
|
|
238
|
+
const seen = new Set()
|
|
239
|
+
for await (const chunk of values) {
|
|
240
|
+
if (signal?.aborted) return null
|
|
241
|
+
for (let i = 0; i < chunk.length; i++) {
|
|
242
|
+
const v = chunk[i]
|
|
243
|
+
if (v == null) continue
|
|
244
|
+
seen.add(keyify(v))
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
return seen.size
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
if (spec.funcName === 'COUNT') {
|
|
251
|
+
let count = 0
|
|
252
|
+
for await (const chunk of values) {
|
|
253
|
+
if (signal?.aborted) return null
|
|
254
|
+
for (let i = 0; i < chunk.length; i++) {
|
|
255
|
+
if (chunk[i] != null) count++
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
return count
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
// SUM, AVG, MIN, MAX
|
|
262
|
+
let sum = 0
|
|
263
|
+
let count = 0
|
|
264
|
+
/** @type {SqlPrimitive} */
|
|
265
|
+
let min = null
|
|
266
|
+
/** @type {SqlPrimitive} */
|
|
267
|
+
let max = null
|
|
268
|
+
|
|
269
|
+
for await (const chunk of values) {
|
|
270
|
+
if (signal?.aborted) return null
|
|
271
|
+
for (let i = 0; i < chunk.length; i++) {
|
|
272
|
+
const v = chunk[i]
|
|
273
|
+
if (v == null) continue
|
|
274
|
+
if (min === null || v < min) min = v
|
|
275
|
+
if (max === null || v > max) max = v
|
|
276
|
+
const num = Number(v)
|
|
277
|
+
if (!Number.isFinite(num)) continue
|
|
278
|
+
sum += num
|
|
279
|
+
count++
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
if (spec.funcName === 'SUM') return count === 0 ? null : sum
|
|
284
|
+
if (spec.funcName === 'AVG') return count === 0 ? null : sum / count
|
|
285
|
+
if (spec.funcName === 'MIN') return min
|
|
286
|
+
if (spec.funcName === 'MAX') return max
|
|
287
|
+
return null
|
|
288
|
+
}
|
package/src/execute/execute.js
CHANGED
|
@@ -3,15 +3,15 @@ import { derivedAlias } from '../expression/alias.js'
|
|
|
3
3
|
import { evaluateExpr } from '../expression/evaluate.js'
|
|
4
4
|
import { parseSql } from '../parse/parse.js'
|
|
5
5
|
import { planSql } from '../plan/plan.js'
|
|
6
|
-
import {
|
|
6
|
+
import { TableNotFoundError } from '../validation/planErrors.js'
|
|
7
7
|
import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
|
|
8
8
|
import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
|
|
9
9
|
import { executeSort } from './sort.js'
|
|
10
10
|
import { stableRowKey } from './utils.js'
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
|
-
* @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteContext, ExecuteSqlOptions, ExprNode,
|
|
14
|
-
* @import { CountNode, DistinctNode, FilterNode, LimitNode, ProjectNode, QueryPlan, ScanNode } from '../plan/types.js'
|
|
13
|
+
* @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteContext, ExecuteSqlOptions, ExprNode, Statement } from '../types.js'
|
|
14
|
+
* @import { CountNode, DistinctNode, FilterNode, LimitNode, ProjectNode, QueryPlan, ScanNode, SetOperationNode } from '../plan/types.js'
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
17
|
/**
|
|
@@ -21,7 +21,7 @@ import { stableRowKey } from './utils.js'
|
|
|
21
21
|
* @yields {AsyncRow}
|
|
22
22
|
*/
|
|
23
23
|
export async function* executeSql({ tables, query, functions, signal }) {
|
|
24
|
-
const
|
|
24
|
+
const parsed = typeof query === 'string' ? parseSql({ query, functions }) : query
|
|
25
25
|
|
|
26
26
|
// Normalize tables: convert arrays to AsyncDataSource
|
|
27
27
|
/** @type {Record<string, AsyncDataSource>} */
|
|
@@ -34,19 +34,19 @@ export async function* executeSql({ tables, query, functions, signal }) {
|
|
|
34
34
|
}
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
-
yield*
|
|
37
|
+
yield* executeStatement({ query: parsed, context: { tables: normalizedTables, functions, signal } })
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
/**
|
|
41
|
-
* Executes a
|
|
41
|
+
* Executes a statement against the provided tables
|
|
42
42
|
*
|
|
43
43
|
* @param {Object} options
|
|
44
|
-
* @param {
|
|
44
|
+
* @param {Statement} options.query
|
|
45
45
|
* @param {ExecuteContext} options.context
|
|
46
46
|
* @yields {AsyncRow}
|
|
47
47
|
*/
|
|
48
|
-
export async function*
|
|
49
|
-
const plan = planSql({ query
|
|
48
|
+
export async function* executeStatement({ query, context }) {
|
|
49
|
+
const plan = planSql({ query, functions: context.functions, tables: context.tables })
|
|
50
50
|
yield* executePlan({ plan, context })
|
|
51
51
|
}
|
|
52
52
|
|
|
@@ -83,12 +83,12 @@ export async function* executePlan({ plan, context }) {
|
|
|
83
83
|
yield* executeDistinct(plan, context)
|
|
84
84
|
} else if (plan.type === 'Limit') {
|
|
85
85
|
yield* executeLimit(plan, context)
|
|
86
|
+
} else if (plan.type === 'SetOperation') {
|
|
87
|
+
yield* executeSetOperation(plan, context)
|
|
86
88
|
}
|
|
87
89
|
}
|
|
88
90
|
|
|
89
91
|
/**
|
|
90
|
-
* Executes a table scan
|
|
91
|
-
*
|
|
92
92
|
* @param {ScanNode} plan
|
|
93
93
|
* @param {ExecuteContext} context
|
|
94
94
|
* @yields {AsyncRow}
|
|
@@ -98,7 +98,7 @@ async function* executeScan(plan, context) {
|
|
|
98
98
|
// check table
|
|
99
99
|
const table = tables[plan.table]
|
|
100
100
|
if (!table) {
|
|
101
|
-
throw
|
|
101
|
+
throw new TableNotFoundError({ table: plan.table, tables })
|
|
102
102
|
}
|
|
103
103
|
// check columns
|
|
104
104
|
const missingColumn = plan.hints.columns?.find(col => !table.columns.includes(col))
|
|
@@ -140,12 +140,12 @@ async function* executeScan(plan, context) {
|
|
|
140
140
|
async function* executeCount(plan, { tables, signal }) {
|
|
141
141
|
const table = tables[plan.table]
|
|
142
142
|
if (!table) {
|
|
143
|
-
throw
|
|
143
|
+
throw new TableNotFoundError({ table: plan.table, tables })
|
|
144
144
|
}
|
|
145
145
|
|
|
146
146
|
// Use source numRows if available
|
|
147
147
|
let count = table.numRows
|
|
148
|
-
if (
|
|
148
|
+
if (count === undefined) {
|
|
149
149
|
// Fall back to counting rows via scan
|
|
150
150
|
count = 0
|
|
151
151
|
const { rows } = table.scan({ signal })
|
|
@@ -272,12 +272,12 @@ async function* executeProject(plan, context) {
|
|
|
272
272
|
const cells = {}
|
|
273
273
|
|
|
274
274
|
for (const col of plan.columns) {
|
|
275
|
-
if (col.
|
|
275
|
+
if (col.type === 'star') {
|
|
276
276
|
for (const key of row.columns) {
|
|
277
277
|
columns.push(key)
|
|
278
278
|
cells[key] = row.cells[key]
|
|
279
279
|
}
|
|
280
|
-
} else
|
|
280
|
+
} else {
|
|
281
281
|
const alias = col.alias ?? derivedAlias(col.expr)
|
|
282
282
|
columns.push(alias)
|
|
283
283
|
cells[alias] = () => evaluateExpr({
|
|
@@ -304,7 +304,6 @@ async function* executeDistinct(plan, context) {
|
|
|
304
304
|
const { signal } = context
|
|
305
305
|
const MAX_CHUNK = 256
|
|
306
306
|
|
|
307
|
-
/** @type {Set<string>} */
|
|
308
307
|
const seen = new Set()
|
|
309
308
|
|
|
310
309
|
/** @type {AsyncRow[]} */
|
|
@@ -315,10 +314,11 @@ async function* executeDistinct(plan, context) {
|
|
|
315
314
|
buffer.push(row)
|
|
316
315
|
|
|
317
316
|
if (buffer.length >= MAX_CHUNK) {
|
|
318
|
-
const keys =
|
|
317
|
+
const keys = buffer.map(stableRowKey)
|
|
319
318
|
for (let i = 0; i < buffer.length; i++) {
|
|
320
|
-
|
|
321
|
-
|
|
319
|
+
const key = await keys[i]
|
|
320
|
+
if (!seen.has(key)) {
|
|
321
|
+
seen.add(key)
|
|
322
322
|
yield buffer[i]
|
|
323
323
|
}
|
|
324
324
|
}
|
|
@@ -328,10 +328,11 @@ async function* executeDistinct(plan, context) {
|
|
|
328
328
|
|
|
329
329
|
// Flush remaining
|
|
330
330
|
if (buffer.length > 0) {
|
|
331
|
-
const keys =
|
|
331
|
+
const keys = buffer.map(stableRowKey)
|
|
332
332
|
for (let i = 0; i < buffer.length; i++) {
|
|
333
|
-
|
|
334
|
-
|
|
333
|
+
const key = await keys[i]
|
|
334
|
+
if (!seen.has(key)) {
|
|
335
|
+
seen.add(key)
|
|
335
336
|
yield buffer[i]
|
|
336
337
|
}
|
|
337
338
|
}
|
|
@@ -348,3 +349,108 @@ async function* executeDistinct(plan, context) {
|
|
|
348
349
|
async function* executeLimit(plan, context) {
|
|
349
350
|
yield* limitRows(executePlan({ plan: plan.child, context }), plan.limit, plan.offset, context.signal)
|
|
350
351
|
}
|
|
352
|
+
|
|
353
|
+
/**
|
|
354
|
+
* Executes a set operation (UNION, INTERSECT, EXCEPT)
|
|
355
|
+
*
|
|
356
|
+
* @param {SetOperationNode} plan
|
|
357
|
+
* @param {ExecuteContext} context
|
|
358
|
+
* @yields {AsyncRow}
|
|
359
|
+
*/
|
|
360
|
+
async function* executeSetOperation(plan, context) {
|
|
361
|
+
const { signal } = context
|
|
362
|
+
|
|
363
|
+
if (plan.operator === 'UNION') {
|
|
364
|
+
if (plan.all) {
|
|
365
|
+
// UNION ALL: yield all rows from both sides
|
|
366
|
+
yield* executePlan({ plan: plan.left, context })
|
|
367
|
+
yield* executePlan({ plan: plan.right, context })
|
|
368
|
+
} else {
|
|
369
|
+
// UNION: yield deduplicated rows from both sides
|
|
370
|
+
const seen = new Set()
|
|
371
|
+
for await (const row of executePlan({ plan: plan.left, context })) {
|
|
372
|
+
if (signal?.aborted) return
|
|
373
|
+
const key = await stableRowKey(row)
|
|
374
|
+
if (!seen.has(key)) {
|
|
375
|
+
seen.add(key)
|
|
376
|
+
yield row
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
for await (const row of executePlan({ plan: plan.right, context })) {
|
|
380
|
+
if (signal?.aborted) return
|
|
381
|
+
const key = await stableRowKey(row)
|
|
382
|
+
if (!seen.has(key)) {
|
|
383
|
+
seen.add(key)
|
|
384
|
+
yield row
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
} else if (plan.operator === 'INTERSECT') {
|
|
389
|
+
// Materialize right side keys
|
|
390
|
+
/** @type {Map<any, number>} */
|
|
391
|
+
const rightKeys = new Map()
|
|
392
|
+
for await (const row of executePlan({ plan: plan.right, context })) {
|
|
393
|
+
if (signal?.aborted) return
|
|
394
|
+
const key = await stableRowKey(row)
|
|
395
|
+
rightKeys.set(key, (rightKeys.get(key) ?? 0) + 1)
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
if (plan.all) {
|
|
399
|
+
// INTERSECT ALL: yield each left row that matches, consuming right counts
|
|
400
|
+
for await (const row of executePlan({ plan: plan.left, context })) {
|
|
401
|
+
if (signal?.aborted) return
|
|
402
|
+
const key = await stableRowKey(row)
|
|
403
|
+
const count = rightKeys.get(key)
|
|
404
|
+
if (count) {
|
|
405
|
+
rightKeys.set(key, count - 1)
|
|
406
|
+
yield row
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
} else {
|
|
410
|
+
// INTERSECT: yield deduplicated rows present in both
|
|
411
|
+
const seen = new Set()
|
|
412
|
+
for await (const row of executePlan({ plan: plan.left, context })) {
|
|
413
|
+
if (signal?.aborted) return
|
|
414
|
+
const key = await stableRowKey(row)
|
|
415
|
+
if (rightKeys.has(key) && !seen.has(key)) {
|
|
416
|
+
seen.add(key)
|
|
417
|
+
yield row
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
} else if (plan.operator === 'EXCEPT') {
|
|
422
|
+
// Materialize right side keys
|
|
423
|
+
/** @type {Map<any, number>} */
|
|
424
|
+
const rightKeys = new Map()
|
|
425
|
+
for await (const row of executePlan({ plan: plan.right, context })) {
|
|
426
|
+
if (signal?.aborted) return
|
|
427
|
+
const key = await stableRowKey(row)
|
|
428
|
+
rightKeys.set(key, (rightKeys.get(key) ?? 0) + 1)
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
if (plan.all) {
|
|
432
|
+
// EXCEPT ALL: yield left rows, consuming right counts
|
|
433
|
+
for await (const row of executePlan({ plan: plan.left, context })) {
|
|
434
|
+
if (signal?.aborted) return
|
|
435
|
+
const key = await stableRowKey(row)
|
|
436
|
+
const count = rightKeys.get(key)
|
|
437
|
+
if (count) {
|
|
438
|
+
rightKeys.set(key, count - 1)
|
|
439
|
+
} else {
|
|
440
|
+
yield row
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
} else {
|
|
444
|
+
// EXCEPT: yield deduplicated left rows not in right
|
|
445
|
+
const seen = new Set()
|
|
446
|
+
for await (const row of executePlan({ plan: plan.left, context })) {
|
|
447
|
+
if (signal?.aborted) return
|
|
448
|
+
const key = await stableRowKey(row)
|
|
449
|
+
if (!rightKeys.has(key) && !seen.has(key)) {
|
|
450
|
+
seen.add(key)
|
|
451
|
+
yield row
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
}
|