squirreling 0.12.10 → 0.12.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/execute/execute.js +4 -14
- package/src/execute/join.js +30 -21
- package/src/expression/evaluate.js +15 -1
- package/src/expression/strings.js +19 -4
- package/src/parse/joins.js +93 -2
- package/src/plan/columns.js +37 -0
- package/src/plan/plan.js +97 -21
- package/src/plan/types.d.ts +17 -2
- package/src/validation/functions.js +2 -1
- package/src/validation/keywords.js +1 -1
- package/src/validation/tables.js +21 -16
package/README.md
CHANGED
|
@@ -141,7 +141,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
|
|
|
141
141
|
- `SELECT` statements with `DISTINCT`, `WHERE`, `ORDER BY`, `LIMIT`, `OFFSET`
|
|
142
142
|
- `WITH` clause for Common Table Expressions (CTEs)
|
|
143
143
|
- Subqueries in `SELECT`, `FROM`, `WHERE`, and correlated subqueries
|
|
144
|
-
- `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`
|
|
144
|
+
- `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`, `LATERAL VIEW [OUTER] EXPLODE(...)`
|
|
145
145
|
- `GROUP BY` and `HAVING` clauses
|
|
146
146
|
- Set operations: `UNION`, `UNION ALL`, `INTERSECT`, `INTERSECT ALL`, `EXCEPT`, `EXCEPT ALL`
|
|
147
147
|
- Expressions: `CASE`, `CAST`, `BETWEEN`, `IN`, `LIKE`, `IS NULL`, `IS NOT NULL`
|
|
@@ -161,7 +161,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
|
|
|
161
161
|
- Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_PART`, `DATE_TRUNC`, `EXTRACT`, `INTERVAL`
|
|
162
162
|
- Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`, `JSON_ARRAY_LENGTH`, `JSON_VALID`, `JSON_TYPE`
|
|
163
163
|
- Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_SORT`, `CARDINALITY`
|
|
164
|
-
- Table functions: `UNNEST`, `JSON_EACH`
|
|
164
|
+
- Table functions: `UNNEST`, `EXPLODE`, `JSON_EACH`
|
|
165
165
|
- Regex: `REGEXP_SUBSTR`, `REGEXP_EXTRACT`, `REGEXP_REPLACE`, `REGEXP_MATCHES`
|
|
166
166
|
- Spatial: `ST_GeomFromText`, `ST_MakeEnvelope`, `ST_AsText`, `ST_Intersects`, `ST_Contains`, `ST_ContainsProperly`, `ST_Within`, `ST_Overlaps`, `ST_Touches`, `ST_Equals`, `ST_Crosses`, `ST_Covers`, `ST_CoveredBy`, `ST_DWithin`
|
|
167
167
|
- Conditional: `COALESCE`, `NULLIF`, `GREATEST`, `LEAST`
|
package/package.json
CHANGED
package/src/execute/execute.js
CHANGED
|
@@ -3,7 +3,7 @@ import { derivedAlias } from '../expression/alias.js'
|
|
|
3
3
|
import { evaluateExpr } from '../expression/evaluate.js'
|
|
4
4
|
import { parseSql } from '../parse/parse.js'
|
|
5
5
|
import { planSql, planStatement } from '../plan/plan.js'
|
|
6
|
-
import {
|
|
6
|
+
import { statementScope } from '../plan/columns.js'
|
|
7
7
|
import { validateScan, validateTable } from '../validation/tables.js'
|
|
8
8
|
import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
|
|
9
9
|
import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
|
|
@@ -72,18 +72,6 @@ export function executeStatement({ query, context, outerScope }) {
|
|
|
72
72
|
return executePlan({ plan, context: scope ? { ...context, scope } : context })
|
|
73
73
|
}
|
|
74
74
|
|
|
75
|
-
/**
|
|
76
|
-
* Extracts the table aliases from a statement's FROM and JOIN clauses.
|
|
77
|
-
*
|
|
78
|
-
* @param {Statement} stmt
|
|
79
|
-
* @returns {string[] | undefined}
|
|
80
|
-
*/
|
|
81
|
-
function statementScope(stmt) {
|
|
82
|
-
if (stmt.type === 'with') return statementScope(stmt.query)
|
|
83
|
-
if (stmt.type === 'compound') return undefined
|
|
84
|
-
return [fromAlias(stmt.from), ...stmt.joins.map(j => j.alias ?? j.table)]
|
|
85
|
-
}
|
|
86
|
-
|
|
87
75
|
/**
|
|
88
76
|
* Executes a query plan and returns query results with row count estimates
|
|
89
77
|
*
|
|
@@ -119,6 +107,8 @@ export function executePlan({ plan, context }) {
|
|
|
119
107
|
return executeLimit(plan, context)
|
|
120
108
|
} else if (plan.type === 'SetOperation') {
|
|
121
109
|
return executeSetOperation(plan, context)
|
|
110
|
+
} else if (plan.type === 'Subquery') {
|
|
111
|
+
return executePlan({ plan: plan.child, context: { ...context, scope: plan.scope } })
|
|
122
112
|
} else if (plan.type === 'TableFunction') {
|
|
123
113
|
return executeTableFunction(plan, context)
|
|
124
114
|
} else if (plan.type === 'Window') {
|
|
@@ -137,7 +127,7 @@ export function executePlan({ plan, context }) {
|
|
|
137
127
|
* @returns {QueryResults}
|
|
138
128
|
*/
|
|
139
129
|
function executeTableFunction(plan, context) {
|
|
140
|
-
if (plan.funcName === 'UNNEST') {
|
|
130
|
+
if (plan.funcName === 'UNNEST' || plan.funcName === 'EXPLODE') {
|
|
141
131
|
return executeUnnest(plan, context)
|
|
142
132
|
} else if (plan.funcName === 'JSON_EACH') {
|
|
143
133
|
return executeJsonEach(plan, context)
|
package/src/execute/join.js
CHANGED
|
@@ -200,6 +200,7 @@ export function executeHashJoin(plan, context) {
|
|
|
200
200
|
async *rows() {
|
|
201
201
|
const leftTable = plan.leftAlias
|
|
202
202
|
const rightTable = plan.rightAlias
|
|
203
|
+
const { leftKeys, rightKeys, residual } = plan
|
|
203
204
|
|
|
204
205
|
// Buffer right rows and build hash map
|
|
205
206
|
/** @type {AsyncRow[]} */
|
|
@@ -209,16 +210,16 @@ export function executeHashJoin(plan, context) {
|
|
|
209
210
|
rightRows.push(row)
|
|
210
211
|
}
|
|
211
212
|
|
|
212
|
-
/** @type {Map<
|
|
213
|
+
/** @type {Map<string | number | bigint | boolean, AsyncRow[]>} */
|
|
213
214
|
const hashMap = new Map()
|
|
214
215
|
for (const rightRow of rightRows) {
|
|
215
|
-
const
|
|
216
|
-
node:
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
if (
|
|
221
|
-
const key = keyify(
|
|
216
|
+
const keyValues = await Promise.all(
|
|
217
|
+
rightKeys.map(node => evaluateExpr({ node, row: rightRow, context }))
|
|
218
|
+
)
|
|
219
|
+
// SQL semantics: NULL never equals anything, so a row with any NULL
|
|
220
|
+
// join key is excluded from the hash table.
|
|
221
|
+
if (keyValues.some(v => v == null)) continue
|
|
222
|
+
const key = keyify(...keyValues)
|
|
222
223
|
let bucket = hashMap.get(key)
|
|
223
224
|
if (!bucket) {
|
|
224
225
|
bucket = []
|
|
@@ -243,20 +244,28 @@ export function executeHashJoin(plan, context) {
|
|
|
243
244
|
leftCols = leftRow.columns
|
|
244
245
|
}
|
|
245
246
|
|
|
246
|
-
const
|
|
247
|
-
node:
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
247
|
+
const keyValues = await Promise.all(
|
|
248
|
+
leftKeys.map(node => evaluateExpr({ node, row: leftRow, context }))
|
|
249
|
+
)
|
|
250
|
+
let matched = false
|
|
251
|
+
if (!keyValues.some(v => v == null)) {
|
|
252
|
+
const key = keyify(...keyValues)
|
|
253
|
+
const candidates = hashMap.get(key)
|
|
254
|
+
if (candidates?.length) {
|
|
255
|
+
for (const rightRow of candidates) {
|
|
256
|
+
const merged = mergeRows(leftRow, rightRow, leftTable, rightTable)
|
|
257
|
+
if (residual) {
|
|
258
|
+
const ok = await evaluateExpr({ node: residual, row: merged, context })
|
|
259
|
+
if (!ok) continue
|
|
260
|
+
}
|
|
261
|
+
matched = true
|
|
262
|
+
matchedRightRows?.add(rightRow)
|
|
263
|
+
yield merged
|
|
264
|
+
}
|
|
258
265
|
}
|
|
259
|
-
}
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
if (!matched && (plan.joinType === 'LEFT' || plan.joinType === 'FULL')) {
|
|
260
269
|
const nullRight = createNullRow(rightCols)
|
|
261
270
|
yield mergeRows(leftRow, nullRight, leftTable, rightTable)
|
|
262
271
|
}
|
|
@@ -47,6 +47,17 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
|
|
|
47
47
|
return value[node.name]
|
|
48
48
|
}
|
|
49
49
|
}
|
|
50
|
+
// Struct dot access where the prefix is itself a column name (bare or
|
|
51
|
+
// table-qualified), e.g. `item.name` reading field `name` from a struct
|
|
52
|
+
// column `item` (often introduced via UNNEST AS tc(item)).
|
|
53
|
+
const suffix = '.' + node.prefix
|
|
54
|
+
const baseColumns = row.columns.filter(col => col === node.prefix || col.endsWith(suffix))
|
|
55
|
+
if (baseColumns.length === 1) {
|
|
56
|
+
const value = await row.cells[baseColumns[0]]()
|
|
57
|
+
if (isPlainObject(value) && Object.prototype.hasOwnProperty.call(value, node.name)) {
|
|
58
|
+
return value[node.name]
|
|
59
|
+
}
|
|
60
|
+
}
|
|
50
61
|
// Check outer row for correlated subquery references
|
|
51
62
|
if (context.outerRow && context.outerAliases?.has(node.prefix) && node.name in context.outerRow.cells) {
|
|
52
63
|
return context.outerRow.cells[node.name]()
|
|
@@ -603,7 +614,10 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
|
|
|
603
614
|
return Math.trunc(num)
|
|
604
615
|
}
|
|
605
616
|
if (toType === 'BIGINT') {
|
|
606
|
-
|
|
617
|
+
if (typeof val === 'bigint') return val
|
|
618
|
+
const num = Number(val)
|
|
619
|
+
if (isNaN(num)) return null
|
|
620
|
+
return BigInt(Math.trunc(num))
|
|
607
621
|
}
|
|
608
622
|
if (toType === 'FLOAT' || toType === 'REAL' || toType === 'DOUBLE') {
|
|
609
623
|
const num = Number(val)
|
|
@@ -32,6 +32,25 @@ export function evaluateStringFunc({ funcName, node, args, rowIndex }) {
|
|
|
32
32
|
// String first arg
|
|
33
33
|
const [val] = args
|
|
34
34
|
if (val == null) return null
|
|
35
|
+
|
|
36
|
+
if (funcName === 'LENGTH') {
|
|
37
|
+
if (typeof val === 'string' || Array.isArray(val)) return val.length
|
|
38
|
+
throw new ArgValueError({
|
|
39
|
+
...node,
|
|
40
|
+
message: `expected string or array, got ${typeof val === 'object' ? val instanceof Date ? 'date' : 'object' : typeof val}`,
|
|
41
|
+
hint: 'Use CAST to convert to a string first.',
|
|
42
|
+
rowIndex,
|
|
43
|
+
})
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
if (typeof val === 'object' && !(val instanceof Date)) {
|
|
47
|
+
throw new ArgValueError({
|
|
48
|
+
...node,
|
|
49
|
+
message: `does not support ${Array.isArray(val) ? 'array' : 'object'} arguments`,
|
|
50
|
+
hint: 'Use CAST to convert to a string first.',
|
|
51
|
+
rowIndex,
|
|
52
|
+
})
|
|
53
|
+
}
|
|
35
54
|
const str = String(val)
|
|
36
55
|
|
|
37
56
|
if (funcName === 'UPPER') {
|
|
@@ -42,10 +61,6 @@ export function evaluateStringFunc({ funcName, node, args, rowIndex }) {
|
|
|
42
61
|
return str.toLowerCase()
|
|
43
62
|
}
|
|
44
63
|
|
|
45
|
-
if (funcName === 'LENGTH') {
|
|
46
|
-
return str.length
|
|
47
|
-
}
|
|
48
|
-
|
|
49
64
|
if (funcName === 'SUBSTRING' || funcName === 'SUBSTR') {
|
|
50
65
|
const start = Number(args[1])
|
|
51
66
|
if (!Number.isInteger(start) || start < 1) {
|
package/src/parse/joins.js
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { expectNoAggregate } from '../validation/aggregates.js'
|
|
2
|
+
import { isTableFunction, validateFunctionArgs } from '../validation/functions.js'
|
|
2
3
|
import { ParseError } from '../validation/parseErrors.js'
|
|
3
4
|
import { parseExpression } from './expression.js'
|
|
4
|
-
import { isTableFunctionStart, parseFromFunction, parseTableAlias } from './parse.js'
|
|
5
|
-
import { current, expect, match } from './state.js'
|
|
5
|
+
import { isTableFunctionStart, parseFromFunction, parseTableAlias, tableFunctionColumnCount, tableFunctionDefaultColumns } from './parse.js'
|
|
6
|
+
import { consume, current, expect, match } from './state.js'
|
|
6
7
|
|
|
7
8
|
/**
|
|
8
9
|
* @import { ExprNode, JoinClause, JoinType, ParserState } from '../types.js'
|
|
@@ -19,6 +20,96 @@ export function parseJoins(state) {
|
|
|
19
20
|
while (true) {
|
|
20
21
|
const tok = current(state)
|
|
21
22
|
|
|
23
|
+
// LATERAL VIEW [OUTER] func(args) tableAlias AS colAlias[, ...] (Spark/Hive style)
|
|
24
|
+
if (current(state).type === 'keyword' && current(state).value === 'LATERAL') {
|
|
25
|
+
const lateralStart = tok.positionStart
|
|
26
|
+
consume(state)
|
|
27
|
+
expect(state, 'keyword', 'VIEW')
|
|
28
|
+
const isOuter = match(state, 'keyword', 'OUTER')
|
|
29
|
+
const funcTok = current(state)
|
|
30
|
+
if (funcTok.type !== 'identifier' || !isTableFunction(funcTok.value.toUpperCase())) {
|
|
31
|
+
throw new ParseError({
|
|
32
|
+
message: 'LATERAL VIEW requires a table function like EXPLODE',
|
|
33
|
+
positionStart: funcTok.positionStart,
|
|
34
|
+
positionEnd: funcTok.positionEnd,
|
|
35
|
+
})
|
|
36
|
+
}
|
|
37
|
+
consume(state)
|
|
38
|
+
const funcName = funcTok.value.toUpperCase()
|
|
39
|
+
expect(state, 'paren', '(')
|
|
40
|
+
/** @type {ExprNode[]} */
|
|
41
|
+
const args = []
|
|
42
|
+
if (!match(state, 'paren', ')')) {
|
|
43
|
+
while (true) {
|
|
44
|
+
args.push(parseExpression(state))
|
|
45
|
+
if (!match(state, 'comma')) break
|
|
46
|
+
}
|
|
47
|
+
expect(state, 'paren', ')')
|
|
48
|
+
}
|
|
49
|
+
validateFunctionArgs(funcName, args.length, funcTok.positionStart, state.lastPos, state.functions)
|
|
50
|
+
|
|
51
|
+
const aliasTok = current(state)
|
|
52
|
+
if (aliasTok.type !== 'identifier') {
|
|
53
|
+
throw new ParseError({
|
|
54
|
+
message: 'LATERAL VIEW requires a table alias before AS',
|
|
55
|
+
positionStart: aliasTok.positionStart,
|
|
56
|
+
positionEnd: aliasTok.positionEnd,
|
|
57
|
+
})
|
|
58
|
+
}
|
|
59
|
+
consume(state)
|
|
60
|
+
const tableAlias = aliasTok.value
|
|
61
|
+
|
|
62
|
+
expect(state, 'keyword', 'AS')
|
|
63
|
+
/** @type {string[]} */
|
|
64
|
+
const columnAliases = []
|
|
65
|
+
const colStart = state.lastPos
|
|
66
|
+
while (true) {
|
|
67
|
+
const colTok = expect(state, 'identifier')
|
|
68
|
+
columnAliases.push(colTok.value)
|
|
69
|
+
if (!match(state, 'comma')) break
|
|
70
|
+
}
|
|
71
|
+
const maxCols = tableFunctionColumnCount(funcName)
|
|
72
|
+
if (columnAliases.length > maxCols) {
|
|
73
|
+
const colLabels = tableFunctionDefaultColumns(funcName).join(', ')
|
|
74
|
+
throw new ParseError({
|
|
75
|
+
message: maxCols === 1
|
|
76
|
+
? `${funcName} produces a single column; only one column alias is allowed`
|
|
77
|
+
: `${funcName} produces at most ${maxCols} columns (${colLabels}); too many column aliases`,
|
|
78
|
+
positionStart: colStart,
|
|
79
|
+
positionEnd: state.lastPos,
|
|
80
|
+
})
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/** @type {import('../ast.js').FromFunction} */
|
|
84
|
+
const fromFunction = {
|
|
85
|
+
type: 'function',
|
|
86
|
+
funcName,
|
|
87
|
+
args,
|
|
88
|
+
alias: tableAlias,
|
|
89
|
+
columnAliases,
|
|
90
|
+
positionStart: funcTok.positionStart,
|
|
91
|
+
positionEnd: state.lastPos,
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/** @type {JoinType} */
|
|
95
|
+
const joinType = isOuter ? 'LEFT' : 'CROSS'
|
|
96
|
+
/** @type {ExprNode | undefined} */
|
|
97
|
+
const condition = isOuter
|
|
98
|
+
? { type: 'literal', value: true, positionStart: lateralStart, positionEnd: state.lastPos }
|
|
99
|
+
: undefined
|
|
100
|
+
|
|
101
|
+
joins.push({
|
|
102
|
+
joinType,
|
|
103
|
+
table: funcName,
|
|
104
|
+
alias: tableAlias,
|
|
105
|
+
on: condition,
|
|
106
|
+
fromFunction,
|
|
107
|
+
positionStart: lateralStart,
|
|
108
|
+
positionEnd: state.lastPos,
|
|
109
|
+
})
|
|
110
|
+
continue
|
|
111
|
+
}
|
|
112
|
+
|
|
22
113
|
// Comma-join: implicit CROSS JOIN LATERAL, currently only for table functions.
|
|
23
114
|
if (match(state, 'comma')) {
|
|
24
115
|
if (!isTableFunctionStart(state)) {
|
package/src/plan/columns.js
CHANGED
|
@@ -18,6 +18,22 @@ export function fromAlias(from) {
|
|
|
18
18
|
return 'table'
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
+
/**
|
|
22
|
+
* Returns the FROM/JOIN aliases visible inside a statement's body — its
|
|
23
|
+
* lexical scope. Used to set context.scope when entering a derived-table
|
|
24
|
+
* subplan, so correlated subqueries inside resolve outer references against
|
|
25
|
+
* the right aliases. Returns undefined for compound statements (UNION etc.)
|
|
26
|
+
* which have no single scope.
|
|
27
|
+
*
|
|
28
|
+
* @param {Statement} stmt
|
|
29
|
+
* @returns {string[] | undefined}
|
|
30
|
+
*/
|
|
31
|
+
export function statementScope(stmt) {
|
|
32
|
+
if (stmt.type === 'with') return statementScope(stmt.query)
|
|
33
|
+
if (stmt.type === 'compound') return undefined
|
|
34
|
+
return [fromAlias(stmt.from), ...stmt.joins.map(j => j.alias ?? j.table)]
|
|
35
|
+
}
|
|
36
|
+
|
|
21
37
|
/**
|
|
22
38
|
* Returns the output column names for a FROM table function, applying any
|
|
23
39
|
* column aliases over the function's default column names.
|
|
@@ -400,3 +416,24 @@ export function inferSelectSourceColumns({ select, cteColumns, tables }) {
|
|
|
400
416
|
function lookupTableColumns(table, cteColumns, tables) {
|
|
401
417
|
return cteColumns?.get(table.toLowerCase()) ?? tables?.[table]?.columns ?? []
|
|
402
418
|
}
|
|
419
|
+
|
|
420
|
+
/**
|
|
421
|
+
* Collects bare column names exposed by a SELECT's FROM and joins. Used by
|
|
422
|
+
* validation to recognize struct-field dot access (e.g. `item.name` on a
|
|
423
|
+
* struct-valued column `item`) instead of rejecting the prefix as an unknown
|
|
424
|
+
* table.
|
|
425
|
+
*
|
|
426
|
+
* @param {object} options
|
|
427
|
+
* @param {SelectStatement} options.select
|
|
428
|
+
* @param {Map<string, string[]>} [options.cteColumns]
|
|
429
|
+
* @param {Record<string, AsyncDataSource>} [options.tables]
|
|
430
|
+
* @returns {Set<string>}
|
|
431
|
+
*/
|
|
432
|
+
export function collectScopeColumns({ select, cteColumns, tables }) {
|
|
433
|
+
const result = new Set()
|
|
434
|
+
for (const col of inferSelectSourceColumns({ select, cteColumns, tables })) {
|
|
435
|
+
const dot = col.indexOf('.')
|
|
436
|
+
result.add(dot >= 0 ? col.slice(dot + 1) : col)
|
|
437
|
+
}
|
|
438
|
+
return result
|
|
439
|
+
}
|
package/src/plan/plan.js
CHANGED
|
@@ -4,7 +4,7 @@ import { findAggregate } from '../validation/aggregates.js'
|
|
|
4
4
|
import { ParseError } from '../validation/parseErrors.js'
|
|
5
5
|
import { ColumnNotFoundError, TableNotFoundError } from '../validation/tables.js'
|
|
6
6
|
import { validateNoIdentifiers, validateScan, validateTableRefs } from '../validation/tables.js'
|
|
7
|
-
import { extractColumns, fromAlias, inferSelectSourceColumns, inferStatementColumns, tableFunctionColumnNames } from './columns.js'
|
|
7
|
+
import { collectScopeColumns, extractColumns, fromAlias, inferSelectSourceColumns, inferStatementColumns, statementScope, tableFunctionColumnNames } from './columns.js'
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* @import { AsyncDataSource, ExprNode, DerivedColumn, IdentifierNode, JoinClause, OrderByItem, PlanSqlOptions, ScanOptions, SelectColumn, SelectStatement, SetOperationStatement, Statement, WindowFunctionNode } from '../types.js'
|
|
@@ -159,11 +159,15 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
|
|
|
159
159
|
// Resolve aliases (and validate qualified references)
|
|
160
160
|
// Include outerScope aliases so correlated references pass validation
|
|
161
161
|
const scopeTables = Object.fromEntries([sourceAlias, ...select.joins.map(j => j.alias ?? j.table), ...outerScope ?? []].map(a => [a, true]))
|
|
162
|
+
// Bare column names in scope, so the validator can recognize struct-field
|
|
163
|
+
// dot access on a column (e.g. `item.name` where `item` is an unnested
|
|
164
|
+
// struct column) rather than rejecting `item` as an unknown table.
|
|
165
|
+
const scopeColumns = collectScopeColumns({ select, cteColumns, tables })
|
|
162
166
|
/** @type {Map<string, ExprNode>} */
|
|
163
167
|
const aliases = new Map()
|
|
164
168
|
const columns = select.columns.map(col => {
|
|
165
169
|
if (col.type === 'derived') {
|
|
166
|
-
validateTableRefs(col.expr, scopeTables)
|
|
170
|
+
validateTableRefs(col.expr, scopeTables, scopeColumns)
|
|
167
171
|
const expr = resolveAliases(col.expr, aliases)
|
|
168
172
|
if (col.alias) {
|
|
169
173
|
aliases.set(col.alias, expr)
|
|
@@ -180,16 +184,16 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
|
|
|
180
184
|
const orderBy = resolveOrderByAliases(select.orderBy, aliases)
|
|
181
185
|
|
|
182
186
|
// Validate qualified references in other clauses
|
|
183
|
-
validateTableRefs(select.where, scopeTables)
|
|
184
|
-
validateTableRefs(select.having, scopeTables)
|
|
187
|
+
validateTableRefs(select.where, scopeTables, scopeColumns)
|
|
188
|
+
validateTableRefs(select.having, scopeTables, scopeColumns)
|
|
185
189
|
for (const expr of select.groupBy) {
|
|
186
|
-
validateTableRefs(expr, scopeTables)
|
|
190
|
+
validateTableRefs(expr, scopeTables, scopeColumns)
|
|
187
191
|
}
|
|
188
192
|
for (const term of select.orderBy) {
|
|
189
|
-
validateTableRefs(term.expr, scopeTables)
|
|
193
|
+
validateTableRefs(term.expr, scopeTables, scopeColumns)
|
|
190
194
|
}
|
|
191
195
|
for (const join of select.joins) {
|
|
192
|
-
validateTableRefs(join.on, scopeTables)
|
|
196
|
+
validateTableRefs(join.on, scopeTables, scopeColumns)
|
|
193
197
|
}
|
|
194
198
|
|
|
195
199
|
// Determine scan hints for direct table scans (WHERE and LIMIT/OFFSET are
|
|
@@ -359,6 +363,14 @@ function planFrom({ select, ctePlans, cteColumns, hints, tables, outerScope }) {
|
|
|
359
363
|
throw new ColumnNotFoundError({ missingColumn, availableColumns, ...select.from })
|
|
360
364
|
}
|
|
361
365
|
}
|
|
366
|
+
// Wrap with the inner SELECT's scope so correlated subqueries inside the
|
|
367
|
+
// derived table resolve outer references against the inner aliases, not
|
|
368
|
+
// the enclosing query's. Compound subqueries (UNION etc.) have no single
|
|
369
|
+
// scope and pass through unwrapped.
|
|
370
|
+
const innerScope = statementScope(select.from.query)
|
|
371
|
+
if (innerScope) {
|
|
372
|
+
return { type: 'Subquery', scope: innerScope, child: subPlan }
|
|
373
|
+
}
|
|
362
374
|
return subPlan
|
|
363
375
|
}
|
|
364
376
|
}
|
|
@@ -443,18 +455,21 @@ function planJoin({ left, joins, leftTable, ctePlans, cteColumns, perTableColumn
|
|
|
443
455
|
if (join.joinType === 'POSITIONAL') {
|
|
444
456
|
plan = { type: 'PositionalJoin', leftAlias: currentLeftTable, rightAlias: rightTable, left: plan, right: rightScan }
|
|
445
457
|
} else {
|
|
446
|
-
const keys = join.on &&
|
|
458
|
+
const keys = join.on && extractEquiKeys({ condition: join.on, leftTable: currentLeftTable, rightTable })
|
|
447
459
|
if (keys) {
|
|
448
|
-
|
|
460
|
+
/** @type {import('./types.d.ts').HashJoinNode} */
|
|
461
|
+
const hashJoin = {
|
|
449
462
|
type: 'HashJoin',
|
|
450
463
|
joinType: join.joinType,
|
|
451
464
|
leftAlias: currentLeftTable,
|
|
452
465
|
rightAlias: rightTable,
|
|
453
|
-
|
|
454
|
-
|
|
466
|
+
leftKeys: keys.leftKeys,
|
|
467
|
+
rightKeys: keys.rightKeys,
|
|
455
468
|
left: plan,
|
|
456
469
|
right: rightScan,
|
|
457
470
|
}
|
|
471
|
+
if (keys.residual) hashJoin.residual = keys.residual
|
|
472
|
+
plan = hashJoin
|
|
458
473
|
} else {
|
|
459
474
|
plan = {
|
|
460
475
|
type: 'NestedLoopJoin',
|
|
@@ -601,28 +616,89 @@ function normalizeIdentifiers(node, sourceColumns) {
|
|
|
601
616
|
}
|
|
602
617
|
|
|
603
618
|
/**
|
|
604
|
-
*
|
|
605
|
-
*
|
|
619
|
+
* Splits a join ON expression into equi-key pairs and a residual predicate so
|
|
620
|
+
* the planner can route AND-of-equis (with optional range/inequality
|
|
621
|
+
* conjuncts) to the hash-join path. Conjuncts of the form
|
|
622
|
+
* `<left-ref> = <right-ref>` between two identifiers become hash keys; every
|
|
623
|
+
* other conjunct stays as part of the residual that will run after the hash
|
|
624
|
+
* lookup. Returns undefined when no equi conjunct is present so the caller
|
|
625
|
+
* falls back to the nested-loop path.
|
|
606
626
|
*
|
|
607
627
|
* @param {object} options
|
|
608
628
|
* @param {ExprNode} options.condition
|
|
609
629
|
* @param {string} options.leftTable
|
|
610
630
|
* @param {string} options.rightTable
|
|
611
|
-
* @returns {{
|
|
631
|
+
* @returns {{ leftKeys: ExprNode[], rightKeys: ExprNode[], residual?: ExprNode } | undefined}
|
|
632
|
+
*/
|
|
633
|
+
function extractEquiKeys({ condition, leftTable, rightTable }) {
|
|
634
|
+
/** @type {ExprNode[]} */
|
|
635
|
+
const conjuncts = []
|
|
636
|
+
collectConjuncts(condition, conjuncts)
|
|
637
|
+
/** @type {ExprNode[]} */
|
|
638
|
+
const leftKeys = []
|
|
639
|
+
/** @type {ExprNode[]} */
|
|
640
|
+
const rightKeys = []
|
|
641
|
+
/** @type {ExprNode[]} */
|
|
642
|
+
const residuals = []
|
|
643
|
+
for (const conjunct of conjuncts) {
|
|
644
|
+
const eq = classifyEquiConjunct(conjunct, leftTable, rightTable)
|
|
645
|
+
if (eq) {
|
|
646
|
+
leftKeys.push(eq.leftKey)
|
|
647
|
+
rightKeys.push(eq.rightKey)
|
|
648
|
+
} else {
|
|
649
|
+
residuals.push(conjunct)
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
if (!leftKeys.length) return undefined
|
|
653
|
+
/** @type {ExprNode | undefined} */
|
|
654
|
+
let residual
|
|
655
|
+
for (const r of residuals) {
|
|
656
|
+
residual = residual === undefined
|
|
657
|
+
? r
|
|
658
|
+
: { type: 'binary', op: 'AND', left: residual, right: r, positionStart: residual.positionStart, positionEnd: r.positionEnd }
|
|
659
|
+
}
|
|
660
|
+
return residual ? { leftKeys, rightKeys, residual } : { leftKeys, rightKeys }
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
/**
|
|
664
|
+
* Walks an ON expression, flattening top-level AND conjuncts. Non-AND nodes
|
|
665
|
+
* are pushed verbatim. Used to expose individual predicates so equi-keys can
|
|
666
|
+
* be lifted out of an AND chain.
|
|
667
|
+
*
|
|
668
|
+
* @param {ExprNode} node
|
|
669
|
+
* @param {ExprNode[]} out
|
|
612
670
|
*/
|
|
613
|
-
function
|
|
614
|
-
if (
|
|
615
|
-
|
|
616
|
-
|
|
671
|
+
function collectConjuncts(node, out) {
|
|
672
|
+
if (node.type === 'binary' && node.op === 'AND') {
|
|
673
|
+
collectConjuncts(node.left, out)
|
|
674
|
+
collectConjuncts(node.right, out)
|
|
675
|
+
return
|
|
676
|
+
}
|
|
677
|
+
out.push(node)
|
|
678
|
+
}
|
|
617
679
|
|
|
618
|
-
|
|
680
|
+
/**
|
|
681
|
+
* Returns the (leftKey, rightKey) pair for an equi conjunct, oriented so
|
|
682
|
+
* leftKey references the left input and rightKey references the right input.
|
|
683
|
+
* Returns undefined when the conjunct is not a `<identifier> = <identifier>`
|
|
684
|
+
* predicate. When the prefixes don't unambiguously identify a side, falls
|
|
685
|
+
* through to the original orientation — matches the prior single-equi
|
|
686
|
+
* behavior so unprefixed columns still produce a hash join.
|
|
687
|
+
*
|
|
688
|
+
* @param {ExprNode} conjunct
|
|
689
|
+
* @param {string} leftTable
|
|
690
|
+
* @param {string} rightTable
|
|
691
|
+
* @returns {{ leftKey: ExprNode, rightKey: ExprNode } | undefined}
|
|
692
|
+
*/
|
|
693
|
+
function classifyEquiConjunct(conjunct, leftTable, rightTable) {
|
|
694
|
+
if (conjunct.type !== 'binary' || conjunct.op !== '=') return undefined
|
|
695
|
+
const { left, right } = conjunct
|
|
696
|
+
if (left.type !== 'identifier' || right.type !== 'identifier') return undefined
|
|
619
697
|
const leftRefsRight = left.prefix === rightTable
|
|
620
698
|
const rightRefsLeft = right.prefix === leftTable
|
|
621
|
-
|
|
622
699
|
if (leftRefsRight && rightRefsLeft) {
|
|
623
700
|
return { leftKey: right, rightKey: left }
|
|
624
701
|
}
|
|
625
|
-
|
|
626
702
|
return { leftKey: left, rightKey: right }
|
|
627
703
|
}
|
|
628
704
|
|
package/src/plan/types.d.ts
CHANGED
|
@@ -14,6 +14,7 @@ export type QueryPlan =
|
|
|
14
14
|
| NestedLoopJoinNode
|
|
15
15
|
| PositionalJoinNode
|
|
16
16
|
| SetOperationNode
|
|
17
|
+
| SubqueryNode
|
|
17
18
|
| TableFunctionNode
|
|
18
19
|
| WindowNode
|
|
19
20
|
|
|
@@ -85,8 +86,11 @@ export interface HashJoinNode {
|
|
|
85
86
|
joinType: JoinType
|
|
86
87
|
leftAlias: string
|
|
87
88
|
rightAlias: string
|
|
88
|
-
|
|
89
|
-
|
|
89
|
+
leftKeys: ExprNode[]
|
|
90
|
+
rightKeys: ExprNode[]
|
|
91
|
+
// Non-equi conjuncts from the ON clause (e.g. range predicates) applied to
|
|
92
|
+
// each merged candidate after the hash lookup succeeds.
|
|
93
|
+
residual?: ExprNode
|
|
90
94
|
left: QueryPlan
|
|
91
95
|
right: QueryPlan
|
|
92
96
|
}
|
|
@@ -119,6 +123,17 @@ export interface SetOperationNode {
|
|
|
119
123
|
right: QueryPlan
|
|
120
124
|
}
|
|
121
125
|
|
|
126
|
+
// Wraps a derived-table or CTE subplan with the lexical alias scope of its
|
|
127
|
+
// inner SELECT, so the executor can set context.scope while traversing the
|
|
128
|
+
// subtree. Correlated subqueries inside the subtree resolve outer references
|
|
129
|
+
// against this scope, not whichever ancestor most recently went through
|
|
130
|
+
// executeStatement.
|
|
131
|
+
export interface SubqueryNode {
|
|
132
|
+
type: 'Subquery'
|
|
133
|
+
scope: string[]
|
|
134
|
+
child: QueryPlan
|
|
135
|
+
}
|
|
136
|
+
|
|
122
137
|
// Table-valued function (e.g. UNNEST) used in FROM clause
|
|
123
138
|
export interface TableFunctionNode {
|
|
124
139
|
type: 'TableFunction'
|
|
@@ -49,7 +49,7 @@ export function isRegexpFunc(name) {
|
|
|
49
49
|
* @returns {boolean}
|
|
50
50
|
*/
|
|
51
51
|
export function isTableFunction(name) {
|
|
52
|
-
return ['UNNEST', 'JSON_EACH'].includes(name)
|
|
52
|
+
return ['UNNEST', 'EXPLODE', 'JSON_EACH'].includes(name)
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
/**
|
|
@@ -187,6 +187,7 @@ export const FUNCTION_SIGNATURES = {
|
|
|
187
187
|
|
|
188
188
|
// Table functions (used in FROM clause)
|
|
189
189
|
UNNEST: { min: 1, max: 1, signature: 'array' },
|
|
190
|
+
EXPLODE: { min: 1, max: 1, signature: 'array' },
|
|
190
191
|
JSON_EACH: { min: 1, max: 1, signature: 'value' },
|
|
191
192
|
|
|
192
193
|
// Conditional functions
|
|
@@ -3,7 +3,7 @@ export const KEYWORDS = new Set([
|
|
|
3
3
|
'HAVING', 'ORDER', 'ASC', 'DESC', 'NULLS', 'LIMIT', 'OFFSET', 'AS', 'ALL',
|
|
4
4
|
'DISTINCT', 'TRUE', 'FALSE', 'NULL', 'LIKE', 'IN', 'EXISTS', 'BETWEEN',
|
|
5
5
|
'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'JOIN', 'INNER', 'LEFT', 'RIGHT',
|
|
6
|
-
'FULL', 'OUTER', 'CROSS', 'POSITIONAL', 'LATERAL', 'ON', 'INTERVAL', 'DAY', 'MONTH', 'YEAR',
|
|
6
|
+
'FULL', 'OUTER', 'CROSS', 'POSITIONAL', 'LATERAL', 'VIEW', 'ON', 'INTERVAL', 'DAY', 'MONTH', 'YEAR',
|
|
7
7
|
'HOUR', 'MINUTE', 'SECOND', 'FILTER', 'WITHIN',
|
|
8
8
|
'UNION', 'INTERSECT', 'EXCEPT',
|
|
9
9
|
])
|
package/src/validation/tables.js
CHANGED
|
@@ -95,13 +95,18 @@ export function validateNoIdentifiers(expr, context) {
|
|
|
95
95
|
|
|
96
96
|
/**
|
|
97
97
|
* Validates that qualified identifiers reference known table aliases.
|
|
98
|
+
* A `prefix` may also be a bare column name in scope, in which case the
|
|
99
|
+
* identifier is struct-field access (e.g. `item.name` reads field `name`
|
|
100
|
+
* from a struct-valued column `item`).
|
|
98
101
|
*
|
|
99
102
|
* @param {ExprNode} expr
|
|
100
103
|
* @param {Record<string, any>} tables
|
|
104
|
+
* @param {Set<string>} [scopeColumns] - bare column names in scope, used to
|
|
105
|
+
* recognize struct-field dot access on a column rather than a table
|
|
101
106
|
*/
|
|
102
|
-
export function validateTableRefs(expr, tables) {
|
|
107
|
+
export function validateTableRefs(expr, tables, scopeColumns) {
|
|
103
108
|
if (!expr) return
|
|
104
|
-
if (expr.type === 'identifier' && expr.prefix && !(expr.prefix in tables)) {
|
|
109
|
+
if (expr.type === 'identifier' && expr.prefix && !(expr.prefix in tables) && !scopeColumns?.has(expr.prefix)) {
|
|
105
110
|
throw new TableNotFoundError({
|
|
106
111
|
table: expr.prefix,
|
|
107
112
|
qualified: expr.prefix + '.' + expr.name,
|
|
@@ -111,32 +116,32 @@ export function validateTableRefs(expr, tables) {
|
|
|
111
116
|
})
|
|
112
117
|
}
|
|
113
118
|
if (expr.type === 'binary') {
|
|
114
|
-
validateTableRefs(expr.left, tables)
|
|
115
|
-
validateTableRefs(expr.right, tables)
|
|
119
|
+
validateTableRefs(expr.left, tables, scopeColumns)
|
|
120
|
+
validateTableRefs(expr.right, tables, scopeColumns)
|
|
116
121
|
} else if (expr.type === 'unary') {
|
|
117
|
-
validateTableRefs(expr.argument, tables)
|
|
122
|
+
validateTableRefs(expr.argument, tables, scopeColumns)
|
|
118
123
|
} else if (expr.type === 'function') {
|
|
119
124
|
for (const arg of expr.args) {
|
|
120
|
-
validateTableRefs(arg, tables)
|
|
125
|
+
validateTableRefs(arg, tables, scopeColumns)
|
|
121
126
|
}
|
|
122
127
|
} else if (expr.type === 'window') {
|
|
123
|
-
for (const arg of expr.args) validateTableRefs(arg, tables)
|
|
124
|
-
for (const p of expr.partitionBy) validateTableRefs(p, tables)
|
|
125
|
-
for (const o of expr.orderBy) validateTableRefs(o.expr, tables)
|
|
128
|
+
for (const arg of expr.args) validateTableRefs(arg, tables, scopeColumns)
|
|
129
|
+
for (const p of expr.partitionBy) validateTableRefs(p, tables, scopeColumns)
|
|
130
|
+
for (const o of expr.orderBy) validateTableRefs(o.expr, tables, scopeColumns)
|
|
126
131
|
} else if (expr.type === 'cast') {
|
|
127
|
-
validateTableRefs(expr.expr, tables)
|
|
132
|
+
validateTableRefs(expr.expr, tables, scopeColumns)
|
|
128
133
|
} else if (expr.type === 'in valuelist') {
|
|
129
|
-
validateTableRefs(expr.expr, tables)
|
|
134
|
+
validateTableRefs(expr.expr, tables, scopeColumns)
|
|
130
135
|
for (const val of expr.values) {
|
|
131
|
-
validateTableRefs(val, tables)
|
|
136
|
+
validateTableRefs(val, tables, scopeColumns)
|
|
132
137
|
}
|
|
133
138
|
} else if (expr.type === 'case') {
|
|
134
|
-
validateTableRefs(expr.caseExpr, tables)
|
|
139
|
+
validateTableRefs(expr.caseExpr, tables, scopeColumns)
|
|
135
140
|
for (const w of expr.whenClauses) {
|
|
136
|
-
validateTableRefs(w.condition, tables)
|
|
137
|
-
validateTableRefs(w.result, tables)
|
|
141
|
+
validateTableRefs(w.condition, tables, scopeColumns)
|
|
142
|
+
validateTableRefs(w.result, tables, scopeColumns)
|
|
138
143
|
}
|
|
139
|
-
validateTableRefs(expr.elseResult, tables)
|
|
144
|
+
validateTableRefs(expr.elseResult, tables, scopeColumns)
|
|
140
145
|
}
|
|
141
146
|
}
|
|
142
147
|
|