squirreling 0.12.10 → 0.12.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -141,7 +141,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
141
141
  - `SELECT` statements with `DISTINCT`, `WHERE`, `ORDER BY`, `LIMIT`, `OFFSET`
142
142
  - `WITH` clause for Common Table Expressions (CTEs)
143
143
  - Subqueries in `SELECT`, `FROM`, `WHERE`, and correlated subqueries
144
- - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`
144
+ - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`, `LATERAL VIEW [OUTER] EXPLODE(...)`
145
145
  - `GROUP BY` and `HAVING` clauses
146
146
  - Set operations: `UNION`, `UNION ALL`, `INTERSECT`, `INTERSECT ALL`, `EXCEPT`, `EXCEPT ALL`
147
147
  - Expressions: `CASE`, `CAST`, `BETWEEN`, `IN`, `LIKE`, `IS NULL`, `IS NOT NULL`
@@ -161,7 +161,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
161
161
  - Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_PART`, `DATE_TRUNC`, `EXTRACT`, `INTERVAL`
162
162
  - Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`, `JSON_ARRAY_LENGTH`, `JSON_VALID`, `JSON_TYPE`
163
163
  - Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_SORT`, `CARDINALITY`
164
- - Table functions: `UNNEST`, `JSON_EACH`
164
+ - Table functions: `UNNEST`, `EXPLODE`, `JSON_EACH`
165
165
  - Regex: `REGEXP_SUBSTR`, `REGEXP_EXTRACT`, `REGEXP_REPLACE`, `REGEXP_MATCHES`
166
166
  - Spatial: `ST_GeomFromText`, `ST_MakeEnvelope`, `ST_AsText`, `ST_Intersects`, `ST_Contains`, `ST_ContainsProperly`, `ST_Within`, `ST_Overlaps`, `ST_Touches`, `ST_Equals`, `ST_Crosses`, `ST_Covers`, `ST_CoveredBy`, `ST_DWithin`
167
167
  - Conditional: `COALESCE`, `NULLIF`, `GREATEST`, `LEAST`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.12.10",
3
+ "version": "0.12.12",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -3,7 +3,7 @@ import { derivedAlias } from '../expression/alias.js'
3
3
  import { evaluateExpr } from '../expression/evaluate.js'
4
4
  import { parseSql } from '../parse/parse.js'
5
5
  import { planSql, planStatement } from '../plan/plan.js'
6
- import { fromAlias } from '../plan/columns.js'
6
+ import { statementScope } from '../plan/columns.js'
7
7
  import { validateScan, validateTable } from '../validation/tables.js'
8
8
  import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
9
9
  import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
@@ -72,18 +72,6 @@ export function executeStatement({ query, context, outerScope }) {
72
72
  return executePlan({ plan, context: scope ? { ...context, scope } : context })
73
73
  }
74
74
 
75
- /**
76
- * Extracts the table aliases from a statement's FROM and JOIN clauses.
77
- *
78
- * @param {Statement} stmt
79
- * @returns {string[] | undefined}
80
- */
81
- function statementScope(stmt) {
82
- if (stmt.type === 'with') return statementScope(stmt.query)
83
- if (stmt.type === 'compound') return undefined
84
- return [fromAlias(stmt.from), ...stmt.joins.map(j => j.alias ?? j.table)]
85
- }
86
-
87
75
  /**
88
76
  * Executes a query plan and returns query results with row count estimates
89
77
  *
@@ -119,6 +107,8 @@ export function executePlan({ plan, context }) {
119
107
  return executeLimit(plan, context)
120
108
  } else if (plan.type === 'SetOperation') {
121
109
  return executeSetOperation(plan, context)
110
+ } else if (plan.type === 'Subquery') {
111
+ return executePlan({ plan: plan.child, context: { ...context, scope: plan.scope } })
122
112
  } else if (plan.type === 'TableFunction') {
123
113
  return executeTableFunction(plan, context)
124
114
  } else if (plan.type === 'Window') {
@@ -137,7 +127,7 @@ export function executePlan({ plan, context }) {
137
127
  * @returns {QueryResults}
138
128
  */
139
129
  function executeTableFunction(plan, context) {
140
- if (plan.funcName === 'UNNEST') {
130
+ if (plan.funcName === 'UNNEST' || plan.funcName === 'EXPLODE') {
141
131
  return executeUnnest(plan, context)
142
132
  } else if (plan.funcName === 'JSON_EACH') {
143
133
  return executeJsonEach(plan, context)
@@ -200,6 +200,7 @@ export function executeHashJoin(plan, context) {
200
200
  async *rows() {
201
201
  const leftTable = plan.leftAlias
202
202
  const rightTable = plan.rightAlias
203
+ const { leftKeys, rightKeys, residual } = plan
203
204
 
204
205
  // Buffer right rows and build hash map
205
206
  /** @type {AsyncRow[]} */
@@ -209,16 +210,16 @@ export function executeHashJoin(plan, context) {
209
210
  rightRows.push(row)
210
211
  }
211
212
 
212
- /** @type {Map<any, AsyncRow[]>} */
213
+ /** @type {Map<string | number | bigint | boolean, AsyncRow[]>} */
213
214
  const hashMap = new Map()
214
215
  for (const rightRow of rightRows) {
215
- const keyValue = await evaluateExpr({
216
- node: plan.rightKey,
217
- row: rightRow,
218
- context,
219
- })
220
- if (keyValue == null) continue
221
- const key = keyify(keyValue)
216
+ const keyValues = await Promise.all(
217
+ rightKeys.map(node => evaluateExpr({ node, row: rightRow, context }))
218
+ )
219
+ // SQL semantics: NULL never equals anything, so a row with any NULL
220
+ // join key is excluded from the hash table.
221
+ if (keyValues.some(v => v == null)) continue
222
+ const key = keyify(...keyValues)
222
223
  let bucket = hashMap.get(key)
223
224
  if (!bucket) {
224
225
  bucket = []
@@ -243,20 +244,28 @@ export function executeHashJoin(plan, context) {
243
244
  leftCols = leftRow.columns
244
245
  }
245
246
 
246
- const keyValue = await evaluateExpr({
247
- node: plan.leftKey,
248
- row: leftRow,
249
- context,
250
- })
251
- const key = keyify(keyValue)
252
- const matchingRightRows = hashMap.get(key)
253
-
254
- if (matchingRightRows?.length) {
255
- for (const rightRow of matchingRightRows) {
256
- matchedRightRows?.add(rightRow)
257
- yield mergeRows(leftRow, rightRow, leftTable, rightTable)
247
+ const keyValues = await Promise.all(
248
+ leftKeys.map(node => evaluateExpr({ node, row: leftRow, context }))
249
+ )
250
+ let matched = false
251
+ if (!keyValues.some(v => v == null)) {
252
+ const key = keyify(...keyValues)
253
+ const candidates = hashMap.get(key)
254
+ if (candidates?.length) {
255
+ for (const rightRow of candidates) {
256
+ const merged = mergeRows(leftRow, rightRow, leftTable, rightTable)
257
+ if (residual) {
258
+ const ok = await evaluateExpr({ node: residual, row: merged, context })
259
+ if (!ok) continue
260
+ }
261
+ matched = true
262
+ matchedRightRows?.add(rightRow)
263
+ yield merged
264
+ }
258
265
  }
259
- } else if (plan.joinType === 'LEFT' || plan.joinType === 'FULL') {
266
+ }
267
+
268
+ if (!matched && (plan.joinType === 'LEFT' || plan.joinType === 'FULL')) {
260
269
  const nullRight = createNullRow(rightCols)
261
270
  yield mergeRows(leftRow, nullRight, leftTable, rightTable)
262
271
  }
@@ -47,6 +47,17 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
47
47
  return value[node.name]
48
48
  }
49
49
  }
50
+ // Struct dot access where the prefix is itself a column name (bare or
51
+ // table-qualified), e.g. `item.name` reading field `name` from a struct
52
+ // column `item` (often introduced via UNNEST AS tc(item)).
53
+ const suffix = '.' + node.prefix
54
+ const baseColumns = row.columns.filter(col => col === node.prefix || col.endsWith(suffix))
55
+ if (baseColumns.length === 1) {
56
+ const value = await row.cells[baseColumns[0]]()
57
+ if (isPlainObject(value) && Object.prototype.hasOwnProperty.call(value, node.name)) {
58
+ return value[node.name]
59
+ }
60
+ }
50
61
  // Check outer row for correlated subquery references
51
62
  if (context.outerRow && context.outerAliases?.has(node.prefix) && node.name in context.outerRow.cells) {
52
63
  return context.outerRow.cells[node.name]()
@@ -603,7 +614,10 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
603
614
  return Math.trunc(num)
604
615
  }
605
616
  if (toType === 'BIGINT') {
606
- return BigInt(val)
617
+ if (typeof val === 'bigint') return val
618
+ const num = Number(val)
619
+ if (isNaN(num)) return null
620
+ return BigInt(Math.trunc(num))
607
621
  }
608
622
  if (toType === 'FLOAT' || toType === 'REAL' || toType === 'DOUBLE') {
609
623
  const num = Number(val)
@@ -32,6 +32,25 @@ export function evaluateStringFunc({ funcName, node, args, rowIndex }) {
32
32
  // String first arg
33
33
  const [val] = args
34
34
  if (val == null) return null
35
+
36
+ if (funcName === 'LENGTH') {
37
+ if (typeof val === 'string' || Array.isArray(val)) return val.length
38
+ throw new ArgValueError({
39
+ ...node,
40
+ message: `expected string or array, got ${typeof val === 'object' ? val instanceof Date ? 'date' : 'object' : typeof val}`,
41
+ hint: 'Use CAST to convert to a string first.',
42
+ rowIndex,
43
+ })
44
+ }
45
+
46
+ if (typeof val === 'object' && !(val instanceof Date)) {
47
+ throw new ArgValueError({
48
+ ...node,
49
+ message: `does not support ${Array.isArray(val) ? 'array' : 'object'} arguments`,
50
+ hint: 'Use CAST to convert to a string first.',
51
+ rowIndex,
52
+ })
53
+ }
35
54
  const str = String(val)
36
55
 
37
56
  if (funcName === 'UPPER') {
@@ -42,10 +61,6 @@ export function evaluateStringFunc({ funcName, node, args, rowIndex }) {
42
61
  return str.toLowerCase()
43
62
  }
44
63
 
45
- if (funcName === 'LENGTH') {
46
- return str.length
47
- }
48
-
49
64
  if (funcName === 'SUBSTRING' || funcName === 'SUBSTR') {
50
65
  const start = Number(args[1])
51
66
  if (!Number.isInteger(start) || start < 1) {
@@ -1,8 +1,9 @@
1
1
  import { expectNoAggregate } from '../validation/aggregates.js'
2
+ import { isTableFunction, validateFunctionArgs } from '../validation/functions.js'
2
3
  import { ParseError } from '../validation/parseErrors.js'
3
4
  import { parseExpression } from './expression.js'
4
- import { isTableFunctionStart, parseFromFunction, parseTableAlias } from './parse.js'
5
- import { current, expect, match } from './state.js'
5
+ import { isTableFunctionStart, parseFromFunction, parseTableAlias, tableFunctionColumnCount, tableFunctionDefaultColumns } from './parse.js'
6
+ import { consume, current, expect, match } from './state.js'
6
7
 
7
8
  /**
8
9
  * @import { ExprNode, JoinClause, JoinType, ParserState } from '../types.js'
@@ -19,6 +20,96 @@ export function parseJoins(state) {
19
20
  while (true) {
20
21
  const tok = current(state)
21
22
 
23
+ // LATERAL VIEW [OUTER] func(args) tableAlias AS colAlias[, ...] (Spark/Hive style)
24
+ if (current(state).type === 'keyword' && current(state).value === 'LATERAL') {
25
+ const lateralStart = tok.positionStart
26
+ consume(state)
27
+ expect(state, 'keyword', 'VIEW')
28
+ const isOuter = match(state, 'keyword', 'OUTER')
29
+ const funcTok = current(state)
30
+ if (funcTok.type !== 'identifier' || !isTableFunction(funcTok.value.toUpperCase())) {
31
+ throw new ParseError({
32
+ message: 'LATERAL VIEW requires a table function like EXPLODE',
33
+ positionStart: funcTok.positionStart,
34
+ positionEnd: funcTok.positionEnd,
35
+ })
36
+ }
37
+ consume(state)
38
+ const funcName = funcTok.value.toUpperCase()
39
+ expect(state, 'paren', '(')
40
+ /** @type {ExprNode[]} */
41
+ const args = []
42
+ if (!match(state, 'paren', ')')) {
43
+ while (true) {
44
+ args.push(parseExpression(state))
45
+ if (!match(state, 'comma')) break
46
+ }
47
+ expect(state, 'paren', ')')
48
+ }
49
+ validateFunctionArgs(funcName, args.length, funcTok.positionStart, state.lastPos, state.functions)
50
+
51
+ const aliasTok = current(state)
52
+ if (aliasTok.type !== 'identifier') {
53
+ throw new ParseError({
54
+ message: 'LATERAL VIEW requires a table alias before AS',
55
+ positionStart: aliasTok.positionStart,
56
+ positionEnd: aliasTok.positionEnd,
57
+ })
58
+ }
59
+ consume(state)
60
+ const tableAlias = aliasTok.value
61
+
62
+ expect(state, 'keyword', 'AS')
63
+ /** @type {string[]} */
64
+ const columnAliases = []
65
+ const colStart = state.lastPos
66
+ while (true) {
67
+ const colTok = expect(state, 'identifier')
68
+ columnAliases.push(colTok.value)
69
+ if (!match(state, 'comma')) break
70
+ }
71
+ const maxCols = tableFunctionColumnCount(funcName)
72
+ if (columnAliases.length > maxCols) {
73
+ const colLabels = tableFunctionDefaultColumns(funcName).join(', ')
74
+ throw new ParseError({
75
+ message: maxCols === 1
76
+ ? `${funcName} produces a single column; only one column alias is allowed`
77
+ : `${funcName} produces at most ${maxCols} columns (${colLabels}); too many column aliases`,
78
+ positionStart: colStart,
79
+ positionEnd: state.lastPos,
80
+ })
81
+ }
82
+
83
+ /** @type {import('../ast.js').FromFunction} */
84
+ const fromFunction = {
85
+ type: 'function',
86
+ funcName,
87
+ args,
88
+ alias: tableAlias,
89
+ columnAliases,
90
+ positionStart: funcTok.positionStart,
91
+ positionEnd: state.lastPos,
92
+ }
93
+
94
+ /** @type {JoinType} */
95
+ const joinType = isOuter ? 'LEFT' : 'CROSS'
96
+ /** @type {ExprNode | undefined} */
97
+ const condition = isOuter
98
+ ? { type: 'literal', value: true, positionStart: lateralStart, positionEnd: state.lastPos }
99
+ : undefined
100
+
101
+ joins.push({
102
+ joinType,
103
+ table: funcName,
104
+ alias: tableAlias,
105
+ on: condition,
106
+ fromFunction,
107
+ positionStart: lateralStart,
108
+ positionEnd: state.lastPos,
109
+ })
110
+ continue
111
+ }
112
+
22
113
  // Comma-join: implicit CROSS JOIN LATERAL, currently only for table functions.
23
114
  if (match(state, 'comma')) {
24
115
  if (!isTableFunctionStart(state)) {
@@ -18,6 +18,22 @@ export function fromAlias(from) {
18
18
  return 'table'
19
19
  }
20
20
 
21
+ /**
22
+ * Returns the FROM/JOIN aliases visible inside a statement's body — its
23
+ * lexical scope. Used to set context.scope when entering a derived-table
24
+ * subplan, so correlated subqueries inside resolve outer references against
25
+ * the right aliases. Returns undefined for compound statements (UNION etc.)
26
+ * which have no single scope.
27
+ *
28
+ * @param {Statement} stmt
29
+ * @returns {string[] | undefined}
30
+ */
31
+ export function statementScope(stmt) {
32
+ if (stmt.type === 'with') return statementScope(stmt.query)
33
+ if (stmt.type === 'compound') return undefined
34
+ return [fromAlias(stmt.from), ...stmt.joins.map(j => j.alias ?? j.table)]
35
+ }
36
+
21
37
  /**
22
38
  * Returns the output column names for a FROM table function, applying any
23
39
  * column aliases over the function's default column names.
@@ -400,3 +416,24 @@ export function inferSelectSourceColumns({ select, cteColumns, tables }) {
400
416
  function lookupTableColumns(table, cteColumns, tables) {
401
417
  return cteColumns?.get(table.toLowerCase()) ?? tables?.[table]?.columns ?? []
402
418
  }
419
+
420
+ /**
421
+ * Collects bare column names exposed by a SELECT's FROM and joins. Used by
422
+ * validation to recognize struct-field dot access (e.g. `item.name` on a
423
+ * struct-valued column `item`) instead of rejecting the prefix as an unknown
424
+ * table.
425
+ *
426
+ * @param {object} options
427
+ * @param {SelectStatement} options.select
428
+ * @param {Map<string, string[]>} [options.cteColumns]
429
+ * @param {Record<string, AsyncDataSource>} [options.tables]
430
+ * @returns {Set<string>}
431
+ */
432
+ export function collectScopeColumns({ select, cteColumns, tables }) {
433
+ const result = new Set()
434
+ for (const col of inferSelectSourceColumns({ select, cteColumns, tables })) {
435
+ const dot = col.indexOf('.')
436
+ result.add(dot >= 0 ? col.slice(dot + 1) : col)
437
+ }
438
+ return result
439
+ }
package/src/plan/plan.js CHANGED
@@ -4,7 +4,7 @@ import { findAggregate } from '../validation/aggregates.js'
4
4
  import { ParseError } from '../validation/parseErrors.js'
5
5
  import { ColumnNotFoundError, TableNotFoundError } from '../validation/tables.js'
6
6
  import { validateNoIdentifiers, validateScan, validateTableRefs } from '../validation/tables.js'
7
- import { extractColumns, fromAlias, inferSelectSourceColumns, inferStatementColumns, tableFunctionColumnNames } from './columns.js'
7
+ import { collectScopeColumns, extractColumns, fromAlias, inferSelectSourceColumns, inferStatementColumns, statementScope, tableFunctionColumnNames } from './columns.js'
8
8
 
9
9
  /**
10
10
  * @import { AsyncDataSource, ExprNode, DerivedColumn, IdentifierNode, JoinClause, OrderByItem, PlanSqlOptions, ScanOptions, SelectColumn, SelectStatement, SetOperationStatement, Statement, WindowFunctionNode } from '../types.js'
@@ -159,11 +159,15 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
159
159
  // Resolve aliases (and validate qualified references)
160
160
  // Include outerScope aliases so correlated references pass validation
161
161
  const scopeTables = Object.fromEntries([sourceAlias, ...select.joins.map(j => j.alias ?? j.table), ...outerScope ?? []].map(a => [a, true]))
162
+ // Bare column names in scope, so the validator can recognize struct-field
163
+ // dot access on a column (e.g. `item.name` where `item` is an unnested
164
+ // struct column) rather than rejecting `item` as an unknown table.
165
+ const scopeColumns = collectScopeColumns({ select, cteColumns, tables })
162
166
  /** @type {Map<string, ExprNode>} */
163
167
  const aliases = new Map()
164
168
  const columns = select.columns.map(col => {
165
169
  if (col.type === 'derived') {
166
- validateTableRefs(col.expr, scopeTables)
170
+ validateTableRefs(col.expr, scopeTables, scopeColumns)
167
171
  const expr = resolveAliases(col.expr, aliases)
168
172
  if (col.alias) {
169
173
  aliases.set(col.alias, expr)
@@ -180,16 +184,16 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
180
184
  const orderBy = resolveOrderByAliases(select.orderBy, aliases)
181
185
 
182
186
  // Validate qualified references in other clauses
183
- validateTableRefs(select.where, scopeTables)
184
- validateTableRefs(select.having, scopeTables)
187
+ validateTableRefs(select.where, scopeTables, scopeColumns)
188
+ validateTableRefs(select.having, scopeTables, scopeColumns)
185
189
  for (const expr of select.groupBy) {
186
- validateTableRefs(expr, scopeTables)
190
+ validateTableRefs(expr, scopeTables, scopeColumns)
187
191
  }
188
192
  for (const term of select.orderBy) {
189
- validateTableRefs(term.expr, scopeTables)
193
+ validateTableRefs(term.expr, scopeTables, scopeColumns)
190
194
  }
191
195
  for (const join of select.joins) {
192
- validateTableRefs(join.on, scopeTables)
196
+ validateTableRefs(join.on, scopeTables, scopeColumns)
193
197
  }
194
198
 
195
199
  // Determine scan hints for direct table scans (WHERE and LIMIT/OFFSET are
@@ -359,6 +363,14 @@ function planFrom({ select, ctePlans, cteColumns, hints, tables, outerScope }) {
359
363
  throw new ColumnNotFoundError({ missingColumn, availableColumns, ...select.from })
360
364
  }
361
365
  }
366
+ // Wrap with the inner SELECT's scope so correlated subqueries inside the
367
+ // derived table resolve outer references against the inner aliases, not
368
+ // the enclosing query's. Compound subqueries (UNION etc.) have no single
369
+ // scope and pass through unwrapped.
370
+ const innerScope = statementScope(select.from.query)
371
+ if (innerScope) {
372
+ return { type: 'Subquery', scope: innerScope, child: subPlan }
373
+ }
362
374
  return subPlan
363
375
  }
364
376
  }
@@ -443,18 +455,21 @@ function planJoin({ left, joins, leftTable, ctePlans, cteColumns, perTableColumn
443
455
  if (join.joinType === 'POSITIONAL') {
444
456
  plan = { type: 'PositionalJoin', leftAlias: currentLeftTable, rightAlias: rightTable, left: plan, right: rightScan }
445
457
  } else {
446
- const keys = join.on && extractSimpleJoinKeys({ condition: join.on, leftTable: currentLeftTable, rightTable })
458
+ const keys = join.on && extractEquiKeys({ condition: join.on, leftTable: currentLeftTable, rightTable })
447
459
  if (keys) {
448
- plan = {
460
+ /** @type {import('./types.d.ts').HashJoinNode} */
461
+ const hashJoin = {
449
462
  type: 'HashJoin',
450
463
  joinType: join.joinType,
451
464
  leftAlias: currentLeftTable,
452
465
  rightAlias: rightTable,
453
- leftKey: keys.leftKey,
454
- rightKey: keys.rightKey,
466
+ leftKeys: keys.leftKeys,
467
+ rightKeys: keys.rightKeys,
455
468
  left: plan,
456
469
  right: rightScan,
457
470
  }
471
+ if (keys.residual) hashJoin.residual = keys.residual
472
+ plan = hashJoin
458
473
  } else {
459
474
  plan = {
460
475
  type: 'NestedLoopJoin',
@@ -601,28 +616,89 @@ function normalizeIdentifiers(node, sourceColumns) {
601
616
  }
602
617
 
603
618
  /**
604
- * Extracts left and right key expressions from a simple equality join condition.
605
- * Returns undefined if the condition is not a simple equality between identifiers.
619
+ * Splits a join ON expression into equi-key pairs and a residual predicate so
620
+ * the planner can route AND-of-equis (with optional range/inequality
621
+ * conjuncts) to the hash-join path. Conjuncts of the form
622
+ * `<left-ref> = <right-ref>` between two identifiers become hash keys; every
623
+ * other conjunct stays as part of the residual that will run after the hash
624
+ * lookup. Returns undefined when no equi conjunct is present so the caller
625
+ * falls back to the nested-loop path.
606
626
  *
607
627
  * @param {object} options
608
628
  * @param {ExprNode} options.condition
609
629
  * @param {string} options.leftTable
610
630
  * @param {string} options.rightTable
611
- * @returns {{ leftKey: ExprNode, rightKey: ExprNode } | undefined}
631
+ * @returns {{ leftKeys: ExprNode[], rightKeys: ExprNode[], residual?: ExprNode } | undefined}
632
+ */
633
+ function extractEquiKeys({ condition, leftTable, rightTable }) {
634
+ /** @type {ExprNode[]} */
635
+ const conjuncts = []
636
+ collectConjuncts(condition, conjuncts)
637
+ /** @type {ExprNode[]} */
638
+ const leftKeys = []
639
+ /** @type {ExprNode[]} */
640
+ const rightKeys = []
641
+ /** @type {ExprNode[]} */
642
+ const residuals = []
643
+ for (const conjunct of conjuncts) {
644
+ const eq = classifyEquiConjunct(conjunct, leftTable, rightTable)
645
+ if (eq) {
646
+ leftKeys.push(eq.leftKey)
647
+ rightKeys.push(eq.rightKey)
648
+ } else {
649
+ residuals.push(conjunct)
650
+ }
651
+ }
652
+ if (!leftKeys.length) return undefined
653
+ /** @type {ExprNode | undefined} */
654
+ let residual
655
+ for (const r of residuals) {
656
+ residual = residual === undefined
657
+ ? r
658
+ : { type: 'binary', op: 'AND', left: residual, right: r, positionStart: residual.positionStart, positionEnd: r.positionEnd }
659
+ }
660
+ return residual ? { leftKeys, rightKeys, residual } : { leftKeys, rightKeys }
661
+ }
662
+
663
+ /**
664
+ * Walks an ON expression, flattening top-level AND conjuncts. Non-AND nodes
665
+ * are pushed verbatim. Used to expose individual predicates so equi-keys can
666
+ * be lifted out of an AND chain.
667
+ *
668
+ * @param {ExprNode} node
669
+ * @param {ExprNode[]} out
612
670
  */
613
- function extractSimpleJoinKeys({ condition, leftTable, rightTable }) {
614
- if (condition.type !== 'binary' || condition.op !== '=') return
615
- const { left, right } = condition
616
- if (left.type !== 'identifier' || right.type !== 'identifier') return
671
+ function collectConjuncts(node, out) {
672
+ if (node.type === 'binary' && node.op === 'AND') {
673
+ collectConjuncts(node.left, out)
674
+ collectConjuncts(node.right, out)
675
+ return
676
+ }
677
+ out.push(node)
678
+ }
617
679
 
618
- // Check if keys are in swapped order (right table ref on left side)
680
+ /**
681
+ * Returns the (leftKey, rightKey) pair for an equi conjunct, oriented so
682
+ * leftKey references the left input and rightKey references the right input.
683
+ * Returns undefined when the conjunct is not a `<identifier> = <identifier>`
684
+ * predicate. When the prefixes don't unambiguously identify a side, falls
685
+ * through to the original orientation — matches the prior single-equi
686
+ * behavior so unprefixed columns still produce a hash join.
687
+ *
688
+ * @param {ExprNode} conjunct
689
+ * @param {string} leftTable
690
+ * @param {string} rightTable
691
+ * @returns {{ leftKey: ExprNode, rightKey: ExprNode } | undefined}
692
+ */
693
+ function classifyEquiConjunct(conjunct, leftTable, rightTable) {
694
+ if (conjunct.type !== 'binary' || conjunct.op !== '=') return undefined
695
+ const { left, right } = conjunct
696
+ if (left.type !== 'identifier' || right.type !== 'identifier') return undefined
619
697
  const leftRefsRight = left.prefix === rightTable
620
698
  const rightRefsLeft = right.prefix === leftTable
621
-
622
699
  if (leftRefsRight && rightRefsLeft) {
623
700
  return { leftKey: right, rightKey: left }
624
701
  }
625
-
626
702
  return { leftKey: left, rightKey: right }
627
703
  }
628
704
 
@@ -14,6 +14,7 @@ export type QueryPlan =
14
14
  | NestedLoopJoinNode
15
15
  | PositionalJoinNode
16
16
  | SetOperationNode
17
+ | SubqueryNode
17
18
  | TableFunctionNode
18
19
  | WindowNode
19
20
 
@@ -85,8 +86,11 @@ export interface HashJoinNode {
85
86
  joinType: JoinType
86
87
  leftAlias: string
87
88
  rightAlias: string
88
- leftKey: ExprNode
89
- rightKey: ExprNode
89
+ leftKeys: ExprNode[]
90
+ rightKeys: ExprNode[]
91
+ // Non-equi conjuncts from the ON clause (e.g. range predicates) applied to
92
+ // each merged candidate after the hash lookup succeeds.
93
+ residual?: ExprNode
90
94
  left: QueryPlan
91
95
  right: QueryPlan
92
96
  }
@@ -119,6 +123,17 @@ export interface SetOperationNode {
119
123
  right: QueryPlan
120
124
  }
121
125
 
126
+ // Wraps a derived-table or CTE subplan with the lexical alias scope of its
127
+ // inner SELECT, so the executor can set context.scope while traversing the
128
+ // subtree. Correlated subqueries inside the subtree resolve outer references
129
+ // against this scope, not whichever ancestor most recently went through
130
+ // executeStatement.
131
+ export interface SubqueryNode {
132
+ type: 'Subquery'
133
+ scope: string[]
134
+ child: QueryPlan
135
+ }
136
+
122
137
  // Table-valued function (e.g. UNNEST) used in FROM clause
123
138
  export interface TableFunctionNode {
124
139
  type: 'TableFunction'
@@ -49,7 +49,7 @@ export function isRegexpFunc(name) {
49
49
  * @returns {boolean}
50
50
  */
51
51
  export function isTableFunction(name) {
52
- return ['UNNEST', 'JSON_EACH'].includes(name)
52
+ return ['UNNEST', 'EXPLODE', 'JSON_EACH'].includes(name)
53
53
  }
54
54
 
55
55
  /**
@@ -187,6 +187,7 @@ export const FUNCTION_SIGNATURES = {
187
187
 
188
188
  // Table functions (used in FROM clause)
189
189
  UNNEST: { min: 1, max: 1, signature: 'array' },
190
+ EXPLODE: { min: 1, max: 1, signature: 'array' },
190
191
  JSON_EACH: { min: 1, max: 1, signature: 'value' },
191
192
 
192
193
  // Conditional functions
@@ -3,7 +3,7 @@ export const KEYWORDS = new Set([
3
3
  'HAVING', 'ORDER', 'ASC', 'DESC', 'NULLS', 'LIMIT', 'OFFSET', 'AS', 'ALL',
4
4
  'DISTINCT', 'TRUE', 'FALSE', 'NULL', 'LIKE', 'IN', 'EXISTS', 'BETWEEN',
5
5
  'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'JOIN', 'INNER', 'LEFT', 'RIGHT',
6
- 'FULL', 'OUTER', 'CROSS', 'POSITIONAL', 'LATERAL', 'ON', 'INTERVAL', 'DAY', 'MONTH', 'YEAR',
6
+ 'FULL', 'OUTER', 'CROSS', 'POSITIONAL', 'LATERAL', 'VIEW', 'ON', 'INTERVAL', 'DAY', 'MONTH', 'YEAR',
7
7
  'HOUR', 'MINUTE', 'SECOND', 'FILTER', 'WITHIN',
8
8
  'UNION', 'INTERSECT', 'EXCEPT',
9
9
  ])
@@ -95,13 +95,18 @@ export function validateNoIdentifiers(expr, context) {
95
95
 
96
96
  /**
97
97
  * Validates that qualified identifiers reference known table aliases.
98
+ * A `prefix` may also be a bare column name in scope, in which case the
99
+ * identifier is struct-field access (e.g. `item.name` reads field `name`
100
+ * from a struct-valued column `item`).
98
101
  *
99
102
  * @param {ExprNode} expr
100
103
  * @param {Record<string, any>} tables
104
+ * @param {Set<string>} [scopeColumns] - bare column names in scope, used to
105
+ * recognize struct-field dot access on a column rather than a table
101
106
  */
102
- export function validateTableRefs(expr, tables) {
107
+ export function validateTableRefs(expr, tables, scopeColumns) {
103
108
  if (!expr) return
104
- if (expr.type === 'identifier' && expr.prefix && !(expr.prefix in tables)) {
109
+ if (expr.type === 'identifier' && expr.prefix && !(expr.prefix in tables) && !scopeColumns?.has(expr.prefix)) {
105
110
  throw new TableNotFoundError({
106
111
  table: expr.prefix,
107
112
  qualified: expr.prefix + '.' + expr.name,
@@ -111,32 +116,32 @@ export function validateTableRefs(expr, tables) {
111
116
  })
112
117
  }
113
118
  if (expr.type === 'binary') {
114
- validateTableRefs(expr.left, tables)
115
- validateTableRefs(expr.right, tables)
119
+ validateTableRefs(expr.left, tables, scopeColumns)
120
+ validateTableRefs(expr.right, tables, scopeColumns)
116
121
  } else if (expr.type === 'unary') {
117
- validateTableRefs(expr.argument, tables)
122
+ validateTableRefs(expr.argument, tables, scopeColumns)
118
123
  } else if (expr.type === 'function') {
119
124
  for (const arg of expr.args) {
120
- validateTableRefs(arg, tables)
125
+ validateTableRefs(arg, tables, scopeColumns)
121
126
  }
122
127
  } else if (expr.type === 'window') {
123
- for (const arg of expr.args) validateTableRefs(arg, tables)
124
- for (const p of expr.partitionBy) validateTableRefs(p, tables)
125
- for (const o of expr.orderBy) validateTableRefs(o.expr, tables)
128
+ for (const arg of expr.args) validateTableRefs(arg, tables, scopeColumns)
129
+ for (const p of expr.partitionBy) validateTableRefs(p, tables, scopeColumns)
130
+ for (const o of expr.orderBy) validateTableRefs(o.expr, tables, scopeColumns)
126
131
  } else if (expr.type === 'cast') {
127
- validateTableRefs(expr.expr, tables)
132
+ validateTableRefs(expr.expr, tables, scopeColumns)
128
133
  } else if (expr.type === 'in valuelist') {
129
- validateTableRefs(expr.expr, tables)
134
+ validateTableRefs(expr.expr, tables, scopeColumns)
130
135
  for (const val of expr.values) {
131
- validateTableRefs(val, tables)
136
+ validateTableRefs(val, tables, scopeColumns)
132
137
  }
133
138
  } else if (expr.type === 'case') {
134
- validateTableRefs(expr.caseExpr, tables)
139
+ validateTableRefs(expr.caseExpr, tables, scopeColumns)
135
140
  for (const w of expr.whenClauses) {
136
- validateTableRefs(w.condition, tables)
137
- validateTableRefs(w.result, tables)
141
+ validateTableRefs(w.condition, tables, scopeColumns)
142
+ validateTableRefs(w.result, tables, scopeColumns)
138
143
  }
139
- validateTableRefs(expr.elseResult, tables)
144
+ validateTableRefs(expr.elseResult, tables, scopeColumns)
140
145
  }
141
146
  }
142
147