squirreling 0.12.2 → 0.12.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -140,7 +140,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
140
140
 
141
141
  - `SELECT` statements with `DISTINCT`, `WHERE`, `ORDER BY`, `LIMIT`, `OFFSET`
142
142
  - `WITH` clause for Common Table Expressions (CTEs)
143
- - Subqueries in `SELECT`, `FROM`, and `WHERE` clauses
143
+ - Subqueries in `SELECT`, `FROM`, `WHERE`, and correlated subqueries
144
144
  - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`
145
145
  - `GROUP BY` and `HAVING` clauses
146
146
  - Set operations: `UNION`, `UNION ALL`, `INTERSECT`, `INTERSECT ALL`, `EXCEPT`, `EXCEPT ALL`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.12.2",
3
+ "version": "0.12.4",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -2,7 +2,8 @@ import { memorySource } from '../backend/dataSource.js'
2
2
  import { derivedAlias } from '../expression/alias.js'
3
3
  import { evaluateExpr } from '../expression/evaluate.js'
4
4
  import { parseSql } from '../parse/parse.js'
5
- import { planSql } from '../plan/plan.js'
5
+ import { planSql, planStatement } from '../plan/plan.js'
6
+ import { fromAlias } from '../plan/columns.js'
6
7
  import { validateScan, validateTable } from '../validation/tables.js'
7
8
  import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
8
9
  import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
@@ -24,30 +25,18 @@ export function executeSql({ tables, query, functions, signal }) {
24
25
  const parsed = typeof query === 'string' ? parseSql({ query, functions }) : query
25
26
 
26
27
  // Normalize tables: convert arrays to AsyncDataSource
27
- // Fast path: skip normalization when no arrays are present
28
- let needsNormalization = false
29
- const tableKeys = Object.keys(tables)
30
- for (let i = 0; i < tableKeys.length; i++) {
31
- if (Array.isArray(tables[tableKeys[i]])) {
32
- needsNormalization = true
33
- break
34
- }
35
- }
36
-
37
28
  /** @type {Record<string, AsyncDataSource>} */
38
- let normalizedTables
39
- if (needsNormalization) {
40
- normalizedTables = {}
41
- for (let i = 0; i < tableKeys.length; i++) {
42
- const name = tableKeys[i]
43
- const data = tables[name]
44
- normalizedTables[name] = Array.isArray(data) ? memorySource({ data }) : data
29
+ const normalizedTables = {}
30
+ for (const [name, data] of Object.entries(tables)) {
31
+ if (Array.isArray(data)) {
32
+ normalizedTables[name] = memorySource({ data })
33
+ } else {
34
+ normalizedTables[name] = data
45
35
  }
46
- } else {
47
- normalizedTables = /** @type {Record<string, AsyncDataSource>} */ (tables)
48
36
  }
49
37
 
50
- const context = { tables: normalizedTables, functions, signal }
38
+ const scope = statementScope(parsed)
39
+ const context = { tables: normalizedTables, functions, signal, scope }
51
40
  const plan = planSql({ query: parsed, functions, tables: normalizedTables })
52
41
  return executePlan({ plan, context })
53
42
  }
@@ -58,11 +47,26 @@ export function executeSql({ tables, query, functions, signal }) {
58
47
  * @param {Object} options
59
48
  * @param {Statement} options.query
60
49
  * @param {ExecuteContext} options.context
50
+ * @param {string[]} [options.outerScope] - outer query aliases for correlated subqueries
61
51
  * @returns {QueryResults}
62
52
  */
63
- export function executeStatement({ query, context }) {
64
- const plan = planSql({ query, functions: context.functions, tables: context.tables })
65
- return executePlan({ plan, context })
53
+ export function executeStatement({ query, context, outerScope }) {
54
+ const plan = planStatement({ stmt: query, tables: context.tables, outerScope })
55
+ // Compute this query's scope (FROM alias + JOIN aliases) for nested correlated subqueries
56
+ const scope = statementScope(query)
57
+ return executePlan({ plan, context: scope ? { ...context, scope } : context })
58
+ }
59
+
60
+ /**
61
+ * Extracts the table aliases from a statement's FROM and JOIN clauses.
62
+ *
63
+ * @param {Statement} stmt
64
+ * @returns {string[] | undefined}
65
+ */
66
+ function statementScope(stmt) {
67
+ if (stmt.type === 'with') return statementScope(stmt.query)
68
+ if (stmt.type === 'compound') return undefined
69
+ return [fromAlias(stmt.from), ...stmt.joins.map(j => j.alias ?? j.table)]
66
70
  }
67
71
 
68
72
  /**
@@ -39,6 +39,10 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
39
39
  if (qualified in row.cells) {
40
40
  return row.cells[qualified]()
41
41
  }
42
+ // Check outer row for correlated subquery references
43
+ if (context.outerRow && context.outerAliases?.has(node.prefix) && node.name in context.outerRow.cells) {
44
+ return context.outerRow.cells[node.name]()
45
+ }
42
46
  // Fall back to just the column part
43
47
  if (node.name in row.cells) {
44
48
  return row.cells[node.name]()
@@ -66,7 +70,11 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
66
70
 
67
71
  // Scalar subquery - returns a single value
68
72
  if (node.type === 'subquery') {
69
- const gen = executeStatement({ query: node.subquery, context }).rows()
73
+ const outerScope = context.scope
74
+ const subContext = outerScope
75
+ ? { ...context, outerRow: row, outerAliases: new Set(outerScope) }
76
+ : context
77
+ const gen = executeStatement({ query: node.subquery, context: subContext, outerScope }).rows()
70
78
  const { value } = await gen.next()
71
79
  gen.return(undefined)
72
80
  if (!value) return null
@@ -104,6 +104,16 @@ export function parseFunctionCall(state, positionStart) {
104
104
  expect(state, 'paren', ')')
105
105
  }
106
106
 
107
+ // Check for OVER clause (window functions not supported)
108
+ const overTok = current(state)
109
+ if (overTok.type === 'identifier' && overTok.value.toUpperCase() === 'OVER') {
110
+ throw new ParseError({
111
+ message: `Window functions are not supported: ${funcName}(...) OVER (...)`,
112
+ positionStart,
113
+ positionEnd: overTok.positionEnd,
114
+ })
115
+ }
116
+
107
117
  return {
108
118
  type: 'function',
109
119
  funcName,
@@ -163,7 +163,54 @@ function collectColumnsFromExpr(expr, columns, aliases) {
163
163
  collectColumnsFromExpr(expr.elseResult, columns, aliases)
164
164
  }
165
165
  }
166
- // No columns: count(*), literal, interval, exists, not exists, subquery
166
+ // Subqueries: collect prefixed identifiers for correlated column detection.
167
+ // Only prefixed identifiers are collected because correlated outer references
168
+ // are always qualified (e.g. users.id, a.session_id). Unprefixed identifiers
169
+ // from the inner query would incorrectly be attributed to the outer table.
170
+ if (expr.type === 'subquery' || expr.type === 'in' || expr.type === 'exists' || expr.type === 'not exists') {
171
+ if (expr.type === 'in') {
172
+ collectColumnsFromExpr(expr.expr, columns, aliases)
173
+ }
174
+ const sub = expr.subquery
175
+ if (sub) {
176
+ /** @type {IdentifierNode[]} */
177
+ const inner = []
178
+ collectColumnsFromStatement(sub, inner)
179
+ for (const id of inner) {
180
+ if (id.prefix) columns.push(id)
181
+ }
182
+ }
183
+ }
184
+ // No columns: count(*), literal, interval
185
+ }
186
+
187
+ /**
188
+ * Collects identifiers from a subquery statement for correlated column detection.
189
+ *
190
+ * @param {Statement} stmt
191
+ * @param {IdentifierNode[]} columns
192
+ */
193
+ function collectColumnsFromStatement(stmt, columns) {
194
+ if (stmt.type === 'compound') {
195
+ collectColumnsFromStatement(stmt.left, columns)
196
+ collectColumnsFromStatement(stmt.right, columns)
197
+ return
198
+ }
199
+ if (stmt.type === 'with') {
200
+ collectColumnsFromStatement(stmt.query, columns)
201
+ return
202
+ }
203
+ for (const col of stmt.columns) {
204
+ if (col.type === 'derived') collectColumnsFromExpr(col.expr, columns)
205
+ }
206
+ collectColumnsFromExpr(stmt.where, columns)
207
+ if (stmt.from?.type === 'subquery') {
208
+ collectColumnsFromStatement(stmt.from.query, columns)
209
+ }
210
+ for (const join of stmt.joins) collectColumnsFromExpr(join.on, columns)
211
+ for (const expr of stmt.groupBy) collectColumnsFromExpr(expr, columns)
212
+ collectColumnsFromExpr(stmt.having, columns)
213
+ for (const item of stmt.orderBy) collectColumnsFromExpr(item.expr, columns)
167
214
  }
168
215
 
169
216
  /**
package/src/plan/plan.js CHANGED
@@ -32,9 +32,10 @@ export function planSql({ query, functions, tables }) {
32
32
  * @param {Map<string, string[]>} [options.cteColumns]
33
33
  * @param {Record<string, AsyncDataSource>} [options.tables]
34
34
  * @param {IdentifierNode[]} [options.parentColumns] - columns needed by the parent query (for subquery pushdown)
35
+ * @param {string[]} [options.outerScope] - aliases from an outer query (for correlated subqueries)
35
36
  * @returns {QueryPlan}
36
37
  */
37
- function planStatement({ stmt, ctePlans, cteColumns, tables, parentColumns }) {
38
+ export function planStatement({ stmt, ctePlans, cteColumns, tables, parentColumns, outerScope }) {
38
39
  if (stmt.type === 'with') {
39
40
  // Build CTE plans in order (each CTE can reference preceding CTEs)
40
41
  ctePlans ??= new Map()
@@ -44,12 +45,12 @@ function planStatement({ stmt, ctePlans, cteColumns, tables, parentColumns }) {
44
45
  ctePlans.set(cte.name.toLowerCase(), ctePlan)
45
46
  cteColumns.set(cte.name.toLowerCase(), inferStatementColumns({ stmt: cte.query, cteColumns, tables }))
46
47
  }
47
- return planStatement({ stmt: stmt.query, ctePlans, cteColumns, tables, parentColumns })
48
+ return planStatement({ stmt: stmt.query, ctePlans, cteColumns, tables, parentColumns, outerScope })
48
49
  }
49
50
  if (stmt.type === 'compound') {
50
- return planSetOperation({ compound: stmt, ctePlans, cteColumns, tables })
51
+ return planSetOperation({ compound: stmt, ctePlans, cteColumns, tables, parentColumns })
51
52
  }
52
- return planSelect({ select: stmt, ctePlans, cteColumns, tables, parentColumns })
53
+ return planSelect({ select: stmt, ctePlans, cteColumns, tables, parentColumns, outerScope })
53
54
  }
54
55
 
55
56
  /**
@@ -60,11 +61,12 @@ function planStatement({ stmt, ctePlans, cteColumns, tables, parentColumns }) {
60
61
  * @param {Map<string, QueryPlan>} [options.ctePlans]
61
62
  * @param {Map<string, string[]>} [options.cteColumns]
62
63
  * @param {Record<string, AsyncDataSource>} [options.tables]
64
+ * @param {IdentifierNode[]} [options.parentColumns] - columns needed by the parent query
63
65
  * @returns {QueryPlan}
64
66
  */
65
- function planSetOperation({ compound, ctePlans, cteColumns, tables }) {
66
- const left = planStatement({ stmt: compound.left, ctePlans, cteColumns, tables })
67
- const right = planStatement({ stmt: compound.right, ctePlans, cteColumns, tables })
67
+ function planSetOperation({ compound, ctePlans, cteColumns, tables, parentColumns }) {
68
+ const left = planStatement({ stmt: compound.left, ctePlans, cteColumns, tables, parentColumns })
69
+ const right = planStatement({ stmt: compound.right, ctePlans, cteColumns, tables, parentColumns })
68
70
  const leftColumns = inferStatementColumns({ stmt: compound.left, cteColumns, tables })
69
71
  const rightColumns = inferStatementColumns({ stmt: compound.right, cteColumns, tables })
70
72
 
@@ -100,9 +102,10 @@ function planSetOperation({ compound, ctePlans, cteColumns, tables }) {
100
102
  * @param {Map<string, string[]>} [options.cteColumns]
101
103
  * @param {Record<string, AsyncDataSource>} [options.tables]
102
104
  * @param {IdentifierNode[]} [options.parentColumns] - columns needed by the parent query (for subquery pushdown)
105
+ * @param {string[]} [options.outerScope] - aliases from an outer query (for correlated subqueries)
103
106
  * @returns {QueryPlan}
104
107
  */
105
- function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
108
+ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outerScope }) {
106
109
  // Check for aggregation
107
110
  const hasAggregate = select.columns.some(col =>
108
111
  col.type === 'derived' && findAggregate(col.expr)
@@ -114,7 +117,8 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
114
117
  const sourceAlias = fromAlias(select.from)
115
118
 
116
119
  // Resolve aliases (and validate qualified references)
117
- const scopeTables = Object.fromEntries([sourceAlias, ...select.joins.map(j => j.alias ?? j.table)].map(a => [a, true]))
120
+ // Include outerScope aliases so correlated references pass validation
121
+ const scopeTables = Object.fromEntries([sourceAlias, ...select.joins.map(j => j.alias ?? j.table), ...outerScope ?? []].map(a => [a, true]))
118
122
  /** @type {Map<string, ExprNode>} */
119
123
  const aliases = new Map()
120
124
  const columns = select.columns.map(col => {
@@ -153,6 +157,11 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
153
157
  const hints = {}
154
158
  const perTableColumns = extractColumns({ select, parentColumns })
155
159
  hints.columns = perTableColumns.get(sourceAlias)
160
+ // Empty columns array means no columns were referenced, but a FROM subquery
161
+ // still needs its own columns (e.g. for DISTINCT). Treat empty as unrestricted.
162
+ if (hints.columns?.length === 0 && select.from.type === 'subquery') {
163
+ hints.columns = undefined
164
+ }
156
165
  if (!select.joins.length) {
157
166
  hints.where = select.where
158
167
  if (!needsBuffering && !select.distinct) {
@@ -163,7 +172,7 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
163
172
 
164
173
  // Start with the data source (FROM clause)
165
174
  /** @type {QueryPlan} */
166
- let plan = planFrom({ select, ctePlans, cteColumns, hints, tables })
175
+ let plan = planFrom({ select, ctePlans, cteColumns, hints, tables, outerScope })
167
176
 
168
177
  // Add JOINs
169
178
  if (select.joins.length) {
@@ -255,9 +264,10 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
255
264
  * @param {Map<string, string[]>} [options.cteColumns]
256
265
  * @param {ScanOptions} options.hints
257
266
  * @param {Record<string, AsyncDataSource>} [options.tables]
267
+ * @param {string[]} [options.outerScope]
258
268
  * @returns {QueryPlan}
259
269
  */
260
- function planFrom({ select, ctePlans, cteColumns, hints, tables }) {
270
+ function planFrom({ select, ctePlans, cteColumns, hints, tables, outerScope }) {
261
271
  if (select.from.type === 'table') {
262
272
  const ctePlan = ctePlans?.get(select.from.table.toLowerCase())
263
273
  if (ctePlan) {
@@ -271,6 +281,7 @@ function planFrom({ select, ctePlans, cteColumns, hints, tables }) {
271
281
  ctePlans,
272
282
  cteColumns,
273
283
  tables,
284
+ outerScope,
274
285
  parentColumns: hints.columns?.map(name => ({ type: 'identifier', name, positionStart: 0, positionEnd: 0 })),
275
286
  })
276
287
  // Validate that requested columns exist in subquery output
package/src/types.d.ts CHANGED
@@ -40,6 +40,12 @@ export interface ExecuteContext {
40
40
  tables: Record<string, AsyncDataSource>
41
41
  functions?: Record<string, UserDefinedFunction>
42
42
  signal?: AbortSignal
43
+ // current query's FROM + JOIN aliases (e.g. ['a', 'b'])
44
+ scope?: string[]
45
+ // the enclosing query's current row, for resolving correlated references
46
+ outerRow?: AsyncRow
47
+ // aliases from the enclosing query that are valid correlated references
48
+ outerAliases?: Set<string>
43
49
  }
44
50
 
45
51
  // AsyncRow represents a row with async cell values
@@ -1,5 +1,12 @@
1
1
  import { FUNCTION_SIGNATURES } from './functions.js'
2
2
 
3
+ /** Well-known window functions that are not supported */
4
+ const WINDOW_FUNCTIONS = new Set([
5
+ 'ROW_NUMBER', 'RANK', 'DENSE_RANK', 'NTILE',
6
+ 'LAG', 'LEAD', 'FIRST_VALUE', 'LAST_VALUE', 'NTH_VALUE',
7
+ 'CUME_DIST', 'PERCENT_RANK',
8
+ ])
9
+
3
10
  /**
4
11
  * Structured parse error with position range.
5
12
  */
@@ -103,10 +110,16 @@ export class UnknownFunctionError extends ParseError {
103
110
  * @param {number} options.positionEnd
104
111
  */
105
112
  constructor({ funcName, positionStart, positionEnd }) {
106
- const suggestions = suggestFunctions(funcName)
107
- let message = `Unknown function "${funcName}" at position ${positionStart}.`
108
- if (suggestions.length) {
109
- message += ` Did you mean ${suggestions.join(', ')}?`
113
+ const upper = funcName.toUpperCase()
114
+ let message
115
+ if (WINDOW_FUNCTIONS.has(upper)) {
116
+ message = `Window function "${funcName}" is not supported at position ${positionStart}`
117
+ } else {
118
+ const suggestions = suggestFunctions(funcName)
119
+ message = `Unknown function "${funcName}" at position ${positionStart}.`
120
+ if (suggestions.length) {
121
+ message += ` Did you mean ${suggestions.join(', ')}?`
122
+ }
110
123
  }
111
124
  super({ message, positionStart, positionEnd })
112
125
  }