squirreling 0.12.2 → 0.12.3

This diff shows the changes between two publicly released versions of the package, as they appear in one of the supported public registries. It is provided for informational purposes only.
package/README.md CHANGED
@@ -140,7 +140,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
140
140
 
141
141
  - `SELECT` statements with `DISTINCT`, `WHERE`, `ORDER BY`, `LIMIT`, `OFFSET`
142
142
  - `WITH` clause for Common Table Expressions (CTEs)
143
- - Subqueries in `SELECT`, `FROM`, and `WHERE` clauses
143
+ - Subqueries in `SELECT`, `FROM`, `WHERE`, and correlated subqueries
144
144
  - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`
145
145
  - `GROUP BY` and `HAVING` clauses
146
146
  - Set operations: `UNION`, `UNION ALL`, `INTERSECT`, `INTERSECT ALL`, `EXCEPT`, `EXCEPT ALL`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.12.2",
3
+ "version": "0.12.3",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -2,7 +2,8 @@ import { memorySource } from '../backend/dataSource.js'
2
2
  import { derivedAlias } from '../expression/alias.js'
3
3
  import { evaluateExpr } from '../expression/evaluate.js'
4
4
  import { parseSql } from '../parse/parse.js'
5
- import { planSql } from '../plan/plan.js'
5
+ import { planSql, planStatement } from '../plan/plan.js'
6
+ import { fromAlias } from '../plan/columns.js'
6
7
  import { validateScan, validateTable } from '../validation/tables.js'
7
8
  import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
8
9
  import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
@@ -24,30 +25,18 @@ export function executeSql({ tables, query, functions, signal }) {
24
25
  const parsed = typeof query === 'string' ? parseSql({ query, functions }) : query
25
26
 
26
27
  // Normalize tables: convert arrays to AsyncDataSource
27
- // Fast path: skip normalization when no arrays are present
28
- let needsNormalization = false
29
- const tableKeys = Object.keys(tables)
30
- for (let i = 0; i < tableKeys.length; i++) {
31
- if (Array.isArray(tables[tableKeys[i]])) {
32
- needsNormalization = true
33
- break
34
- }
35
- }
36
-
37
28
  /** @type {Record<string, AsyncDataSource>} */
38
- let normalizedTables
39
- if (needsNormalization) {
40
- normalizedTables = {}
41
- for (let i = 0; i < tableKeys.length; i++) {
42
- const name = tableKeys[i]
43
- const data = tables[name]
44
- normalizedTables[name] = Array.isArray(data) ? memorySource({ data }) : data
29
+ const normalizedTables = {}
30
+ for (const [name, data] of Object.entries(tables)) {
31
+ if (Array.isArray(data)) {
32
+ normalizedTables[name] = memorySource({ data })
33
+ } else {
34
+ normalizedTables[name] = data
45
35
  }
46
- } else {
47
- normalizedTables = /** @type {Record<string, AsyncDataSource>} */ (tables)
48
36
  }
49
37
 
50
- const context = { tables: normalizedTables, functions, signal }
38
+ const scope = statementScope(parsed)
39
+ const context = { tables: normalizedTables, functions, signal, scope }
51
40
  const plan = planSql({ query: parsed, functions, tables: normalizedTables })
52
41
  return executePlan({ plan, context })
53
42
  }
@@ -58,11 +47,26 @@ export function executeSql({ tables, query, functions, signal }) {
58
47
  * @param {Object} options
59
48
  * @param {Statement} options.query
60
49
  * @param {ExecuteContext} options.context
50
+ * @param {string[]} [options.outerScope] - outer query aliases for correlated subqueries
61
51
  * @returns {QueryResults}
62
52
  */
63
- export function executeStatement({ query, context }) {
64
- const plan = planSql({ query, functions: context.functions, tables: context.tables })
65
- return executePlan({ plan, context })
53
+ export function executeStatement({ query, context, outerScope }) {
54
+ const plan = planStatement({ stmt: query, tables: context.tables, outerScope })
55
+ // Compute this query's scope (FROM alias + JOIN aliases) for nested correlated subqueries
56
+ const scope = statementScope(query)
57
+ return executePlan({ plan, context: scope ? { ...context, scope } : context })
58
+ }
59
+
60
+ /**
61
+ * Extracts the table aliases from a statement's FROM and JOIN clauses.
62
+ *
63
+ * @param {Statement} stmt
64
+ * @returns {string[] | undefined}
65
+ */
66
+ function statementScope(stmt) {
67
+ if (stmt.type === 'with') return statementScope(stmt.query)
68
+ if (stmt.type === 'compound') return undefined
69
+ return [fromAlias(stmt.from), ...stmt.joins.map(j => j.alias ?? j.table)]
66
70
  }
67
71
 
68
72
  /**
@@ -39,6 +39,10 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
39
39
  if (qualified in row.cells) {
40
40
  return row.cells[qualified]()
41
41
  }
42
+ // Check outer row for correlated subquery references
43
+ if (context.outerRow && context.outerAliases?.has(node.prefix) && node.name in context.outerRow.cells) {
44
+ return context.outerRow.cells[node.name]()
45
+ }
42
46
  // Fall back to just the column part
43
47
  if (node.name in row.cells) {
44
48
  return row.cells[node.name]()
@@ -66,7 +70,11 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
66
70
 
67
71
  // Scalar subquery - returns a single value
68
72
  if (node.type === 'subquery') {
69
- const gen = executeStatement({ query: node.subquery, context }).rows()
73
+ const outerScope = context.scope
74
+ const subContext = outerScope
75
+ ? { ...context, outerRow: row, outerAliases: new Set(outerScope) }
76
+ : context
77
+ const gen = executeStatement({ query: node.subquery, context: subContext, outerScope }).rows()
70
78
  const { value } = await gen.next()
71
79
  gen.return(undefined)
72
80
  if (!value) return null
@@ -104,6 +104,16 @@ export function parseFunctionCall(state, positionStart) {
104
104
  expect(state, 'paren', ')')
105
105
  }
106
106
 
107
+ // Check for OVER clause (window functions not supported)
108
+ const overTok = current(state)
109
+ if (overTok.type === 'identifier' && overTok.value.toUpperCase() === 'OVER') {
110
+ throw new ParseError({
111
+ message: `Window functions are not supported: ${funcName}(...) OVER (...)`,
112
+ positionStart,
113
+ positionEnd: overTok.positionEnd,
114
+ })
115
+ }
116
+
107
117
  return {
108
118
  type: 'function',
109
119
  funcName,
@@ -163,7 +163,54 @@ function collectColumnsFromExpr(expr, columns, aliases) {
163
163
  collectColumnsFromExpr(expr.elseResult, columns, aliases)
164
164
  }
165
165
  }
166
- // No columns: count(*), literal, interval, exists, not exists, subquery
166
+ // Subqueries: collect prefixed identifiers for correlated column detection.
167
+ // Only prefixed identifiers are collected because correlated outer references
168
+ // are always qualified (e.g. users.id, a.session_id). Unprefixed identifiers
169
+ // from the inner query would incorrectly be attributed to the outer table.
170
+ if (expr.type === 'subquery' || expr.type === 'in' || expr.type === 'exists' || expr.type === 'not exists') {
171
+ if (expr.type === 'in') {
172
+ collectColumnsFromExpr(expr.expr, columns, aliases)
173
+ }
174
+ const sub = expr.subquery
175
+ if (sub) {
176
+ /** @type {IdentifierNode[]} */
177
+ const inner = []
178
+ collectColumnsFromStatement(sub, inner)
179
+ for (const id of inner) {
180
+ if (id.prefix) columns.push(id)
181
+ }
182
+ }
183
+ }
184
+ // No columns: count(*), literal, interval
185
+ }
186
+
187
+ /**
188
+ * Collects identifiers from a subquery statement for correlated column detection.
189
+ *
190
+ * @param {Statement} stmt
191
+ * @param {IdentifierNode[]} columns
192
+ */
193
+ function collectColumnsFromStatement(stmt, columns) {
194
+ if (stmt.type === 'compound') {
195
+ collectColumnsFromStatement(stmt.left, columns)
196
+ collectColumnsFromStatement(stmt.right, columns)
197
+ return
198
+ }
199
+ if (stmt.type === 'with') {
200
+ collectColumnsFromStatement(stmt.query, columns)
201
+ return
202
+ }
203
+ for (const col of stmt.columns) {
204
+ if (col.type === 'derived') collectColumnsFromExpr(col.expr, columns)
205
+ }
206
+ collectColumnsFromExpr(stmt.where, columns)
207
+ if (stmt.from?.type === 'subquery') {
208
+ collectColumnsFromStatement(stmt.from.query, columns)
209
+ }
210
+ for (const join of stmt.joins) collectColumnsFromExpr(join.on, columns)
211
+ for (const expr of stmt.groupBy) collectColumnsFromExpr(expr, columns)
212
+ collectColumnsFromExpr(stmt.having, columns)
213
+ for (const item of stmt.orderBy) collectColumnsFromExpr(item.expr, columns)
167
214
  }
168
215
 
169
216
  /**
package/src/plan/plan.js CHANGED
@@ -32,9 +32,10 @@ export function planSql({ query, functions, tables }) {
32
32
  * @param {Map<string, string[]>} [options.cteColumns]
33
33
  * @param {Record<string, AsyncDataSource>} [options.tables]
34
34
  * @param {IdentifierNode[]} [options.parentColumns] - columns needed by the parent query (for subquery pushdown)
35
+ * @param {string[]} [options.outerScope] - aliases from an outer query (for correlated subqueries)
35
36
  * @returns {QueryPlan}
36
37
  */
37
- function planStatement({ stmt, ctePlans, cteColumns, tables, parentColumns }) {
38
+ export function planStatement({ stmt, ctePlans, cteColumns, tables, parentColumns, outerScope }) {
38
39
  if (stmt.type === 'with') {
39
40
  // Build CTE plans in order (each CTE can reference preceding CTEs)
40
41
  ctePlans ??= new Map()
@@ -44,12 +45,12 @@ function planStatement({ stmt, ctePlans, cteColumns, tables, parentColumns }) {
44
45
  ctePlans.set(cte.name.toLowerCase(), ctePlan)
45
46
  cteColumns.set(cte.name.toLowerCase(), inferStatementColumns({ stmt: cte.query, cteColumns, tables }))
46
47
  }
47
- return planStatement({ stmt: stmt.query, ctePlans, cteColumns, tables, parentColumns })
48
+ return planStatement({ stmt: stmt.query, ctePlans, cteColumns, tables, parentColumns, outerScope })
48
49
  }
49
50
  if (stmt.type === 'compound') {
50
51
  return planSetOperation({ compound: stmt, ctePlans, cteColumns, tables })
51
52
  }
52
- return planSelect({ select: stmt, ctePlans, cteColumns, tables, parentColumns })
53
+ return planSelect({ select: stmt, ctePlans, cteColumns, tables, parentColumns, outerScope })
53
54
  }
54
55
 
55
56
  /**
@@ -100,9 +101,10 @@ function planSetOperation({ compound, ctePlans, cteColumns, tables }) {
100
101
  * @param {Map<string, string[]>} [options.cteColumns]
101
102
  * @param {Record<string, AsyncDataSource>} [options.tables]
102
103
  * @param {IdentifierNode[]} [options.parentColumns] - columns needed by the parent query (for subquery pushdown)
104
+ * @param {string[]} [options.outerScope] - aliases from an outer query (for correlated subqueries)
103
105
  * @returns {QueryPlan}
104
106
  */
105
- function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
107
+ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outerScope }) {
106
108
  // Check for aggregation
107
109
  const hasAggregate = select.columns.some(col =>
108
110
  col.type === 'derived' && findAggregate(col.expr)
@@ -114,7 +116,8 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
114
116
  const sourceAlias = fromAlias(select.from)
115
117
 
116
118
  // Resolve aliases (and validate qualified references)
117
- const scopeTables = Object.fromEntries([sourceAlias, ...select.joins.map(j => j.alias ?? j.table)].map(a => [a, true]))
119
+ // Include outerScope aliases so correlated references pass validation
120
+ const scopeTables = Object.fromEntries([sourceAlias, ...select.joins.map(j => j.alias ?? j.table), ...outerScope ?? []].map(a => [a, true]))
118
121
  /** @type {Map<string, ExprNode>} */
119
122
  const aliases = new Map()
120
123
  const columns = select.columns.map(col => {
@@ -153,6 +156,11 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
153
156
  const hints = {}
154
157
  const perTableColumns = extractColumns({ select, parentColumns })
155
158
  hints.columns = perTableColumns.get(sourceAlias)
159
+ // Empty columns array means no columns were referenced, but a FROM subquery
160
+ // still needs its own columns (e.g. for DISTINCT). Treat empty as unrestricted.
161
+ if (hints.columns?.length === 0 && select.from.type === 'subquery') {
162
+ hints.columns = undefined
163
+ }
156
164
  if (!select.joins.length) {
157
165
  hints.where = select.where
158
166
  if (!needsBuffering && !select.distinct) {
@@ -163,7 +171,7 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
163
171
 
164
172
  // Start with the data source (FROM clause)
165
173
  /** @type {QueryPlan} */
166
- let plan = planFrom({ select, ctePlans, cteColumns, hints, tables })
174
+ let plan = planFrom({ select, ctePlans, cteColumns, hints, tables, outerScope })
167
175
 
168
176
  // Add JOINs
169
177
  if (select.joins.length) {
@@ -255,9 +263,10 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
255
263
  * @param {Map<string, string[]>} [options.cteColumns]
256
264
  * @param {ScanOptions} options.hints
257
265
  * @param {Record<string, AsyncDataSource>} [options.tables]
266
+ * @param {string[]} [options.outerScope]
258
267
  * @returns {QueryPlan}
259
268
  */
260
- function planFrom({ select, ctePlans, cteColumns, hints, tables }) {
269
+ function planFrom({ select, ctePlans, cteColumns, hints, tables, outerScope }) {
261
270
  if (select.from.type === 'table') {
262
271
  const ctePlan = ctePlans?.get(select.from.table.toLowerCase())
263
272
  if (ctePlan) {
@@ -271,6 +280,7 @@ function planFrom({ select, ctePlans, cteColumns, hints, tables }) {
271
280
  ctePlans,
272
281
  cteColumns,
273
282
  tables,
283
+ outerScope,
274
284
  parentColumns: hints.columns?.map(name => ({ type: 'identifier', name, positionStart: 0, positionEnd: 0 })),
275
285
  })
276
286
  // Validate that requested columns exist in subquery output
package/src/types.d.ts CHANGED
@@ -40,6 +40,12 @@ export interface ExecuteContext {
40
40
  tables: Record<string, AsyncDataSource>
41
41
  functions?: Record<string, UserDefinedFunction>
42
42
  signal?: AbortSignal
43
+ // current query's FROM + JOIN aliases (e.g. ['a', 'b'])
44
+ scope?: string[]
45
+ // the enclosing query's current row, for resolving correlated references
46
+ outerRow?: AsyncRow
47
+ // aliases from the enclosing query that are valid correlated references
48
+ outerAliases?: Set<string>
43
49
  }
44
50
 
45
51
  // AsyncRow represents a row with async cell values
@@ -1,5 +1,12 @@
1
1
  import { FUNCTION_SIGNATURES } from './functions.js'
2
2
 
3
+ /** Well-known window functions that are not supported */
4
+ const WINDOW_FUNCTIONS = new Set([
5
+ 'ROW_NUMBER', 'RANK', 'DENSE_RANK', 'NTILE',
6
+ 'LAG', 'LEAD', 'FIRST_VALUE', 'LAST_VALUE', 'NTH_VALUE',
7
+ 'CUME_DIST', 'PERCENT_RANK',
8
+ ])
9
+
3
10
  /**
4
11
  * Structured parse error with position range.
5
12
  */
@@ -103,10 +110,16 @@ export class UnknownFunctionError extends ParseError {
103
110
  * @param {number} options.positionEnd
104
111
  */
105
112
  constructor({ funcName, positionStart, positionEnd }) {
106
- const suggestions = suggestFunctions(funcName)
107
- let message = `Unknown function "${funcName}" at position ${positionStart}.`
108
- if (suggestions.length) {
109
- message += ` Did you mean ${suggestions.join(', ')}?`
113
+ const upper = funcName.toUpperCase()
114
+ let message
115
+ if (WINDOW_FUNCTIONS.has(upper)) {
116
+ message = `Window function "${funcName}" is not supported at position ${positionStart}`
117
+ } else {
118
+ const suggestions = suggestFunctions(funcName)
119
+ message = `Unknown function "${funcName}" at position ${positionStart}.`
120
+ if (suggestions.length) {
121
+ message += ` Did you mean ${suggestions.join(', ')}?`
122
+ }
110
123
  }
111
124
  super({ message, positionStart, positionEnd })
112
125
  }