squirreling 0.12.1 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -140,7 +140,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
140
140
 
141
141
  - `SELECT` statements with `DISTINCT`, `WHERE`, `ORDER BY`, `LIMIT`, `OFFSET`
142
142
  - `WITH` clause for Common Table Expressions (CTEs)
143
- - Subqueries in `SELECT`, `FROM`, and `WHERE` clauses
143
+ - Subqueries in `SELECT`, `FROM`, `WHERE`, and correlated subqueries
144
144
  - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`
145
145
  - `GROUP BY` and `HAVING` clauses
146
146
  - Set operations: `UNION`, `UNION ALL`, `INTERSECT`, `INTERSECT ALL`, `EXCEPT`, `EXCEPT ALL`
@@ -154,14 +154,14 @@ Squirreling mostly follows the SQL standard. The following features are supporte
154
154
 
155
155
  ### Functions
156
156
 
157
- - Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `MEDIAN`, `PERCENTILE_CONT`, `APPROX_QUANTILE`, `STDDEV_POP`, `STDDEV_SAMP`, `JSON_ARRAYAGG`
157
+ - Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `MEDIAN`, `PERCENTILE_CONT`, `APPROX_QUANTILE`, `STDDEV_POP`, `STDDEV_SAMP`, `JSON_ARRAYAGG`, `STRING_AGG`
158
158
  - String: `CONCAT`, `SUBSTRING`, `REPLACE`, `LENGTH`, `UPPER`, `LOWER`, `TRIM`, `LEFT`, `RIGHT`, `INSTR`, `POSITION`, `STRPOS`
159
159
  - Math: `ABS`, `SIGN`, `CEIL`, `FLOOR`, `ROUND`, `MOD`, `RAND`, `RANDOM`, `LN`, `LOG10`, `EXP`, `POWER`, `SQRT`
160
160
  - Trig: `SIN`, `COS`, `TAN`, `COT`, `ASIN`, `ACOS`, `ATAN`, `ATAN2`, `DEGREES`, `RADIANS`, `PI`
161
161
  - Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_PART`, `DATE_TRUNC`, `EXTRACT`, `INTERVAL`
162
- - Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`
162
+ - Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`, `JSON_ARRAY_LENGTH`
163
163
  - Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_SORT`, `CARDINALITY`
164
- - Regex: `REGEXP_SUBSTR`, `REGEXP_EXTRACT`, `REGEXP_REPLACE`
164
+ - Regex: `REGEXP_SUBSTR`, `REGEXP_EXTRACT`, `REGEXP_REPLACE`, `REGEXP_MATCHES`
165
165
  - Spatial: `ST_GeomFromText`, `ST_MakeEnvelope`, `ST_AsText`, `ST_Intersects`, `ST_Contains`, `ST_ContainsProperly`, `ST_Within`, `ST_Overlaps`, `ST_Touches`, `ST_Equals`, `ST_Crosses`, `ST_Covers`, `ST_CoveredBy`, `ST_DWithin`
166
- - Conditional: `COALESCE`, `NULLIF`
166
+ - Conditional: `COALESCE`, `NULLIF`, `GREATEST`, `LEAST`
167
167
  - User-defined functions (UDFs)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.12.1",
3
+ "version": "0.12.3",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -39,11 +39,11 @@
39
39
  "test": "vitest run"
40
40
  },
41
41
  "devDependencies": {
42
- "@types/node": "25.5.2",
43
- "@vitest/coverage-v8": "4.1.3",
42
+ "@types/node": "25.6.0",
43
+ "@vitest/coverage-v8": "4.1.4",
44
44
  "eslint": "9.39.2",
45
45
  "eslint-plugin-jsdoc": "62.9.0",
46
46
  "typescript": "6.0.2",
47
- "vitest": "4.1.3"
47
+ "vitest": "4.1.4"
48
48
  }
49
49
  }
@@ -15,7 +15,7 @@ export function asyncRow(obj, columns) {
15
15
  for (const key of columns) {
16
16
  cells[key] = () => Promise.resolve(obj[key])
17
17
  }
18
- return { columns, cells }
18
+ return { columns, cells, resolved: obj }
19
19
  }
20
20
 
21
21
  /**
@@ -34,13 +34,14 @@ export function memorySource({ data, columns }) {
34
34
  }
35
35
  const firstColumns = Object.keys(data[0])
36
36
  // Check first 1000 rows for consistent columns
37
+ const firstColSet = new Set(firstColumns)
37
38
  for (let i = 1; i < data.length && i < 1000; i++) {
38
39
  const rowColumns = Object.keys(data[i])
39
40
  const missing = firstColumns.find(col => !rowColumns.includes(col))
40
41
  if (missing) {
41
42
  throw new Error(`Inconsistent data, column "${missing}" not found in row ${i}`)
42
43
  }
43
- const extra = rowColumns.find(col => !firstColumns.includes(col))
44
+ const extra = rowColumns.find(col => !firstColSet.has(col))
44
45
  if (extra) {
45
46
  throw new Error(`Inconsistent data, unexpected column "${extra}" found in row ${i}`)
46
47
  }
@@ -54,11 +55,12 @@ export function memorySource({ data, columns }) {
54
55
  // Only apply offset and limit if no where clause
55
56
  const start = !where ? offset ?? 0 : 0
56
57
  const end = !where && limit !== undefined ? start + limit : data.length
58
+ const rowColumns = scanColumns ?? columns
57
59
  return {
58
60
  async *rows() {
59
61
  for (let i = start; i < end && i < data.length; i++) {
60
62
  if (signal?.aborted) break
61
- yield asyncRow(data[i], scanColumns ?? columns)
63
+ yield asyncRow(data[i], rowColumns)
62
64
  }
63
65
  },
64
66
  appliedWhere: false,
@@ -62,7 +62,7 @@ export function executeHashAggregate(plan, context) {
62
62
  return {
63
63
  columns: selectColumnNames(plan.columns, child.columns),
64
64
  maxRows: child.maxRows,
65
- async *rows () {
65
+ async *rows() {
66
66
  // Collect all rows
67
67
  /** @type {AsyncRow[]} */
68
68
  const allRows = []
@@ -136,7 +136,7 @@ export function executeScalarAggregate(plan, context) {
136
136
  columns: selectColumnNames(plan.columns, child.columns),
137
137
  numRows: plan.having ? undefined : 1,
138
138
  maxRows: 1,
139
- async *rows () {
139
+ async *rows() {
140
140
  // Collect all rows into single group
141
141
  /** @type {AsyncRow[]} */
142
142
  const group = []
@@ -2,7 +2,8 @@ import { memorySource } from '../backend/dataSource.js'
2
2
  import { derivedAlias } from '../expression/alias.js'
3
3
  import { evaluateExpr } from '../expression/evaluate.js'
4
4
  import { parseSql } from '../parse/parse.js'
5
- import { planSql } from '../plan/plan.js'
5
+ import { planSql, planStatement } from '../plan/plan.js'
6
+ import { fromAlias } from '../plan/columns.js'
6
7
  import { validateScan, validateTable } from '../validation/tables.js'
7
8
  import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
8
9
  import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
@@ -10,7 +11,7 @@ import { executeSort } from './sort.js'
10
11
  import { addBounds, minBounds, stableRowKey } from './utils.js'
11
12
 
12
13
  /**
13
- * @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteContext, ExecuteSqlOptions, ExprNode, QueryResults, SelectColumn, Statement } from '../types.js'
14
+ * @import { AsyncCells, AsyncDataSource, AsyncRow, DerivedColumn, ExecuteContext, ExecuteSqlOptions, ExprNode, IdentifierNode, QueryResults, SelectColumn, SqlPrimitive, Statement } from '../types.js'
14
15
  * @import { CountNode, DistinctNode, FilterNode, LimitNode, ProjectNode, QueryPlan, ScanNode, SetOperationNode } from '../plan/types.js'
15
16
  */
16
17
 
@@ -34,7 +35,8 @@ export function executeSql({ tables, query, functions, signal }) {
34
35
  }
35
36
  }
36
37
 
37
- const context = { tables: normalizedTables, functions, signal }
38
+ const scope = statementScope(parsed)
39
+ const context = { tables: normalizedTables, functions, signal, scope }
38
40
  const plan = planSql({ query: parsed, functions, tables: normalizedTables })
39
41
  return executePlan({ plan, context })
40
42
  }
@@ -45,11 +47,26 @@ export function executeSql({ tables, query, functions, signal }) {
45
47
  * @param {Object} options
46
48
  * @param {Statement} options.query
47
49
  * @param {ExecuteContext} options.context
50
+ * @param {string[]} [options.outerScope] - outer query aliases for correlated subqueries
48
51
  * @returns {QueryResults}
49
52
  */
50
- export function executeStatement({ query, context }) {
51
- const plan = planSql({ query, functions: context.functions, tables: context.tables })
52
- return executePlan({ plan, context })
53
+ export function executeStatement({ query, context, outerScope }) {
54
+ const plan = planStatement({ stmt: query, tables: context.tables, outerScope })
55
+ // Compute this query's scope (FROM alias + JOIN aliases) for nested correlated subqueries
56
+ const scope = statementScope(query)
57
+ return executePlan({ plan, context: scope ? { ...context, scope } : context })
58
+ }
59
+
60
+ /**
61
+ * Extracts the table aliases from a statement's FROM and JOIN clauses.
62
+ *
63
+ * @param {Statement} stmt
64
+ * @returns {string[] | undefined}
65
+ */
66
+ function statementScope(stmt) {
67
+ if (stmt.type === 'with') return statementScope(stmt.query)
68
+ if (stmt.type === 'compound') return undefined
69
+ return [fromAlias(stmt.from), ...stmt.joins.map(j => j.alias ?? j.table)]
53
70
  }
54
71
 
55
72
  /**
@@ -88,7 +105,7 @@ export function executePlan({ plan, context }) {
88
105
  } else if (plan.type === 'SetOperation') {
89
106
  return executeSetOperation(plan, context)
90
107
  }
91
- return { columns: [], async *rows () {} }
108
+ return { columns: [], async *rows() {} }
92
109
  }
93
110
 
94
111
  /**
@@ -142,7 +159,7 @@ function executeScan(plan, context) {
142
159
  columns: [column],
143
160
  numRows: scanRows,
144
161
  maxRows: scanRows,
145
- async *rows () {
162
+ async *rows() {
146
163
  const columns = [column]
147
164
  for await (const chunk of chunks) {
148
165
  if (signal?.aborted) return
@@ -172,7 +189,7 @@ function executeScan(plan, context) {
172
189
  columns: plan.hints.columns ?? table.columns,
173
190
  numRows: !plan.hints.where ? scanRows : undefined,
174
191
  maxRows: scanRows,
175
- async *rows () {
192
+ async *rows() {
176
193
  let result = scanResult.rows()
177
194
 
178
195
  // Apply WHERE if data source did not
@@ -205,7 +222,7 @@ function executeCount(plan, context) {
205
222
  columns: plan.columns.map(col => col.alias ?? derivedAlias(col.expr)),
206
223
  numRows: 1,
207
224
  maxRows: 1,
208
- async *rows () {
225
+ async *rows() {
209
226
  // Use source numRows if available
210
227
  let count = table.numRows
211
228
  if (count === undefined) {
@@ -344,37 +361,86 @@ function executeFilter(plan, context) {
344
361
  */
345
362
  function executeProject(plan, context) {
346
363
  const child = executePlan({ plan: plan.child, context })
364
+
365
+ // Pre-compute column names for derived columns (avoids per-row derivedAlias calls)
366
+ const hasStar = plan.columns.some(col => col.type === 'star')
367
+
368
+ /** @type {string[] | undefined} */
369
+ let staticColumns
370
+ /** @type {{ alias: string, sourceName: string }[] | undefined} */
371
+ let identifierMap
372
+ if (!hasStar) {
373
+ const derived = /** @type {DerivedColumn[]} */ (plan.columns)
374
+ staticColumns = derived.map(col => col.alias ?? derivedAlias(col.expr))
375
+ const allIdentifiers = derived.every(col =>
376
+ col.expr.type === 'identifier' && !col.expr.prefix
377
+ )
378
+ if (allIdentifiers) {
379
+ identifierMap = derived.map((col, i) => ({
380
+ alias: staticColumns[i],
381
+ sourceName: /** @type {IdentifierNode} */ (col.expr).name,
382
+ }))
383
+ }
384
+ }
385
+
347
386
  return {
348
387
  columns: selectColumnNames(plan.columns, child.columns),
349
388
  numRows: child.numRows,
350
389
  maxRows: child.maxRows,
351
- async *rows () {
390
+ async *rows() {
352
391
  let rowIndex = 0
392
+ let identifierMapValidated = false
353
393
 
354
394
  for await (const row of child.rows()) {
355
395
  if (context.signal?.aborted) return
356
396
  rowIndex++
397
+
398
+ // Validate identifier fast path on first row (may fail for JOINs with prefixed columns)
399
+ if (identifierMap && !identifierMapValidated) {
400
+ identifierMapValidated = true
401
+ if (!identifierMap.every(m => m.sourceName in row.cells)) {
402
+ identifierMap = undefined
403
+ }
404
+ }
405
+
406
+ // Fast path: all columns are simple identifier references
407
+ if (identifierMap) {
408
+ /** @type {AsyncCells} */
409
+ const cells = {}
410
+ const source = row.resolved
411
+ /** @type {Record<string, SqlPrimitive> | undefined} */
412
+ const resolved = source ? {} : undefined
413
+ for (const { alias, sourceName } of identifierMap) {
414
+ cells[alias] = row.cells[sourceName]
415
+ if (resolved && source) resolved[alias] = source[sourceName]
416
+ }
417
+ yield resolved
418
+ ? { columns: staticColumns, cells, resolved }
419
+ : { columns: staticColumns, cells }
420
+ continue
421
+ }
422
+
357
423
  const currentRowIndex = rowIndex
358
424
 
359
425
  /** @type {string[]} */
360
- const columns = []
426
+ const columns = staticColumns ?? []
361
427
  /** @type {AsyncCells} */
362
428
  const cells = {}
363
429
 
364
- for (const col of plan.columns) {
430
+ for (let i = 0; i < plan.columns.length; i++) {
431
+ const col = plan.columns[i]
365
432
  if (col.type === 'star') {
366
433
  const prefix = col.table ? `${col.table}.` : undefined
367
434
  for (const key of row.columns) {
368
435
  if (prefix && !key.startsWith(prefix)) continue
369
- // Strip table prefix for output column names
370
436
  const dotIndex = key.indexOf('.')
371
437
  const outputKey = prefix ? key.substring(prefix.length) : dotIndex >= 0 ? key.substring(dotIndex + 1) : key
372
438
  columns.push(outputKey)
373
439
  cells[outputKey] = row.cells[key]
374
440
  }
375
441
  } else {
376
- const alias = col.alias ?? derivedAlias(col.expr)
377
- columns.push(alias)
442
+ const alias = staticColumns ? staticColumns[i] : (col.alias ?? derivedAlias(col.expr))
443
+ if (!staticColumns) columns.push(alias)
378
444
  cells[alias] = () => evaluateExpr({
379
445
  node: col.expr,
380
446
  row,
@@ -402,7 +468,7 @@ function executeDistinct(plan, context) {
402
468
  return {
403
469
  columns: child.columns,
404
470
  maxRows: child.maxRows,
405
- async *rows () {
471
+ async *rows() {
406
472
  const { signal } = context
407
473
  const MAX_CHUNK = 256
408
474
 
@@ -478,7 +544,7 @@ function executeSetOperation(plan, context) {
478
544
  columns: left.columns,
479
545
  numRows: addBounds(left.numRows, right.numRows),
480
546
  maxRows: addBounds(left.maxRows, right.maxRows),
481
- async *rows () {
547
+ async *rows() {
482
548
  // UNION ALL: yield all rows from both sides
483
549
  yield* left.rows()
484
550
  yield* right.rows()
@@ -490,7 +556,7 @@ function executeSetOperation(plan, context) {
490
556
  return {
491
557
  columns: left.columns,
492
558
  maxRows: addBounds(left.maxRows, right.maxRows),
493
- async *rows () {
559
+ async *rows() {
494
560
  // UNION: yield deduplicated rows from both sides
495
561
  const seen = new Set()
496
562
  for await (const row of left.rows()) {
@@ -518,7 +584,7 @@ function executeSetOperation(plan, context) {
518
584
  return {
519
585
  columns: left.columns,
520
586
  maxRows: minBounds(left.maxRows, right.maxRows),
521
- async *rows () {
587
+ async *rows() {
522
588
  // Materialize right side keys
523
589
  /** @type {Map<any, number>} */
524
590
  const rightKeys = new Map()
@@ -560,7 +626,7 @@ function executeSetOperation(plan, context) {
560
626
  return {
561
627
  columns: left.columns,
562
628
  maxRows: left.maxRows,
563
- async *rows () {
629
+ async *rows() {
564
630
  // Materialize right side keys
565
631
  /** @type {Map<any, number>} */
566
632
  const rightKeys = new Map()
@@ -19,7 +19,7 @@ export function executeNestedLoopJoin(plan, context) {
19
19
  const right = executePlan({ plan: plan.right, context })
20
20
  return {
21
21
  columns: mergeColumnNames(left.columns, right.columns, plan.leftAlias, plan.rightAlias),
22
- async *rows () {
22
+ async *rows() {
23
23
  const leftTable = plan.leftAlias
24
24
  const rightTable = plan.rightAlias
25
25
 
@@ -97,7 +97,7 @@ export function executePositionalJoin(plan, context) {
97
97
  columns: mergeColumnNames(left.columns, right.columns, plan.leftAlias, plan.rightAlias),
98
98
  numRows,
99
99
  maxRows: maxBounds(left.maxRows, right.maxRows),
100
- async *rows () {
100
+ async *rows() {
101
101
  const { signal } = context
102
102
  const leftTable = plan.leftAlias
103
103
  const rightTable = plan.rightAlias
@@ -143,7 +143,7 @@ export function executeHashJoin(plan, context) {
143
143
  const right = executePlan({ plan: plan.right, context })
144
144
  return {
145
145
  columns: mergeColumnNames(left.columns, right.columns, plan.leftAlias, plan.rightAlias),
146
- async *rows () {
146
+ async *rows() {
147
147
  const leftTable = plan.leftAlias
148
148
  const rightTable = plan.rightAlias
149
149
 
@@ -20,7 +20,7 @@ export function executeSort(plan, context) {
20
20
  columns: child.columns,
21
21
  numRows: child.numRows,
22
22
  maxRows: child.maxRows,
23
- async *rows () {
23
+ async *rows() {
24
24
  // Buffer all rows
25
25
  /** @type {AsyncRow[]} */
26
26
  const rows = []
@@ -2,6 +2,8 @@
2
2
  * @import { AsyncRow, OrderByItem, QueryResults, SqlPrimitive } from '../types.js'
3
3
  */
4
4
 
5
+ const primitiveTypes = new Set(['number', 'bigint', 'boolean', 'string'])
6
+
5
7
  /**
6
8
  * Compares two values for a single ORDER BY term, handling nulls and direction
7
9
  *
@@ -24,10 +26,9 @@ export function compareForTerm(a, b, term) {
24
26
  // Compare non-null values
25
27
  if (a == b) return 0
26
28
 
27
- const primitives = ['number', 'bigint', 'boolean', 'string']
28
29
  let cmp
29
- if (primitives.includes(typeof a) && primitives.includes(typeof b)) {
30
- cmp = a < b ? -1 : a > b ? 1 : 0
30
+ if (primitiveTypes.has(typeof a) && primitiveTypes.has(typeof b)) {
31
+ cmp = a < b ? -1 : 1
31
32
  } else {
32
33
  const aa = String(a)
33
34
  const bb = String(b)
@@ -51,6 +52,29 @@ export async function collect(results) {
51
52
  for await (const asyncRow of results.rows()) {
52
53
  rows.push(asyncRow)
53
54
  }
55
+
56
+ // Fast path: if all rows have pre-materialized data, skip Promise overhead
57
+ let allMaterialized = rows.length > 0
58
+ for (let i = 0; i < rows.length; i++) {
59
+ if (!rows[i].resolved) {
60
+ allMaterialized = false
61
+ break
62
+ }
63
+ }
64
+ if (allMaterialized) {
65
+ const result = new Array(rows.length)
66
+ for (let i = 0; i < rows.length; i++) {
67
+ const row = rows[i]
68
+ /** @type {Record<string, SqlPrimitive>} */
69
+ const item = {}
70
+ for (const col of row.columns) {
71
+ item[col] = row.resolved[col]
72
+ }
73
+ result[i] = item
74
+ }
75
+ return result
76
+ }
77
+
54
78
  return Promise.all(rows.map(async asyncRow => {
55
79
  const values = await Promise.all(asyncRow.columns.map(k => asyncRow.cells[k]()))
56
80
  /** @type {Record<string, SqlPrimitive>} */
@@ -39,6 +39,10 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
39
39
  if (qualified in row.cells) {
40
40
  return row.cells[qualified]()
41
41
  }
42
+ // Check outer row for correlated subquery references
43
+ if (context.outerRow && context.outerAliases?.has(node.prefix) && node.name in context.outerRow.cells) {
44
+ return context.outerRow.cells[node.name]()
45
+ }
42
46
  // Fall back to just the column part
43
47
  if (node.name in row.cells) {
44
48
  return row.cells[node.name]()
@@ -66,7 +70,11 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
66
70
 
67
71
  // Scalar subquery - returns a single value
68
72
  if (node.type === 'subquery') {
69
- const gen = executeStatement({ query: node.subquery, context }).rows()
73
+ const outerScope = context.scope
74
+ const subContext = outerScope
75
+ ? { ...context, outerRow: row, outerAliases: new Set(outerScope) }
76
+ : context
77
+ const gen = executeStatement({ query: node.subquery, context: subContext, outerScope }).rows()
70
78
  const { value } = await gen.next()
71
79
  gen.return(undefined)
72
80
  if (!value) return null
@@ -272,6 +280,32 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
272
280
  ))
273
281
  }
274
282
  }
283
+
284
+ if (funcName === 'STRING_AGG') {
285
+ const separatorNode = node.args[1]
286
+ const separator = String(await evaluateExpr({ node: separatorNode, row: filteredRows[0] ?? { columns: [], cells: {} }, context }))
287
+ /** @type {string[]} */
288
+ const values = []
289
+ if (node.distinct) {
290
+ const seen = new Set()
291
+ for (const row of filteredRows) {
292
+ const v = await evaluateExpr({ node: argNode, row, context })
293
+ if (v == null) continue
294
+ const str = String(v)
295
+ const key = keyify(str)
296
+ if (!seen.has(key)) {
297
+ seen.add(key)
298
+ values.push(str)
299
+ }
300
+ }
301
+ } else {
302
+ for (const row of filteredRows) {
303
+ const v = await evaluateExpr({ node: argNode, row, context })
304
+ if (v != null) values.push(String(v))
305
+ }
306
+ }
307
+ return values.length === 0 ? null : values.join(separator)
308
+ }
275
309
  }
276
310
 
277
311
  /** @type {SqlPrimitive[]} */
@@ -311,6 +345,20 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
311
345
  return val1 == await val2 ? null : val1
312
346
  }
313
347
 
348
+ if (funcName === 'GREATEST' || funcName === 'LEAST') {
349
+ // Skip nulls; return null if all inputs are null
350
+ const isGreatest = funcName === 'GREATEST'
351
+ /** @type {SqlPrimitive} */
352
+ let best = null
353
+ for (const arg of args) {
354
+ if (arg == null) continue
355
+ if (best == null || (isGreatest ? arg > best : arg < best)) {
356
+ best = arg
357
+ }
358
+ }
359
+ return best
360
+ }
361
+
314
362
  if (funcName === 'DATE_TRUNC') {
315
363
  return dateTrunc(args[0], args[1])
316
364
  }
@@ -357,6 +405,25 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
357
405
  return result
358
406
  }
359
407
 
408
+ if (funcName === 'JSON_ARRAY_LENGTH') {
409
+ let arr = args[0]
410
+ if (arr == null) return null
411
+ if (typeof arr === 'string') {
412
+ try {
413
+ arr = JSON.parse(arr)
414
+ } catch {
415
+ throw new ArgValueError({
416
+ ...node,
417
+ message: 'invalid JSON string',
418
+ hint: 'Argument must be valid JSON.',
419
+ rowIndex,
420
+ })
421
+ }
422
+ }
423
+ if (!Array.isArray(arr)) return null
424
+ return arr.length
425
+ }
426
+
360
427
  if (funcName === 'ARRAY_LENGTH' || funcName === 'CARDINALITY') {
361
428
  const arr = args[0]
362
429
  if (!Array.isArray(arr)) return null
@@ -78,6 +78,27 @@ export function evaluateRegexpFunc({ funcName, node, args, rowIndex }) {
78
78
  return null
79
79
  }
80
80
 
81
+ if (funcName === 'REGEXP_MATCHES') {
82
+ const str = args[0]
83
+ const pattern = args[1]
84
+ if (str == null || pattern == null) return null
85
+ const strVal = String(str)
86
+ const patternStr = String(pattern)
87
+
88
+ let regex
89
+ try {
90
+ regex = new RegExp(patternStr)
91
+ } catch (/** @type {any} */ error) {
92
+ throw new ArgValueError({
93
+ ...node,
94
+ message: `invalid regex pattern: ${error.message}`,
95
+ rowIndex,
96
+ })
97
+ }
98
+
99
+ return regex.test(strVal)
100
+ }
101
+
81
102
  if (funcName === 'REGEXP_REPLACE') {
82
103
  const str = args[0]
83
104
  const pattern = args[1]
@@ -104,6 +104,16 @@ export function parseFunctionCall(state, positionStart) {
104
104
  expect(state, 'paren', ')')
105
105
  }
106
106
 
107
+ // Check for OVER clause (window functions not supported)
108
+ const overTok = current(state)
109
+ if (overTok.type === 'identifier' && overTok.value.toUpperCase() === 'OVER') {
110
+ throw new ParseError({
111
+ message: `Window functions are not supported: ${funcName}(...) OVER (...)`,
112
+ positionStart,
113
+ positionEnd: overTok.positionEnd,
114
+ })
115
+ }
116
+
107
117
  return {
108
118
  type: 'function',
109
119
  funcName,
@@ -163,7 +163,54 @@ function collectColumnsFromExpr(expr, columns, aliases) {
163
163
  collectColumnsFromExpr(expr.elseResult, columns, aliases)
164
164
  }
165
165
  }
166
- // No columns: count(*), literal, interval, exists, not exists, subquery
166
+ // Subqueries: collect prefixed identifiers for correlated column detection.
167
+ // Only prefixed identifiers are collected because correlated outer references
168
+ // are always qualified (e.g. users.id, a.session_id). Unprefixed identifiers
169
+ // from the inner query would incorrectly be attributed to the outer table.
170
+ if (expr.type === 'subquery' || expr.type === 'in' || expr.type === 'exists' || expr.type === 'not exists') {
171
+ if (expr.type === 'in') {
172
+ collectColumnsFromExpr(expr.expr, columns, aliases)
173
+ }
174
+ const sub = expr.subquery
175
+ if (sub) {
176
+ /** @type {IdentifierNode[]} */
177
+ const inner = []
178
+ collectColumnsFromStatement(sub, inner)
179
+ for (const id of inner) {
180
+ if (id.prefix) columns.push(id)
181
+ }
182
+ }
183
+ }
184
+ // No columns: count(*), literal, interval
185
+ }
186
+
187
+ /**
188
+ * Collects identifiers from a subquery statement for correlated column detection.
189
+ *
190
+ * @param {Statement} stmt
191
+ * @param {IdentifierNode[]} columns
192
+ */
193
+ function collectColumnsFromStatement(stmt, columns) {
194
+ if (stmt.type === 'compound') {
195
+ collectColumnsFromStatement(stmt.left, columns)
196
+ collectColumnsFromStatement(stmt.right, columns)
197
+ return
198
+ }
199
+ if (stmt.type === 'with') {
200
+ collectColumnsFromStatement(stmt.query, columns)
201
+ return
202
+ }
203
+ for (const col of stmt.columns) {
204
+ if (col.type === 'derived') collectColumnsFromExpr(col.expr, columns)
205
+ }
206
+ collectColumnsFromExpr(stmt.where, columns)
207
+ if (stmt.from?.type === 'subquery') {
208
+ collectColumnsFromStatement(stmt.from.query, columns)
209
+ }
210
+ for (const join of stmt.joins) collectColumnsFromExpr(join.on, columns)
211
+ for (const expr of stmt.groupBy) collectColumnsFromExpr(expr, columns)
212
+ collectColumnsFromExpr(stmt.having, columns)
213
+ for (const item of stmt.orderBy) collectColumnsFromExpr(item.expr, columns)
167
214
  }
168
215
 
169
216
  /**
package/src/plan/plan.js CHANGED
@@ -32,9 +32,10 @@ export function planSql({ query, functions, tables }) {
32
32
  * @param {Map<string, string[]>} [options.cteColumns]
33
33
  * @param {Record<string, AsyncDataSource>} [options.tables]
34
34
  * @param {IdentifierNode[]} [options.parentColumns] - columns needed by the parent query (for subquery pushdown)
35
+ * @param {string[]} [options.outerScope] - aliases from an outer query (for correlated subqueries)
35
36
  * @returns {QueryPlan}
36
37
  */
37
- function planStatement({ stmt, ctePlans, cteColumns, tables, parentColumns }) {
38
+ export function planStatement({ stmt, ctePlans, cteColumns, tables, parentColumns, outerScope }) {
38
39
  if (stmt.type === 'with') {
39
40
  // Build CTE plans in order (each CTE can reference preceding CTEs)
40
41
  ctePlans ??= new Map()
@@ -44,12 +45,12 @@ function planStatement({ stmt, ctePlans, cteColumns, tables, parentColumns }) {
44
45
  ctePlans.set(cte.name.toLowerCase(), ctePlan)
45
46
  cteColumns.set(cte.name.toLowerCase(), inferStatementColumns({ stmt: cte.query, cteColumns, tables }))
46
47
  }
47
- return planStatement({ stmt: stmt.query, ctePlans, cteColumns, tables, parentColumns })
48
+ return planStatement({ stmt: stmt.query, ctePlans, cteColumns, tables, parentColumns, outerScope })
48
49
  }
49
50
  if (stmt.type === 'compound') {
50
51
  return planSetOperation({ compound: stmt, ctePlans, cteColumns, tables })
51
52
  }
52
- return planSelect({ select: stmt, ctePlans, cteColumns, tables, parentColumns })
53
+ return planSelect({ select: stmt, ctePlans, cteColumns, tables, parentColumns, outerScope })
53
54
  }
54
55
 
55
56
  /**
@@ -100,9 +101,10 @@ function planSetOperation({ compound, ctePlans, cteColumns, tables }) {
100
101
  * @param {Map<string, string[]>} [options.cteColumns]
101
102
  * @param {Record<string, AsyncDataSource>} [options.tables]
102
103
  * @param {IdentifierNode[]} [options.parentColumns] - columns needed by the parent query (for subquery pushdown)
104
+ * @param {string[]} [options.outerScope] - aliases from an outer query (for correlated subqueries)
103
105
  * @returns {QueryPlan}
104
106
  */
105
- function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
107
+ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outerScope }) {
106
108
  // Check for aggregation
107
109
  const hasAggregate = select.columns.some(col =>
108
110
  col.type === 'derived' && findAggregate(col.expr)
@@ -114,7 +116,8 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
114
116
  const sourceAlias = fromAlias(select.from)
115
117
 
116
118
  // Resolve aliases (and validate qualified references)
117
- const scopeTables = Object.fromEntries([sourceAlias, ...select.joins.map(j => j.alias ?? j.table)].map(a => [a, true]))
119
+ // Include outerScope aliases so correlated references pass validation
120
+ const scopeTables = Object.fromEntries([sourceAlias, ...select.joins.map(j => j.alias ?? j.table), ...outerScope ?? []].map(a => [a, true]))
118
121
  /** @type {Map<string, ExprNode>} */
119
122
  const aliases = new Map()
120
123
  const columns = select.columns.map(col => {
@@ -153,6 +156,11 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
153
156
  const hints = {}
154
157
  const perTableColumns = extractColumns({ select, parentColumns })
155
158
  hints.columns = perTableColumns.get(sourceAlias)
159
+ // Empty columns array means no columns were referenced, but a FROM subquery
160
+ // still needs its own columns (e.g. for DISTINCT). Treat empty as unrestricted.
161
+ if (hints.columns?.length === 0 && select.from.type === 'subquery') {
162
+ hints.columns = undefined
163
+ }
156
164
  if (!select.joins.length) {
157
165
  hints.where = select.where
158
166
  if (!needsBuffering && !select.distinct) {
@@ -163,7 +171,7 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
163
171
 
164
172
  // Start with the data source (FROM clause)
165
173
  /** @type {QueryPlan} */
166
- let plan = planFrom({ select, ctePlans, cteColumns, hints, tables })
174
+ let plan = planFrom({ select, ctePlans, cteColumns, hints, tables, outerScope })
167
175
 
168
176
  // Add JOINs
169
177
  if (select.joins.length) {
@@ -255,9 +263,10 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns }) {
255
263
  * @param {Map<string, string[]>} [options.cteColumns]
256
264
  * @param {ScanOptions} options.hints
257
265
  * @param {Record<string, AsyncDataSource>} [options.tables]
266
+ * @param {string[]} [options.outerScope]
258
267
  * @returns {QueryPlan}
259
268
  */
260
- function planFrom({ select, ctePlans, cteColumns, hints, tables }) {
269
+ function planFrom({ select, ctePlans, cteColumns, hints, tables, outerScope }) {
261
270
  if (select.from.type === 'table') {
262
271
  const ctePlan = ctePlans?.get(select.from.table.toLowerCase())
263
272
  if (ctePlan) {
@@ -271,6 +280,7 @@ function planFrom({ select, ctePlans, cteColumns, hints, tables }) {
271
280
  ctePlans,
272
281
  cteColumns,
273
282
  tables,
283
+ outerScope,
274
284
  parentColumns: hints.columns?.map(name => ({ type: 'identifier', name, positionStart: 0, positionEnd: 0 })),
275
285
  })
276
286
  // Validate that requested columns exist in subquery output
package/src/types.d.ts CHANGED
@@ -40,12 +40,21 @@ export interface ExecuteContext {
40
40
  tables: Record<string, AsyncDataSource>
41
41
  functions?: Record<string, UserDefinedFunction>
42
42
  signal?: AbortSignal
43
+ // current query's FROM + JOIN aliases (e.g. ['a', 'b'])
44
+ scope?: string[]
45
+ // the enclosing query's current row, for resolving correlated references
46
+ outerRow?: AsyncRow
47
+ // aliases from the enclosing query that are valid correlated references
48
+ outerAliases?: Set<string>
43
49
  }
44
50
 
45
51
  // AsyncRow represents a row with async cell values
46
52
  export interface AsyncRow {
47
53
  columns: string[]
48
54
  cells: AsyncCells
55
+ // Optional pre-materialized row values keyed by output column name.
56
+ // When present, consumers can skip the AsyncCell Promise roundtrip.
57
+ resolved?: Record<string, SqlPrimitive>
49
58
  }
50
59
  export type AsyncCells = Record<string, AsyncCell>
51
60
  export type AsyncCell = () => Promise<SqlPrimitive>
@@ -110,9 +119,9 @@ export interface UserDefinedFunction {
110
119
  arguments: FunctionSignature
111
120
  }
112
121
 
113
- export type AggregateFunc = 'COUNT' | 'SUM' | 'AVG' | 'MIN' | 'MAX' | 'JSON_ARRAYAGG' | 'STDDEV_SAMP' | 'STDDEV_POP' | 'MEDIAN' | 'PERCENTILE_CONT' | 'APPROX_QUANTILE'
122
+ export type AggregateFunc = 'COUNT' | 'SUM' | 'AVG' | 'MIN' | 'MAX' | 'JSON_ARRAYAGG' | 'STDDEV_SAMP' | 'STDDEV_POP' | 'MEDIAN' | 'PERCENTILE_CONT' | 'APPROX_QUANTILE' | 'STRING_AGG'
114
123
 
115
- export type RegExpFunction = 'REGEXP_SUBSTR' | 'REGEXP_EXTRACT' | 'REGEXP_REPLACE'
124
+ export type RegExpFunction = 'REGEXP_SUBSTR' | 'REGEXP_EXTRACT' | 'REGEXP_REPLACE' | 'REGEXP_MATCHES'
116
125
 
117
126
  export type MathFunc =
118
127
  | 'FLOOR'
@@ -11,7 +11,7 @@ export const niladicFuncs = ['CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP'
11
11
  * @returns {name is AggregateFunc}
12
12
  */
13
13
  export function isAggregateFunc(name) {
14
- return ['COUNT', 'SUM', 'AVG', 'MIN', 'MAX', 'JSON_ARRAYAGG', 'STDDEV_SAMP', 'STDDEV_POP', 'MEDIAN', 'PERCENTILE_CONT', 'APPROX_QUANTILE'].includes(name)
14
+ return ['COUNT', 'SUM', 'AVG', 'MIN', 'MAX', 'JSON_ARRAYAGG', 'STDDEV_SAMP', 'STDDEV_POP', 'MEDIAN', 'PERCENTILE_CONT', 'APPROX_QUANTILE', 'STRING_AGG'].includes(name)
15
15
  }
16
16
 
17
17
  /**
@@ -31,7 +31,7 @@ export function isMathFunc(name) {
31
31
  * @returns {name is RegExpFunction}
32
32
  */
33
33
  export function isRegexpFunc(name) {
34
- return ['REGEXP_SUBSTR', 'REGEXP_EXTRACT', 'REGEXP_REPLACE'].includes(name)
34
+ return ['REGEXP_SUBSTR', 'REGEXP_EXTRACT', 'REGEXP_REPLACE', 'REGEXP_MATCHES'].includes(name)
35
35
  }
36
36
 
37
37
  /**
@@ -112,6 +112,7 @@ export const FUNCTION_SIGNATURES = {
112
112
  REGEXP_SUBSTR: { min: 2, max: 4, signature: 'string, pattern[, position[, occurrence]]' },
113
113
  REGEXP_EXTRACT: { min: 2, max: 4, signature: 'string, pattern[, position[, occurrence]]' },
114
114
  REGEXP_REPLACE: { min: 3, max: 5, signature: 'string, pattern, replacement[, position[, occurrence]]' },
115
+ REGEXP_MATCHES: { min: 2, max: 2, signature: 'string, pattern' },
115
116
 
116
117
  // Date/time functions
117
118
  RANDOM: { min: 0, max: 0, signature: '' },
@@ -154,6 +155,7 @@ export const FUNCTION_SIGNATURES = {
154
155
  JSON_QUERY: { min: 2, max: 2, signature: 'expression, path' },
155
156
  JSON_EXTRACT: { min: 2, max: 2, signature: 'expression, path' },
156
157
  JSON_OBJECT: { min: 0, signature: 'key1, value1[, ...]' },
158
+ JSON_ARRAY_LENGTH: { min: 1, max: 1, signature: 'array' },
157
159
  JSON_ARRAYAGG: { min: 1, max: 1, signature: 'expression' },
158
160
 
159
161
  // Array functions
@@ -165,6 +167,8 @@ export const FUNCTION_SIGNATURES = {
165
167
  // Conditional functions
166
168
  COALESCE: { min: 1, signature: 'value1, value2[, ...]' },
167
169
  NULLIF: { min: 2, max: 2, signature: 'value1, value2' },
170
+ GREATEST: { min: 1, signature: 'value1[, value2, ...]' },
171
+ LEAST: { min: 1, signature: 'value1[, value2, ...]' },
168
172
 
169
173
  // Aggregate functions
170
174
  COUNT: { min: 1, max: 1, signature: 'expression' },
@@ -177,6 +181,7 @@ export const FUNCTION_SIGNATURES = {
177
181
  MEDIAN: { min: 1, max: 1, signature: 'expression' },
178
182
  PERCENTILE_CONT: { min: 2, max: 2, signature: 'fraction, expression' },
179
183
  APPROX_QUANTILE: { min: 2, max: 2, signature: 'expression, fraction' },
184
+ STRING_AGG: { min: 2, max: 2, signature: 'expression, separator' },
180
185
 
181
186
  // Spatial functions
182
187
  ST_INTERSECTS: { min: 2, max: 2, signature: 'geometry, geometry' },
@@ -1,5 +1,12 @@
1
1
  import { FUNCTION_SIGNATURES } from './functions.js'
2
2
 
3
+ /** Well-known window functions that are not supported */
4
+ const WINDOW_FUNCTIONS = new Set([
5
+ 'ROW_NUMBER', 'RANK', 'DENSE_RANK', 'NTILE',
6
+ 'LAG', 'LEAD', 'FIRST_VALUE', 'LAST_VALUE', 'NTH_VALUE',
7
+ 'CUME_DIST', 'PERCENT_RANK',
8
+ ])
9
+
3
10
  /**
4
11
  * Structured parse error with position range.
5
12
  */
@@ -103,10 +110,16 @@ export class UnknownFunctionError extends ParseError {
103
110
  * @param {number} options.positionEnd
104
111
  */
105
112
  constructor({ funcName, positionStart, positionEnd }) {
106
- const suggestions = suggestFunctions(funcName)
107
- let message = `Unknown function "${funcName}" at position ${positionStart}.`
108
- if (suggestions.length) {
109
- message += ` Did you mean ${suggestions.join(', ')}?`
113
+ const upper = funcName.toUpperCase()
114
+ let message
115
+ if (WINDOW_FUNCTIONS.has(upper)) {
116
+ message = `Window function "${funcName}" is not supported at position ${positionStart}`
117
+ } else {
118
+ const suggestions = suggestFunctions(funcName)
119
+ message = `Unknown function "${funcName}" at position ${positionStart}.`
120
+ if (suggestions.length) {
121
+ message += ` Did you mean ${suggestions.join(', ')}?`
122
+ }
110
123
  }
111
124
  super({ message, positionStart, positionEnd })
112
125
  }