squirreling 0.12.8 → 0.12.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.12.8",
3
+ "version": "0.12.10",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
package/src/ast.d.ts CHANGED
@@ -106,6 +106,14 @@ export interface FunctionNode extends AstBase {
106
106
  filter?: ExprNode
107
107
  }
108
108
 
/**
 * AST node for a function call followed by an OVER clause (a window function).
 */
export interface WindowFunctionNode extends AstBase {
  // Discriminant that distinguishes this node from the other ExprNode variants
  type: 'window'
  // Name of the called function; presumably kept as written in the query — TODO confirm casing
  funcName: string
  // Argument expressions passed to the function
  args: ExprNode[]
  // PARTITION BY expressions from the OVER clause
  partitionBy: ExprNode[]
  // ORDER BY terms from the OVER clause
  orderBy: OrderByItem[]
}
116
+
109
117
  export type CastType = 'TEXT' | 'STRING' | 'VARCHAR' | 'INTEGER' | 'INT' | 'BIGINT' | 'FLOAT' | 'REAL' | 'DOUBLE' | 'BOOLEAN' | 'BOOL'
110
118
 
111
119
  export interface CastNode extends AstBase {
@@ -166,6 +174,7 @@ export type ExprNode =
166
174
  | UnaryNode
167
175
  | BinaryNode
168
176
  | FunctionNode
177
+ | WindowFunctionNode
169
178
  | CastNode
170
179
  | InSubqueryNode
171
180
  | InValuesNode
@@ -1,6 +1,7 @@
1
1
  import { derivedAlias } from '../expression/alias.js'
2
2
  import { evaluateExpr } from '../expression/evaluate.js'
3
3
  import { executePlan, selectColumnNames } from './execute.js'
4
+ import { sortEntriesByTerms } from './sort.js'
4
5
  import { keyify } from './utils.js'
5
6
 
6
7
  /**
@@ -50,6 +51,22 @@ function projectAggregateColumns(selectColumns, group, context) {
50
51
  return { columns, cells }
51
52
  }
52
53
 
/**
 * Builds the row visible to post-aggregation expressions such as HAVING and
 * grouped ORDER BY: source group columns plus aggregate output aliases.
 *
 * The first row of the group supplies the source columns; an empty group
 * contributes none. When a name exists in both rows, the aggregate cell wins
 * and the name is listed only once in `columns`.
 *
 * @param {AsyncRow[]} group
 * @param {AsyncRow} aggregateRow
 * @returns {AsyncRow}
 */
function aggregateContextRow(group, aggregateRow) {
  const baseRow = group[0] ?? { columns: [], cells: {} }
  return {
    // A Set drops names present in both rows while keeping first-seen order,
    // so `columns` stays consistent with the (name-keyed) `cells` object.
    columns: [...new Set([...baseRow.columns, ...aggregateRow.columns])],
    cells: { ...baseRow.cells, ...aggregateRow.cells },
  }
}
69
+
53
70
  /**
54
71
  * Executes a hash aggregate operation (GROUP BY)
55
72
  *
@@ -85,27 +102,42 @@ export function executeHashAggregate(plan, context) {
85
102
  group.push(row)
86
103
  }
87
104
 
88
- // Yield one row per group
105
+ /** @type {{ row: AsyncRow, group: AsyncRow[], contextRow: AsyncRow }[]} */
106
+ const aggregateRows = []
107
+
89
108
  for (const group of groups.values()) {
90
109
  const asyncRow = projectAggregateColumns(plan.columns, group, context)
110
+ const contextRow = aggregateContextRow(group, asyncRow)
91
111
 
92
112
  // Apply HAVING filter
93
113
  if (plan.having) {
94
- /** @type {AsyncRow} */
95
- const havingRow = {
96
- columns: [...group[0].columns, ...asyncRow.columns],
97
- cells: { ...group[0].cells, ...asyncRow.cells },
98
- }
99
114
  const passes = await evaluateExpr({
100
115
  node: plan.having,
101
- row: havingRow,
116
+ row: contextRow,
102
117
  rows: group,
103
118
  context,
104
119
  })
105
120
  if (!passes) continue
106
121
  }
107
122
 
108
- yield asyncRow
123
+ aggregateRows.push({ row: asyncRow, group, contextRow })
124
+ }
125
+
126
+ if (plan.orderBy?.length) {
127
+ const sortedRows = await sortEntriesByTerms({
128
+ entries: aggregateRows.map((aggregateRow, idx) => ({
129
+ row: aggregateRow.contextRow,
130
+ rows: aggregateRow.group,
131
+ idx,
132
+ })),
133
+ orderBy: plan.orderBy,
134
+ context,
135
+ })
136
+ aggregateRows.splice(0, aggregateRows.length, ...sortedRows.map(({ idx }) => aggregateRows[idx]))
137
+ }
138
+
139
+ for (const { row } of aggregateRows) {
140
+ yield row
109
141
  }
110
142
  },
111
143
  }
@@ -9,6 +9,7 @@ import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
9
9
  import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
10
10
  import { executeSort } from './sort.js'
11
11
  import { addBounds, minBounds, stableRowKey } from './utils.js'
12
+ import { executeWindow } from './window.js'
12
13
 
13
14
  /**
14
15
  * @import { AsyncCells, AsyncDataSource, AsyncRow, DerivedColumn, ExecuteContext, ExecuteSqlOptions, ExprNode, IdentifierNode, QueryResults, SelectColumn, SqlPrimitive, Statement } from '../types.js'
@@ -120,6 +121,8 @@ export function executePlan({ plan, context }) {
120
121
  return executeSetOperation(plan, context)
121
122
  } else if (plan.type === 'TableFunction') {
122
123
  return executeTableFunction(plan, context)
124
+ } else if (plan.type === 'Window') {
125
+ return executeWindow(plan, context)
123
126
  }
124
127
  return { columns: [], async *rows() {} }
125
128
  }
@@ -4,12 +4,117 @@ import { executePlan } from './execute.js'
4
4
  import { compareForTerm } from './utils.js'
5
5
 
6
6
  /**
7
- * @import { AsyncRow, ExecuteContext, QueryResults, SqlPrimitive } from '../types.js'
7
+ * @import { AsyncRow, ExecuteContext, OrderByItem, QueryResults, SqlPrimitive } from '../types.js'
8
8
  * @import { SortNode } from '../plan/types.js'
9
9
  */
10
10
 
11
11
  const MAX_CHUNK = 256
12
12
 
13
+ /**
14
+ * @typedef {{
15
+ * row: AsyncRow,
16
+ * rows?: AsyncRow[],
17
+ * }} SortEntry
18
+ */
19
+
/**
 * Sorts rows by ORDER BY terms while evaluating async sort keys in concurrent
 * chunks and delaying later terms until earlier terms tie.
 *
 * All entries are sorted on the first term; only runs of entries that compare
 * equal on a term are evaluated and sorted on the next one. Sort keys are
 * evaluated in chunks that start at one entry and double up to MAX_CHUNK, each
 * chunk awaited with Promise.all.
 *
 * When `cacheValues` is true, each evaluated key is also stored on the entry's
 * row under the term's derived alias unless a cell with that name already
 * exists. This mutates `row.cells`.
 *
 * Resolves to an empty array when `entries` is empty or when `context.signal`
 * is found aborted before a chunk starts.
 *
 * @template {SortEntry} T
 * @param {{
 *   entries: T[],
 *   orderBy: OrderByItem[],
 *   context: ExecuteContext,
 *   cacheValues?: boolean,
 * }} options
 * @returns {Promise<T[]>}
 */
export async function sortEntriesByTerms({ entries, orderBy, context, cacheValues = false }) {
  if (entries.length === 0) return []

  // evaluatedValues[entryIdx][orderByIdx] holds the sort key once computed
  /** @type {(SqlPrimitive | undefined)[][]} */
  const evaluatedValues = entries.map(() => Array(orderBy.length))

  // Groups of entry indices that are still tied on every term processed so far
  /** @type {number[][]} */
  let groups = [entries.map((_, i) => i)]

  for (let orderByIdx = 0; orderByIdx < orderBy.length; orderByIdx++) {
    const term = orderBy[orderByIdx]
    /** @type {number[][]} */
    const nextGroups = []

    // The cache alias depends only on the term, so derive it at most once per
    // term, and only if caching is requested and some group needs sorting.
    let alias = ''
    let aliasDerived = false

    for (const group of groups) {
      // Nothing to order in a group of zero or one entries
      if (group.length <= 1) {
        nextGroups.push(group)
        continue
      }

      if (cacheValues && !aliasDerived) {
        alias = derivedAlias(term.expr)
        aliasDerived = true
      }

      /** @type {number[]} */
      const missing = []
      for (const idx of group) {
        if (evaluatedValues[idx][orderByIdx] === undefined) missing.push(idx)
      }

      // Evaluate in growing chunks so one slow expression does not serialize
      // the whole group, while concurrency stays bounded by MAX_CHUNK.
      let chunkSize = 1
      let start = 0
      while (start < missing.length) {
        if (context.signal?.aborted) return []
        const chunk = missing.slice(start, start + chunkSize)
        const values = await Promise.all(chunk.map(idx =>
          evaluateExpr({
            node: term.expr,
            row: entries[idx].row,
            rows: entries[idx].rows,
            context,
          })
        ))
        for (let i = 0; i < chunk.length; i++) {
          const idx = chunk[i]
          const value = values[i]
          evaluatedValues[idx][orderByIdx] = value
          if (cacheValues && !(alias in entries[idx].row.cells)) {
            entries[idx].row.cells[alias] = () => Promise.resolve(value)
          }
        }
        start += chunk.length
        chunkSize = Math.min(chunkSize * 2, MAX_CHUNK)
      }

      group.sort((aIdx, bIdx) => {
        const av = evaluatedValues[aIdx][orderByIdx]
        const bv = evaluatedValues[bIdx][orderByIdx]
        return compareForTerm(av, bv, term)
      })

      if (orderByIdx < orderBy.length - 1) {
        // Split the sorted group into runs of equal keys; only those runs
        // need the next ORDER BY term.
        /** @type {number[]} */
        let currentSubGroup = [group[0]]
        for (let i = 1; i < group.length; i++) {
          const prevIdx = group[i - 1]
          const currIdx = group[i]
          const prevVal = evaluatedValues[prevIdx][orderByIdx]
          const currVal = evaluatedValues[currIdx][orderByIdx]

          if (compareForTerm(prevVal, currVal, term) === 0) {
            currentSubGroup.push(currIdx)
          } else {
            nextGroups.push(currentSubGroup)
            currentSubGroup = [currIdx]
          }
        }
        nextGroups.push(currentSubGroup)
      } else {
        // Last term: ties need no further splitting
        nextGroups.push(group)
      }
    }

    groups = nextGroups
  }

  return groups.flat().map(idx => entries[idx])
}
117
+
13
118
  /**
14
119
  * Executes a sort operation (ORDER BY)
15
120
  *
@@ -32,92 +137,16 @@ export function executeSort(plan, context) {
32
137
  rows.push(row)
33
138
  }
34
139
 
35
- if (rows.length === 0) return
36
-
37
- // Multi-pass lazy sorting
38
- /** @type {(SqlPrimitive | undefined)[][]} */
39
- const evaluatedValues = rows.map(() => Array(plan.orderBy.length))
40
-
41
- /** @type {number[][]} */
42
- let groups = [rows.map((_, i) => i)]
43
-
44
- for (let orderByIdx = 0; orderByIdx < plan.orderBy.length; orderByIdx++) {
45
- const term = plan.orderBy[orderByIdx]
46
- /** @type {number[][]} */
47
- const nextGroups = []
48
-
49
- for (const group of groups) {
50
- if (group.length <= 1) {
51
- nextGroups.push(group)
52
- continue
53
- }
54
-
55
- // Evaluate this column for all rows in the group, in parallel
56
- // chunks that double up to MAX_CHUNK so a slow UDF doesn't serialize.
57
- // Cache each value back into the row so downstream projection can
58
- // reuse it instead of re-invoking the expression.
59
- const alias = derivedAlias(term.expr)
60
- /** @type {number[]} */
61
- const missing = []
62
- for (const idx of group) {
63
- if (evaluatedValues[idx][orderByIdx] === undefined) missing.push(idx)
64
- }
65
- let chunkSize = 1
66
- let start = 0
67
- while (start < missing.length) {
68
- if (context.signal?.aborted) return
69
- const chunk = missing.slice(start, start + chunkSize)
70
- const values = await Promise.all(chunk.map(idx =>
71
- evaluateExpr({ node: term.expr, row: rows[idx], context })
72
- ))
73
- for (let i = 0; i < chunk.length; i++) {
74
- const idx = chunk[i]
75
- const value = values[i]
76
- evaluatedValues[idx][orderByIdx] = value
77
- if (!(alias in rows[idx].cells)) {
78
- rows[idx].cells[alias] = () => Promise.resolve(value)
79
- }
80
- }
81
- start += chunk.length
82
- chunkSize = Math.min(chunkSize * 2, MAX_CHUNK)
83
- }
84
-
85
- // Sort the group by this column
86
- group.sort((aIdx, bIdx) => {
87
- const av = evaluatedValues[aIdx][orderByIdx]
88
- const bv = evaluatedValues[bIdx][orderByIdx]
89
- return compareForTerm(av, bv, term)
90
- })
91
-
92
- // Split into sub-groups based on ties
93
- if (orderByIdx < plan.orderBy.length - 1) {
94
- /** @type {number[]} */
95
- let currentSubGroup = [group[0]]
96
- for (let i = 1; i < group.length; i++) {
97
- const prevIdx = group[i - 1]
98
- const currIdx = group[i]
99
- const prevVal = evaluatedValues[prevIdx][orderByIdx]
100
- const currVal = evaluatedValues[currIdx][orderByIdx]
101
-
102
- if (compareForTerm(prevVal, currVal, term) === 0) {
103
- currentSubGroup.push(currIdx)
104
- } else {
105
- nextGroups.push(currentSubGroup)
106
- currentSubGroup = [currIdx]
107
- }
108
- }
109
- nextGroups.push(currentSubGroup)
110
- } else {
111
- nextGroups.push(group)
112
- }
113
- }
114
-
115
- groups = nextGroups
116
- }
140
+ const sortedRows = await sortEntriesByTerms({
141
+ entries: rows.map(row => ({ row })),
142
+ orderBy: plan.orderBy,
143
+ context,
144
+ cacheValues: true,
145
+ })
117
146
 
118
147
  // Yield sorted rows
119
- for (const idx of groups.flat()) {
120
- yield rows[idx]
148
+ for (const { row } of sortedRows) {
149
+ yield row
121
150
  }
122
151
  },
123
152
  }
@@ -121,6 +121,19 @@ export function maxBounds(a, b) {
121
121
  return a ?? b
122
122
  }
123
123
 
/**
 * Returns true for plain object SqlPrimitive values, excluding null, arrays, and Dates.
 *
 * "Plain" means the value's prototype is either null or a root prototype (one
 * whose own prototype is null, such as Object.prototype). This also rejects
 * Map, Set, typed arrays and class instances, and it does not rely on
 * `instanceof`, so the result is the same for objects created in another realm.
 *
 * @param {SqlPrimitive} value
 * @returns {value is Record<string, SqlPrimitive>}
 */
export function isPlainObject(value) {
  // typeof null is 'object', so null has to be excluded explicitly
  if (value === null || typeof value !== 'object') return false
  const proto = Object.getPrototypeOf(value)
  // Object.create(null) has no prototype; ordinary literals sit directly below a root prototype
  return proto === null || Object.getPrototypeOf(proto) === null
}
+
124
137
  /**
125
138
  * @param {SqlPrimitive} value
126
139
  * @returns {string}
@@ -0,0 +1,154 @@
1
+ import { evaluateExpr } from '../expression/evaluate.js'
2
+ import { executePlan } from './execute.js'
3
+ import { compareForTerm, keyify } from './utils.js'
4
+
5
+ /**
6
+ * @import { AsyncRow, ExecuteContext, QueryResults, SqlPrimitive } from '../types.js'
7
+ * @import { WindowNode, WindowSpec } from '../plan/types.js'
8
+ */
9
+
/**
 * Executes a Window plan node: buffers the child's rows, assigns each window
 * function's output per partition, and yields rows in input order with the
 * synthetic window cells attached.
 *
 * When every window is `OVER ()` (no PARTITION BY and no ORDER BY), rows are
 * streamed instead of buffered. Both paths stop yielding once
 * `context.signal` is aborted.
 *
 * @param {WindowNode} plan
 * @param {ExecuteContext} context
 * @returns {QueryResults}
 */
export function executeWindow(plan, context) {
  const child = executePlan({ plan: plan.child, context })
  const extraColumns = plan.windows.map(w => w.alias)
  const columns = [...child.columns, ...extraColumns]
  const { numRows, maxRows } = child

  // Streaming fast path: every window is OVER () with no partition/order, so
  // each row's output depends only on its position in the input stream. Avoids
  // buffering — critical for large scans (e.g. parquet).
  const streamable = plan.windows.every(w => w.partitionBy.length === 0 && w.orderBy.length === 0)

  if (streamable) {
    return {
      columns,
      numRows,
      maxRows,
      async *rows() {
        // 0-based position of the current row in the input stream
        let position = 0
        for await (const row of child.rows()) {
          if (context.signal?.aborted) return
          const values = plan.windows.map(w => assignRowNumber(w.funcName, position))
          position++
          yield attachWindowCells(row, extraColumns, values)
        }
      },
    }
  }

  return {
    columns,
    numRows,
    maxRows,
    async *rows() {
      /** @type {AsyncRow[]} */
      const rows = []
      for await (const row of child.rows()) {
        if (context.signal?.aborted) return
        rows.push(row)
      }
      if (rows.length === 0) return

      // One SqlPrimitive per window spec per row, indexed by row input position.
      /** @type {SqlPrimitive[][]} */
      const windowValues = plan.windows.map(() => new Array(rows.length))

      for (let w = 0; w < plan.windows.length; w++) {
        await computeWindow(plan.windows[w], rows, windowValues[w], context)
        if (context.signal?.aborted) return
      }

      for (let i = 0; i < rows.length; i++) {
        if (context.signal?.aborted) return
        yield attachWindowCells(rows[i], extraColumns, windowValues.map(perWindow => perWindow[i]))
      }
    },
  }
}

/**
 * Returns a copy of `row` extended with one already-resolved cell per window alias.
 * The input row is not modified.
 *
 * @param {AsyncRow} row
 * @param {string[]} aliases - output column names, one per window
 * @param {SqlPrimitive[]} values - values[k] is the cell value for aliases[k]
 * @returns {AsyncRow}
 */
function attachWindowCells(row, aliases, values) {
  const cells = { ...row.cells }
  for (let k = 0; k < aliases.length; k++) {
    const value = values[k]
    cells[aliases[k]] = () => Promise.resolve(value)
  }
  return {
    columns: [...row.columns, ...aliases],
    cells,
  }
}
91
+
/**
 * Evaluates one window function over the buffered rows and stores each row's
 * result at that row's index in `output`.
 *
 * Rows are bucketed by their PARTITION BY key. Within a bucket they are ranked
 * by the ORDER BY terms, with ties keeping input order; without ORDER BY the
 * input order is the rank. Returns early, leaving later buckets unassigned,
 * once `context.signal` is aborted.
 *
 * @param {WindowSpec} spec
 * @param {AsyncRow[]} rows
 * @param {SqlPrimitive[]} output
 * @param {ExecuteContext} context
 */
async function computeWindow(spec, rows, output, context) {
  // Partition key parts for every row, evaluated concurrently
  const keyParts = await Promise.all(rows.map(row =>
    Promise.all(spec.partitionBy.map(node => evaluateExpr({ node, row, context })))
  ))

  /** @type {Map<string | number | bigint | boolean, number[]>} */
  const partitions = new Map()
  keyParts.forEach((parts, rowIdx) => {
    const key = keyify(...parts)
    const members = partitions.get(key)
    if (members) {
      members.push(rowIdx)
    } else {
      partitions.set(key, [rowIdx])
    }
  })

  for (const members of partitions.values()) {
    if (context.signal?.aborted) return

    // No ORDER BY: rank follows input order within the partition
    if (!spec.orderBy.length) {
      members.forEach((rowIdx, rank) => {
        output[rowIdx] = assignRowNumber(spec.funcName, rank)
      })
      continue
    }

    const sortKeys = await Promise.all(members.map(rowIdx =>
      Promise.all(spec.orderBy.map(term => evaluateExpr({ node: term.expr, row: rows[rowIdx], context })))
    ))
    /** @type {{ idx: number, values: SqlPrimitive[], pos: number }[]} */
    const ranked = members.map((rowIdx, pos) => ({ idx: rowIdx, values: sortKeys[pos], pos }))
    ranked.sort((left, right) => {
      for (let t = 0; t < spec.orderBy.length; t++) {
        const cmp = compareForTerm(left.values[t], right.values[t], spec.orderBy[t])
        if (cmp !== 0) return cmp
      }
      // Full tie: fall back to input position
      return left.pos - right.pos
    })
    ranked.forEach((entry, rank) => {
      output[entry.idx] = assignRowNumber(spec.funcName, rank)
    })
  }
}
145
+
/**
 * Maps a 0-based rank within a partition to the window function's output value.
 * Only ROW_NUMBER is supported; the name is matched case-insensitively.
 *
 * @param {string} funcName
 * @param {number} rank - 0-based rank within the partition
 * @returns {SqlPrimitive}
 * @throws {Error} if funcName is not a supported window function
 */
function assignRowNumber(funcName, rank) {
  // NOTE(review): the parser appears to keep the function name as written, so
  // `row_number()` must be accepted as well as `ROW_NUMBER()` — confirm.
  if (typeof funcName === 'string' && funcName.toUpperCase() === 'ROW_NUMBER') return rank + 1
  throw new Error(`Unsupported window function: ${funcName}`)
}
@@ -31,6 +31,9 @@ export function derivedAlias(expr) {
31
31
  }
32
32
  return expr.funcName.toLowerCase() + '_' + expr.args.map(derivedAlias).join('_')
33
33
  }
34
+ if (expr.type === 'window') {
35
+ return expr.funcName.toLowerCase()
36
+ }
34
37
  if (expr.type === 'interval') {
35
38
  return `interval_${expr.value}_${expr.unit.toLowerCase()}`
36
39
  }
@@ -1,5 +1,5 @@
1
1
  import { executeStatement } from '../execute/execute.js'
2
- import { keyify, stringify } from '../execute/utils.js'
2
+ import { isPlainObject, keyify, stringify } from '../execute/utils.js'
3
3
  import { ArgValueError, ExecutionError } from '../validation/executionErrors.js'
4
4
  import { isAggregateFunc, isMathFunc, isRegexpFunc, isSpatialFunc, isStringFunc } from '../validation/functions.js'
5
5
  import { UnknownFunctionError } from '../validation/parseErrors.js'
@@ -39,6 +39,14 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
39
39
  if (qualified in row.cells) {
40
40
  return row.cells[qualified]()
41
41
  }
42
+ const prefix = node.prefix + '.'
43
+ const prefixedColumns = row.columns.filter(col => col.startsWith(prefix))
44
+ if (prefixedColumns.length === 1) {
45
+ const value = await row.cells[prefixedColumns[0]]()
46
+ if (isPlainObject(value) && Object.prototype.hasOwnProperty.call(value, node.name)) {
47
+ return value[node.name]
48
+ }
49
+ }
42
50
  // Check outer row for correlated subquery references
43
51
  if (context.outerRow && context.outerAliases?.has(node.prefix) && node.name in context.outerRow.cells) {
44
52
  return context.outerRow.cells[node.name]()
@@ -473,6 +481,23 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
473
481
  if (funcName === 'ARRAY_LENGTH' || funcName === 'CARDINALITY') {
474
482
  const arr = args[0]
475
483
  if (!Array.isArray(arr)) return null
484
+ if (funcName === 'ARRAY_LENGTH' && args.length === 2) {
485
+ const dim = args[1]
486
+ if (typeof dim !== 'number' && typeof dim !== 'bigint') return null
487
+ const d = Number(dim)
488
+ if (!Number.isInteger(d) || d < 1) return null
489
+ let level = arr
490
+ for (let i = 1; i < d; i++) {
491
+ if (!Array.isArray(level) || level.length === 0) return null
492
+ const first = level[0]
493
+ if (!Array.isArray(first)) return null
494
+ for (const item of level) {
495
+ if (!Array.isArray(item) || item.length !== first.length) return null
496
+ }
497
+ level = first
498
+ }
499
+ return level.length
500
+ }
476
501
  return arr.length
477
502
  }
478
503
 
@@ -1,10 +1,10 @@
1
- import { isAggregateFunc, isKnownFunction, niladicFuncs, validateFunctionArgs } from '../validation/functions.js'
1
+ import { isAggregateFunc, isKnownFunction, isWindowFunc, niladicFuncs, validateFunctionArgs } from '../validation/functions.js'
2
2
  import { ParseError, UnknownFunctionError } from '../validation/parseErrors.js'
3
3
  import { parseExpression } from './expression.js'
4
4
  import { consume, current, expect, match } from './state.js'
5
5
 
6
6
  /**
7
- * @import { ExprNode, ParserState } from '../types.js'
7
+ * @import { ExprNode, OrderByItem, ParserState } from '../types.js'
8
8
  */
9
9
 
10
10
  /**
@@ -128,13 +128,43 @@ export function parseFunctionCall(state, positionStart) {
128
128
  expect(state, 'paren', ')')
129
129
  }
130
130
 
131
- // Check for OVER clause (window functions not supported)
131
+ // Check for OVER clause
132
132
  const overTok = current(state)
133
- if (overTok.type === 'identifier' && overTok.value.toUpperCase() === 'OVER') {
133
+ const hasOver = overTok.type === 'identifier' && overTok.value.toUpperCase() === 'OVER'
134
+
135
+ if (hasOver) {
136
+ if (!isWindowFunc(funcNameUpper)) {
137
+ throw new ParseError({
138
+ message: `Window functions are not supported: ${funcName}(...) OVER (...)`,
139
+ positionStart,
140
+ positionEnd: overTok.positionEnd,
141
+ })
142
+ }
143
+ if (filter) {
144
+ throw new ParseError({
145
+ message: `FILTER cannot be combined with OVER for "${funcName}"`,
146
+ positionStart,
147
+ positionEnd: overTok.positionEnd,
148
+ })
149
+ }
150
+ consume(state)
151
+ const { partitionBy, orderBy } = parseWindowSpec(state, positionStart)
152
+ return {
153
+ type: 'window',
154
+ funcName,
155
+ args,
156
+ partitionBy,
157
+ orderBy,
158
+ positionStart,
159
+ positionEnd: state.lastPos,
160
+ }
161
+ }
162
+
163
+ if (isWindowFunc(funcNameUpper)) {
134
164
  throw new ParseError({
135
- message: `Window functions are not supported: ${funcName}(...) OVER (...)`,
165
+ message: `${funcName}() requires an OVER clause at position ${positionStart}`,
136
166
  positionStart,
137
- positionEnd: overTok.positionEnd,
167
+ positionEnd: state.lastPos,
138
168
  })
139
169
  }
140
170
 
@@ -148,3 +178,64 @@ export function parseFunctionCall(state, positionStart) {
148
178
  positionEnd: state.lastPos,
149
179
  }
150
180
  }
181
+
/**
 * Parses the parenthesized window spec that follows OVER:
 * ( [PARTITION BY expr[, ...]] [ORDER BY expr [ASC|DESC] [NULLS FIRST|LAST][, ...]] )
 *
 * Either clause may be omitted, in which case its list is returned empty.
 *
 * @param {ParserState} state
 * @param {number} positionStart - start position of the enclosing function call (for OrderByItem positions)
 * @returns {{ partitionBy: ExprNode[], orderBy: OrderByItem[] }}
 */
function parseWindowSpec(state, positionStart) {
  expect(state, 'paren', '(')

  /** @type {ExprNode[]} */
  const partitionBy = []
  /** @type {OrderByItem[]} */
  const orderBy = []

  // PARTITION is checked as an identifier token, case-insensitively
  const lead = current(state)
  if (lead.type === 'identifier' && lead.value.toUpperCase() === 'PARTITION') {
    consume(state)
    expect(state, 'keyword', 'BY')
    do {
      partitionBy.push(parseExpression(state))
    } while (match(state, 'comma'))
  }

  if (match(state, 'keyword', 'ORDER')) {
    expect(state, 'keyword', 'BY')
    do {
      const expr = parseExpression(state)

      // Direction defaults to ASC; DESC is only tried when ASC was not consumed
      /** @type {'ASC' | 'DESC'} */
      let direction = 'ASC'
      if (!match(state, 'keyword', 'ASC') && match(state, 'keyword', 'DESC')) {
        direction = 'DESC'
      }

      /** @type {'FIRST' | 'LAST' | undefined} */
      let nulls
      if (match(state, 'keyword', 'NULLS')) {
        const tok = consume(state)
        const upper = tok.value.toUpperCase()
        const isPlacement = tok.type === 'identifier' && (upper === 'FIRST' || upper === 'LAST')
        if (!isPlacement) {
          throw new ParseError({
            message: `Expected FIRST or LAST after NULLS at position ${tok.positionStart}`,
            positionStart: tok.positionStart,
            positionEnd: tok.positionEnd,
          })
        }
        nulls = upper === 'FIRST' ? 'FIRST' : 'LAST'
      }

      orderBy.push({ expr, direction, nulls, positionStart, positionEnd: state.lastPos })
    } while (match(state, 'comma'))
  }

  expect(state, 'paren', ')')
  return { partitionBy, orderBy }
}
@@ -157,6 +157,16 @@ export function parsePrimary(state) {
157
157
  if (match(state, 'dot')) {
158
158
  prefix = name
159
159
  name = expect(state, 'identifier').value
160
+ } else if (match(state, 'bracket', '[')) {
161
+ // table['column'] — string subscript is equivalent to dot access
162
+ const fieldTok = current(state)
163
+ if (fieldTok.type !== 'string') {
164
+ throw parseError(state, 'string literal')
165
+ }
166
+ consume(state)
167
+ expect(state, 'bracket', ']')
168
+ prefix = name
169
+ name = fieldTok.value
160
170
  }
161
171
 
162
172
  return {
@@ -211,6 +211,10 @@ function collectColumnsFromExpr(expr, columns, aliases) {
211
211
  collectColumnsFromExpr(arg, columns, aliases)
212
212
  }
213
213
  collectColumnsFromExpr(expr.filter, columns, aliases)
214
+ } else if (expr.type === 'window') {
215
+ for (const arg of expr.args) collectColumnsFromExpr(arg, columns, aliases)
216
+ for (const p of expr.partitionBy) collectColumnsFromExpr(p, columns, aliases)
217
+ for (const o of expr.orderBy) collectColumnsFromExpr(o.expr, columns, aliases)
214
218
  } else if (expr.type === 'cast') {
215
219
  collectColumnsFromExpr(expr.expr, columns, aliases)
216
220
  } else if (expr.type === 'in valuelist') {
package/src/plan/plan.js CHANGED
@@ -1,13 +1,14 @@
1
1
  import { derivedAlias } from '../expression/alias.js'
2
2
  import { parseSql } from '../parse/parse.js'
3
3
  import { findAggregate } from '../validation/aggregates.js'
4
+ import { ParseError } from '../validation/parseErrors.js'
4
5
  import { ColumnNotFoundError, TableNotFoundError } from '../validation/tables.js'
5
6
  import { validateNoIdentifiers, validateScan, validateTableRefs } from '../validation/tables.js'
6
7
  import { extractColumns, fromAlias, inferSelectSourceColumns, inferStatementColumns, tableFunctionColumnNames } from './columns.js'
7
8
 
8
9
  /**
9
- * @import { AsyncDataSource, ExprNode, DerivedColumn, IdentifierNode, JoinClause, PlanSqlOptions, ScanOptions, SelectColumn, SelectStatement, SetOperationStatement, Statement } from '../types.js'
10
- * @import { QueryPlan } from './types.d.ts'
10
+ * @import { AsyncDataSource, ExprNode, DerivedColumn, IdentifierNode, JoinClause, OrderByItem, PlanSqlOptions, ScanOptions, SelectColumn, SelectStatement, SetOperationStatement, Statement, WindowFunctionNode } from '../types.js'
11
+ * @import { QueryPlan, WindowSpec } from './types.d.ts'
11
12
  */
12
13
 
13
14
  /**
@@ -106,12 +107,51 @@ function planSetOperation({ compound, ctePlans, cteColumns, tables, parentColumn
106
107
  * @returns {QueryPlan}
107
108
  */
108
109
  function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outerScope }) {
110
+ // Reject window functions in clauses where they're not permitted.
111
+ expectNoWindowFunction(select.where, 'WHERE')
112
+ expectNoWindowFunction(select.having, 'HAVING')
113
+ for (const expr of select.groupBy) expectNoWindowFunction(expr, 'GROUP BY')
114
+ for (const term of select.orderBy) expectNoWindowFunction(term.expr, 'ORDER BY')
115
+ for (const join of select.joins) expectNoWindowFunction(join.on, 'JOIN ON')
116
+
117
+ // Collect window functions from SELECT columns and rewrite them to identifiers
118
+ // pointing at the synthetic cells produced by the Window plan node.
119
+ /** @type {WindowSpec[]} */
120
+ const windows = []
121
+ const windowColumns = select.columns.map(col => {
122
+ if (col.type !== 'derived') return col
123
+ const originalAlias = col.alias ?? derivedAlias(col.expr)
124
+ const expr = collectWindows(col.expr, windows)
125
+ if (expr === col.expr) return col
126
+ return { ...col, expr, alias: originalAlias }
127
+ })
128
+
129
+ if (windows.length && select.columns.some(col => col.type === 'derived' && findAggregate(col.expr))) {
130
+ throw new ParseError({
131
+ message: 'Window functions are not supported in queries with aggregation',
132
+ ...select,
133
+ })
134
+ }
135
+ if (windows.length && select.groupBy.length) {
136
+ throw new ParseError({
137
+ message: 'Window functions are not supported in queries with aggregation',
138
+ ...select,
139
+ })
140
+ }
141
+
142
+ // Preserve the pre-substitution columns for column-extraction, so synthetic
143
+ // `__window_N` identifiers are not requested from the data source.
144
+ const originalSelect = select
145
+ select = { ...select, columns: windowColumns }
146
+
109
147
  // Check for aggregation
110
148
  const hasAggregate = select.columns.some(col =>
111
149
  col.type === 'derived' && findAggregate(col.expr)
112
150
  )
113
151
  const useGrouping = hasAggregate || select.groupBy.length > 0
114
- const needsBuffering = useGrouping || select.orderBy.length > 0
152
+ // Windows with PARTITION BY or ORDER BY buffer; `OVER ()` streams.
153
+ const bufferingWindows = windows.some(w => w.partitionBy.length > 0 || w.orderBy.length > 0)
154
+ const needsBuffering = useGrouping || select.orderBy.length > 0 || bufferingWindows
115
155
 
116
156
  // Source alias for FROM clause
117
157
  const sourceAlias = fromAlias(select.from)
@@ -137,6 +177,7 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
137
177
  }
138
178
  return col
139
179
  })
180
+ const orderBy = resolveOrderByAliases(select.orderBy, aliases)
140
181
 
141
182
  // Validate qualified references in other clauses
142
183
  validateTableRefs(select.where, scopeTables)
@@ -155,7 +196,7 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
155
196
  // included so they are only applied to fresh scans, not CTE/subquery plans)
156
197
  /** @type {ScanOptions} */
157
198
  const hints = {}
158
- const perTableColumns = extractColumns({ select, parentColumns })
199
+ const perTableColumns = extractColumns({ select: originalSelect, parentColumns })
159
200
  hints.columns = perTableColumns.get(sourceAlias)
160
201
  // Empty columns array means no columns were referenced, but a FROM subquery
161
202
  // still needs its own columns (e.g. for DISTINCT). Treat empty as unrestricted.
@@ -195,7 +236,16 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
195
236
  const groupBy = aliases.size > 0
196
237
  ? select.groupBy.map(expr => resolveAliases(expr, aliases))
197
238
  : select.groupBy
198
- plan = { type: 'HashAggregate', groupBy, columns, having: select.having, child: plan }
239
+ /** @type {QueryPlan} */
240
+ const aggregatePlan = {
241
+ type: 'HashAggregate',
242
+ groupBy,
243
+ columns,
244
+ having: select.having,
245
+ child: plan,
246
+ }
247
+ if (orderBy.length) aggregatePlan.orderBy = orderBy
248
+ plan = aggregatePlan
199
249
  } else if (!select.having && !select.where && plan.type === 'Scan' && isOwnScan && isAllCountStar(select.columns)) {
200
250
  plan = { type: 'Count', table: plan.table, columns: select.columns }
201
251
  } else {
@@ -203,8 +253,8 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
203
253
  }
204
254
 
205
255
  // ORDER BY (after aggregation)
206
- if (select.orderBy.length) {
207
- plan = { type: 'Sort', orderBy: select.orderBy, child: plan }
256
+ if (orderBy.length && !select.groupBy.length) {
257
+ plan = { type: 'Sort', orderBy, child: plan }
208
258
  }
209
259
 
210
260
  // DISTINCT
@@ -219,12 +269,15 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
219
269
  } else {
220
270
  // Non-aggregation path
221
271
 
272
+ // Window functions: insert before Sort so outer ORDER BY can reference
273
+ // the window output aliases.
274
+ if (windows.length) {
275
+ plan = { type: 'Window', windows, child: plan }
276
+ }
277
+
222
278
  // ORDER BY (before projection so it can access all columns)
223
279
  // Resolve SELECT aliases in ORDER BY expressions at plan time
224
- if (select.orderBy.length) {
225
- const orderBy = aliases.size > 0
226
- ? select.orderBy.map(term => ({ ...term, expr: resolveAliases(term.expr, aliases) }))
227
- : select.orderBy
280
+ if (orderBy.length) {
228
281
  plan = { type: 'Sort', orderBy, child: plan }
229
282
  }
230
283
 
@@ -423,6 +476,19 @@ function planJoin({ left, joins, leftTable, ctePlans, cteColumns, perTableColumn
423
476
  return plan
424
477
  }
425
478
 
479
/**
 * Rewrites ORDER BY terms so that identifiers naming a SELECT alias are
 * replaced by the expression that alias stands for.
 *
 * When there are no aliases the input array is returned as-is (same
 * reference); otherwise a new array of shallow-copied terms is produced.
 *
 * @param {OrderByItem[]} orderBy
 * @param {Map<string, ExprNode>} aliases
 * @returns {OrderByItem[]}
 */
function resolveOrderByAliases(orderBy, aliases) {
  if (aliases.size > 0) {
    const resolved = []
    for (const term of orderBy) {
      resolved.push({ ...term, expr: resolveAliases(term.expr, aliases) })
    }
    return resolved
  }
  return orderBy
}
491
+
426
492
  /**
427
493
  * Recursively replaces identifier nodes that match SELECT aliases
428
494
  * with their aliased expressions.
@@ -446,7 +512,8 @@ function resolveAliases(node, aliases) {
446
512
  }
447
513
  if (node.type === 'function') {
448
514
  const args = node.args.map(arg => resolveAliases(arg, aliases))
449
- return { ...node, args }
515
+ if (!node.filter) return { ...node, args }
516
+ return { ...node, args, filter: resolveAliases(node.filter, aliases) }
450
517
  }
451
518
  if (node.type === 'cast') {
452
519
  return { ...node, expr: resolveAliases(node.expr, aliases) }
@@ -609,6 +676,126 @@ function validateLateralSubqueries({ expr, ctePlans, cteColumns, tables, outerSc
609
676
  }
610
677
  }
611
678
 
679
/**
 * Extracts window functions out of an expression.
 *
 * Every window function subnode is swapped for an identifier naming a
 * synthetic `__window_N` cell, and a matching WindowSpec is appended to
 * `windows` (N is the length of `windows` at the time of the push). When the
 * expression contains no window function the original node reference is
 * returned, so untouched expressions are not shallow-cloned.
 *
 * @param {ExprNode} expr
 * @param {WindowSpec[]} windows collector, mutated in place
 * @returns {ExprNode}
 */
function collectWindows(expr, windows) {
  // Fast path: nothing to rewrite, hand back the very same reference
  if (!expr || !findWindow(expr)) return expr

  switch (expr.type) {
    case 'window': {
      const alias = `__window_${windows.length}`
      const { funcName, args, partitionBy, orderBy, positionStart, positionEnd } = expr
      windows.push({ alias, funcName: funcName.toUpperCase(), args, partitionBy, orderBy })
      // The identifier inherits the source span of the window call it replaces
      return { type: 'identifier', name: alias, positionStart, positionEnd }
    }
    case 'unary': {
      const argument = collectWindows(expr.argument, windows)
      return { ...expr, argument }
    }
    case 'binary': {
      // Left before right keeps the __window_N numbering in source order
      const left = collectWindows(expr.left, windows)
      const right = collectWindows(expr.right, windows)
      return { ...expr, left, right }
    }
    case 'function': {
      const args = expr.args.map(arg => collectWindows(arg, windows))
      return { ...expr, args }
    }
    case 'cast': {
      const inner = collectWindows(expr.expr, windows)
      return { ...expr, expr: inner }
    }
    case 'in valuelist': {
      const tested = collectWindows(expr.expr, windows)
      const values = expr.values.map(value => collectWindows(value, windows))
      return { ...expr, expr: tested, values }
    }
    case 'case': {
      const caseExpr = expr.caseExpr && collectWindows(expr.caseExpr, windows)
      const whenClauses = expr.whenClauses.map(clause => {
        const condition = collectWindows(clause.condition, windows)
        const result = collectWindows(clause.result, windows)
        return { ...clause, condition, result }
      })
      const elseResult = expr.elseResult && collectWindows(expr.elseResult, windows)
      return { ...expr, caseExpr, whenClauses, elseResult }
    }
    default:
      return expr
  }
}
741
+
742
/**
 * Guards a clause that must not contain window functions.
 *
 * Does nothing when `findWindow` reports no window function; otherwise throws
 * a ParseError positioned at the first one found.
 *
 * @param {ExprNode | undefined} expr
 * @param {string} clause clause name used in the error message
 * @throws {ParseError} if a window function is found in `expr`
 */
function expectNoWindowFunction(expr, clause) {
  const offender = findWindow(expr)
  if (!offender) return

  const { funcName, positionStart, positionEnd } = offender
  const message = `Window function ${funcName.toUpperCase()} is not allowed in ${clause} clause`
  throw new ParseError({ message, positionStart, positionEnd })
}
758
+
759
/**
 * Finds the first window function node inside an expression tree.
 *
 * The search is depth-first and descends through unary, binary, function
 * (arguments and the optional FILTER condition), cast, IN value-list and CASE
 * nodes. Any other node type is treated as a leaf. A window node is returned
 * as soon as it is reached; its own args / PARTITION BY / ORDER BY are not
 * searched.
 *
 * @param {ExprNode | undefined} expr
 * @returns {WindowFunctionNode | undefined} first window node, or undefined
 */
function findWindow(expr) {
  if (!expr) return undefined
  if (expr.type === 'window') return expr
  if (expr.type === 'binary') return findWindow(expr.left) || findWindow(expr.right)
  if (expr.type === 'unary') return findWindow(expr.argument)
  if (expr.type === 'function') {
    for (const arg of expr.args) {
      const found = findWindow(arg)
      if (found) return found
    }
    // FILTER (WHERE ...) is a child expression too; without this a window
    // function hidden in a filter would slip past expectNoWindowFunction.
    return findWindow(expr.filter)
  }
  if (expr.type === 'cast') return findWindow(expr.expr)
  if (expr.type === 'in valuelist') {
    const inTested = findWindow(expr.expr)
    if (inTested) return inTested
    for (const value of expr.values) {
      const found = findWindow(value)
      if (found) return found
    }
    return undefined
  }
  if (expr.type === 'case') {
    // caseExpr and elseResult are optional; findWindow(undefined) is undefined
    const inSubject = findWindow(expr.caseExpr)
    if (inSubject) return inSubject
    for (const clause of expr.whenClauses) {
      const found = findWindow(clause.condition) || findWindow(clause.result)
      if (found) return found
    }
    return findWindow(expr.elseResult)
  }
  return undefined
}
798
+
612
799
  /**
613
800
  * Checks if every SELECT column is a plain COUNT(*).
614
801
  *
@@ -15,6 +15,7 @@ export type QueryPlan =
15
15
  | PositionalJoinNode
16
16
  | SetOperationNode
17
17
  | TableFunctionNode
18
+ | WindowNode
18
19
 
19
20
  // Scan node
20
21
  export interface ScanNode {
@@ -66,6 +67,7 @@ export interface HashAggregateNode {
66
67
  type: 'HashAggregate'
67
68
  groupBy: ExprNode[]
68
69
  columns: SelectColumn[]
70
+ orderBy?: OrderByItem[]
69
71
  having?: ExprNode
70
72
  child: QueryPlan
71
73
  }
@@ -124,3 +126,17 @@ export interface TableFunctionNode {
124
126
  args: ExprNode[]
125
127
  columnNames: string[]
126
128
  }
129
+
130
/**
 * One window function computation carried by a Window plan node.
 */
export interface WindowSpec {
  /** Name of the output cell that receives the window function result */
  alias: string
  /** Window function name */
  funcName: string
  /** Arguments passed to the window function */
  args: ExprNode[]
  /** PARTITION BY expressions */
  partitionBy: ExprNode[]
  /** ORDER BY terms of the window */
  orderBy: OrderByItem[]
}
137
+
138
/**
 * Plan node that evaluates window functions over the rows of its child plan.
 */
export interface WindowNode {
  type: 'Window'
  /** Window computations to evaluate */
  windows: WindowSpec[]
  /** Plan producing the input rows */
  child: QueryPlan
}
@@ -26,6 +26,14 @@ export function isMathFunc(name) {
26
26
  ].includes(name)
27
27
  }
28
28
 
29
/**
 * Tells whether `name` is one of the supported window functions.
 * The comparison is exact and case-sensitive.
 *
 * @param {string} name
 * @returns {boolean}
 */
export function isWindowFunc(name) {
  const supported = ['ROW_NUMBER']
  return supported.some(fn => fn === name)
}
36
+
29
37
  /**
30
38
  * @param {string} name
31
39
  * @returns {name is RegExpFunction}
@@ -172,7 +180,7 @@ export const FUNCTION_SIGNATURES = {
172
180
  ARRAY_AGG: { min: 1, max: 1, signature: 'expression' },
173
181
 
174
182
  // Array functions
175
- ARRAY_LENGTH: { min: 1, max: 1, signature: 'array' },
183
+ ARRAY_LENGTH: { min: 1, max: 2, signature: 'array[, dimension]' },
176
184
  ARRAY_POSITION: { min: 2, max: 2, signature: 'array, element' },
177
185
  ARRAY_SORT: { min: 1, max: 1, signature: 'array' },
178
186
  CARDINALITY: { min: 1, max: 1, signature: 'array' },
@@ -200,6 +208,9 @@ export const FUNCTION_SIGNATURES = {
200
208
  APPROX_QUANTILE: { min: 2, max: 2, signature: 'expression, fraction' },
201
209
  STRING_AGG: { min: 2, max: 2, signature: 'expression, separator' },
202
210
 
211
+ // Window functions
212
+ ROW_NUMBER: { min: 0, max: 0, signature: '' },
213
+
203
214
  // Spatial functions
204
215
  ST_INTERSECTS: { min: 2, max: 2, signature: 'geometry, geometry' },
205
216
  ST_CONTAINS: { min: 2, max: 2, signature: 'geometry, geometry' },
@@ -2,7 +2,7 @@ import { FUNCTION_SIGNATURES } from './functions.js'
2
2
 
3
3
  /** Well-known window functions that are not supported */
4
4
  const WINDOW_FUNCTIONS = new Set([
5
- 'ROW_NUMBER', 'RANK', 'DENSE_RANK', 'NTILE',
5
+ 'RANK', 'DENSE_RANK', 'NTILE',
6
6
  'LAG', 'LEAD', 'FIRST_VALUE', 'LAST_VALUE', 'NTH_VALUE',
7
7
  'CUME_DIST', 'PERCENT_RANK',
8
8
  ])
@@ -119,6 +119,10 @@ export function validateTableRefs(expr, tables) {
119
119
  for (const arg of expr.args) {
120
120
  validateTableRefs(arg, tables)
121
121
  }
122
+ } else if (expr.type === 'window') {
123
+ for (const arg of expr.args) validateTableRefs(arg, tables)
124
+ for (const p of expr.partitionBy) validateTableRefs(p, tables)
125
+ for (const o of expr.orderBy) validateTableRefs(o.expr, tables)
122
126
  } else if (expr.type === 'cast') {
123
127
  validateTableRefs(expr.expr, tables)
124
128
  } else if (expr.type === 'in valuelist') {