squirreling 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -137,6 +137,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
137
137
  - Subqueries in `SELECT`, `FROM`, and `WHERE` clauses
138
138
  - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`
139
139
  - `GROUP BY` and `HAVING` clauses
140
+ - Set operations: `UNION`, `UNION ALL`, `INTERSECT`, `INTERSECT ALL`, `EXCEPT`, `EXCEPT ALL`
140
141
  - Expressions: `CASE`, `CAST`, `BETWEEN`, `IN`, `LIKE`, `IS NULL`, `IS NOT NULL`
141
142
 
142
143
  ### Quoting
@@ -147,14 +148,14 @@ Squirreling mostly follows the SQL standard. The following features are supporte
147
148
 
148
149
  ### Functions
149
150
 
150
- - Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `STDDEV_POP`, `STDDEV_SAMP`, `JSON_ARRAYAGG`
151
- - String: `CONCAT`, `SUBSTRING`, `REPLACE`, `LENGTH`, `UPPER`, `LOWER`, `TRIM`, `LEFT`, `RIGHT`, `INSTR`
151
+ - Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `MEDIAN`, `PERCENTILE_CONT`, `APPROX_QUANTILE`, `STDDEV_POP`, `STDDEV_SAMP`, `JSON_ARRAYAGG`
152
+ - String: `CONCAT`, `SUBSTRING`, `REPLACE`, `LENGTH`, `UPPER`, `LOWER`, `TRIM`, `LEFT`, `RIGHT`, `INSTR`, `POSITION`, `STRPOS`
152
153
  - Math: `ABS`, `SIGN`, `CEIL`, `FLOOR`, `ROUND`, `MOD`, `RAND`, `RANDOM`, `LN`, `LOG10`, `EXP`, `POWER`, `SQRT`
153
154
  - Trig: `SIN`, `COS`, `TAN`, `COT`, `ASIN`, `ACOS`, `ATAN`, `ATAN2`, `DEGREES`, `RADIANS`, `PI`
154
155
  - Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_PART`, `DATE_TRUNC`, `EXTRACT`, `INTERVAL`
155
- - Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_OBJECT`
156
+ - Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`
156
157
  - Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_SORT`, `CARDINALITY`
157
- - Regex: `REGEXP_SUBSTR`, `REGEXP_REPLACE`
158
+ - Regex: `REGEXP_SUBSTR`, `REGEXP_EXTRACT`, `REGEXP_REPLACE`
158
159
  - Spatial: `ST_GeomFromText`, `ST_MakeEnvelope`, `ST_AsText`, `ST_Intersects`, `ST_Contains`, `ST_ContainsProperly`, `ST_Within`, `ST_Overlaps`, `ST_Touches`, `ST_Equals`, `ST_Crosses`, `ST_Covers`, `ST_CoveredBy`, `ST_DWithin`
159
160
  - Conditional: `COALESCE`, `NULLIF`
160
161
  - User-defined functions (UDFs)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.10.2",
3
+ "version": "0.11.0",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -40,10 +40,10 @@
40
40
  },
41
41
  "devDependencies": {
42
42
  "@types/node": "25.5.0",
43
- "@vitest/coverage-v8": "4.1.0",
43
+ "@vitest/coverage-v8": "4.1.2",
44
44
  "eslint": "9.39.2",
45
- "eslint-plugin-jsdoc": "62.8.0",
46
- "typescript": "5.9.3",
47
- "vitest": "4.1.0"
45
+ "eslint-plugin-jsdoc": "62.8.1",
46
+ "typescript": "6.0.2",
47
+ "vitest": "4.1.2"
48
48
  }
49
49
  }
package/src/ast.d.ts CHANGED
@@ -8,8 +8,8 @@ export type SqlPrimitive =
8
8
  | SqlPrimitive[]
9
9
  | Record<string, any>
10
10
 
11
- export interface SelectStatement {
12
- with?: WithClause
11
+ export interface SelectStatement extends AstBase {
12
+ type: 'select'
13
13
  distinct: boolean
14
14
  columns: SelectColumn[]
15
15
  from: FromTable | FromSubquery
@@ -22,25 +22,42 @@ export interface SelectStatement {
22
22
  offset?: number
23
23
  }
24
24
 
25
- export interface WithClause {
25
+ export type SetOperator = 'UNION' | 'INTERSECT' | 'EXCEPT'
26
+
27
+ export interface SetOperationStatement extends AstBase {
28
+ type: 'compound'
29
+ operator: SetOperator
30
+ all: boolean
31
+ left: Statement
32
+ right: Statement
33
+ orderBy: OrderByItem[]
34
+ limit?: number
35
+ offset?: number
36
+ }
37
+
38
+ export interface WithStatement extends AstBase {
39
+ type: 'with'
26
40
  ctes: CTEDefinition[]
41
+ query: Statement
27
42
  }
28
43
 
29
- export interface CTEDefinition {
44
+ export type Statement = SelectStatement | SetOperationStatement | WithStatement
45
+
46
+ export interface CTEDefinition extends AstBase {
30
47
  name: string
31
- query: SelectStatement
48
+ query: Statement
32
49
  }
33
50
 
34
51
  export interface FromTable extends AstBase {
35
- kind: 'table'
52
+ type: 'table'
36
53
  table: string
37
54
  alias?: string
38
55
  }
39
56
 
40
- export interface FromSubquery {
41
- kind: 'subquery'
42
- query: SelectStatement
43
- alias: string
57
+ export interface FromSubquery extends AstBase {
58
+ type: 'subquery'
59
+ query: Statement
60
+ alias?: string
44
61
  }
45
62
 
46
63
  export type ArithmeticOp = '+' | '-' | '*' | '/' | '%'
@@ -91,7 +108,7 @@ export interface CastNode extends AstBase {
91
108
  export interface InSubqueryNode extends AstBase {
92
109
  type: 'in'
93
110
  expr: ExprNode
94
- subquery: SelectStatement
111
+ subquery: Statement
95
112
  }
96
113
 
97
114
  export interface InValuesNode extends AstBase {
@@ -102,7 +119,7 @@ export interface InValuesNode extends AstBase {
102
119
 
103
120
  export interface ExistsNode extends AstBase {
104
121
  type: 'exists' | 'not exists'
105
- subquery: SelectStatement
122
+ subquery: Statement
106
123
  }
107
124
 
108
125
  export interface WhenClause extends AstBase {
@@ -119,7 +136,7 @@ export interface CaseNode extends AstBase {
119
136
 
120
137
  export interface SubqueryNode extends AstBase {
121
138
  type: 'subquery'
122
- subquery: SelectStatement
139
+ subquery: Statement
123
140
  }
124
141
 
125
142
  export type IntervalUnit = 'DAY' | 'MONTH' | 'YEAR' | 'HOUR' | 'MINUTE' | 'SECOND'
@@ -150,12 +167,12 @@ export type ExprNode =
150
167
  | StarNode
151
168
 
152
169
  export interface StarColumn {
153
- kind: 'star'
170
+ type: 'star'
154
171
  table?: string
155
172
  }
156
173
 
157
174
  export interface DerivedColumn {
158
- kind: 'derived'
175
+ type: 'derived'
159
176
  expr: ExprNode
160
177
  alias?: string
161
178
  }
@@ -32,19 +32,20 @@ export function memorySource({ data, columns }) {
32
32
  if (!data.length) {
33
33
  throw new Error('Unknown columns: data is empty and no columns provided')
34
34
  }
35
- columns = Object.keys(data[0])
35
+ const firstColumns = Object.keys(data[0])
36
36
  // Check first 1000 rows for consistent columns
37
37
  for (let i = 1; i < data.length && i < 1000; i++) {
38
38
  const rowColumns = Object.keys(data[i])
39
- const missing = columns.find(col => !rowColumns.includes(col))
39
+ const missing = firstColumns.find(col => !rowColumns.includes(col))
40
40
  if (missing) {
41
41
  throw new Error(`Inconsistent data, column "${missing}" not found in row ${i}`)
42
42
  }
43
- const extra = rowColumns.find(col => !columns.includes(col))
43
+ const extra = rowColumns.find(col => !firstColumns.includes(col))
44
44
  if (extra) {
45
45
  throw new Error(`Inconsistent data, unexpected column "${extra}" found in row ${i}`)
46
46
  }
47
47
  }
48
+ columns = firstColumns
48
49
  }
49
50
  return {
50
51
  numRows: data.length,
@@ -1,10 +1,10 @@
1
1
  import { derivedAlias } from '../expression/alias.js'
2
2
  import { evaluateExpr } from '../expression/evaluate.js'
3
3
  import { executePlan } from './execute.js'
4
- import { stringify } from './utils.js'
4
+ import { keyify } from './utils.js'
5
5
 
6
6
  /**
7
- * @import { AsyncCells, AsyncRow, ExecuteContext, SelectColumn } from '../types.js'
7
+ * @import { AsyncCells, AsyncDataSource, AsyncRow, DerivedColumn, ExecuteContext, SelectColumn, SqlPrimitive } from '../types.js'
8
8
  * @import { HashAggregateNode, ScalarAggregateNode } from '../plan/types.js'
9
9
  */
10
10
 
@@ -23,7 +23,7 @@ function projectAggregateColumns(selectColumns, group, context) {
23
23
  const cells = {}
24
24
 
25
25
  for (const col of selectColumns) {
26
- if (col.kind === 'star') {
26
+ if (col.type === 'star') {
27
27
  const firstRow = group[0]
28
28
  if (firstRow) {
29
29
  for (const key of firstRow.columns) {
@@ -31,7 +31,7 @@ function projectAggregateColumns(selectColumns, group, context) {
31
31
  cells[key] = firstRow.cells[key]
32
32
  }
33
33
  }
34
- } else if (col.kind === 'derived') {
34
+ } else {
35
35
  const alias = col.alias ?? derivedAlias(col.expr)
36
36
  columns.push(alias)
37
37
  cells[alias] = () => evaluateExpr({
@@ -63,30 +63,21 @@ export async function* executeHashAggregate(plan, context) {
63
63
  }
64
64
 
65
65
  // Group rows by GROUP BY keys
66
- /** @type {Map<string, AsyncRow[]>} */
67
- const groupMap = new Map()
68
- /** @type {AsyncRow[][]} */
69
- const groups = []
66
+ /** @type {Map<any, AsyncRow[]>} */
67
+ const groups = new Map()
70
68
 
71
69
  for (const row of allRows) {
72
- /** @type {string[]} */
73
- const keyParts = []
74
- for (const expr of plan.groupBy) {
75
- const v = await evaluateExpr({ node: expr, row, context })
76
- keyParts.push(stringify(v))
77
- }
78
- const key = keyParts.join('|')
79
- let group = groupMap.get(key)
70
+ const key = keyify(...await Promise.all(plan.groupBy.map(expr => evaluateExpr({ node: expr, row, context }))))
71
+ let group = groups.get(key)
80
72
  if (!group) {
81
73
  group = []
82
- groupMap.set(key, group)
83
- groups.push(group)
74
+ groups.set(key, group)
84
75
  }
85
76
  group.push(row)
86
77
  }
87
78
 
88
79
  // Yield one row per group
89
- for (const group of groups) {
80
+ for (const group of groups.values()) {
90
81
  const asyncRow = projectAggregateColumns(plan.columns, group, context)
91
82
 
92
83
  // Apply HAVING filter
@@ -117,6 +108,13 @@ export async function* executeHashAggregate(plan, context) {
117
108
  * @yields {AsyncRow}
118
109
  */
119
110
  export async function* executeScalarAggregate(plan, context) {
111
+ // Fast path: use scanColumn when available
112
+ const fast = tryColumnScanAggregate(plan, context)
113
+ if (fast) {
114
+ yield* fast
115
+ return
116
+ }
117
+
120
118
  // Collect all rows into single group
121
119
  /** @type {AsyncRow[]} */
122
120
  const group = []
@@ -145,3 +143,146 @@ export async function* executeScalarAggregate(plan, context) {
145
143
 
146
144
  yield asyncRow
147
145
  }
146
+
147
+ /**
148
+ * @typedef {{
149
+ * funcName: string,
150
+ * column: string,
151
+ * alias: string,
152
+ * distinct?: boolean,
153
+ * }} ColumnAggSpec
154
+ */
155
+
156
+ /**
157
+ * Checks if a scalar aggregate can use the scanColumn fast path.
158
+ * Returns an async generator if so, undefined otherwise.
159
+ *
160
+ * @param {ScalarAggregateNode} plan
161
+ * @param {ExecuteContext} context
162
+ * @returns {AsyncGenerator<AsyncRow> | undefined}
163
+ */
164
+ function tryColumnScanAggregate(plan, context) {
165
+ // No HAVING support in fast path
166
+ if (plan.having) return
167
+ // Child must be a direct table scan
168
+ if (plan.child.type !== 'Scan') return
169
+ const scanNode = plan.child
170
+ // No WHERE in scan (scanColumn doesn't support filtering)
171
+ if (scanNode.hints.where) return
172
+
173
+ const table = context.tables[scanNode.table]
174
+ if (!table?.scanColumn) return
175
+
176
+ // All columns must be simple aggregates on plain identifiers
177
+ /** @type {ColumnAggSpec[]} */
178
+ const specs = []
179
+ for (const col of plan.columns) {
180
+ if (col.type !== 'derived') return
181
+ const spec = extractColumnAggSpec(col)
182
+ if (!spec) return
183
+ specs.push(spec)
184
+ }
185
+
186
+ return (async function* () {
187
+ /** @type {string[]} */
188
+ const columns = []
189
+ /** @type {AsyncCells} */
190
+ const cells = {}
191
+
192
+ for (const spec of specs) {
193
+ const value = await scanColumnAggregate(table, spec, context.signal)
194
+ columns.push(spec.alias)
195
+ cells[spec.alias] = () => Promise.resolve(value)
196
+ }
197
+
198
+ yield { columns, cells }
199
+ })()
200
+ }
201
+
202
+ /**
203
+ * Extracts aggregate spec from a simple aggregate expression node.
204
+ * Returns undefined if the expression is not a supported simple aggregate.
205
+ *
206
+ * @param {DerivedColumn} col
207
+ * @returns {ColumnAggSpec | undefined}
208
+ */
209
+ function extractColumnAggSpec({ expr, alias }) {
210
+ if (expr.type !== 'function') return
211
+ if (expr.filter) return // FILTER not supported in fast path
212
+ const funcName = expr.funcName.toUpperCase()
213
+ if (!['COUNT', 'SUM', 'AVG', 'MIN', 'MAX'].includes(funcName)) return
214
+
215
+ // Argument must be a plain column identifier
216
+ const arg = expr.args[0]
217
+ if (arg.type !== 'identifier') return
218
+ return {
219
+ funcName,
220
+ column: derivedAlias(arg),
221
+ alias: alias ?? derivedAlias(expr),
222
+ distinct: expr.distinct,
223
+ }
224
+ }
225
+
226
+ /**
227
+ * Scans a single column and computes an aggregate value.
228
+ *
229
+ * @param {AsyncDataSource} table
230
+ * @param {ColumnAggSpec} spec
231
+ * @param {AbortSignal} [signal]
232
+ * @returns {Promise<SqlPrimitive>}
233
+ */
234
+ async function scanColumnAggregate(table, spec, signal) {
235
+ const values = table.scanColumn({ column: spec.column, signal })
236
+
237
+ if (spec.funcName === 'COUNT' && spec.distinct) {
238
+ const seen = new Set()
239
+ for await (const chunk of values) {
240
+ if (signal?.aborted) return null
241
+ for (let i = 0; i < chunk.length; i++) {
242
+ const v = chunk[i]
243
+ if (v == null) continue
244
+ seen.add(keyify(v))
245
+ }
246
+ }
247
+ return seen.size
248
+ }
249
+
250
+ if (spec.funcName === 'COUNT') {
251
+ let count = 0
252
+ for await (const chunk of values) {
253
+ if (signal?.aborted) return null
254
+ for (let i = 0; i < chunk.length; i++) {
255
+ if (chunk[i] != null) count++
256
+ }
257
+ }
258
+ return count
259
+ }
260
+
261
+ // SUM, AVG, MIN, MAX
262
+ let sum = 0
263
+ let count = 0
264
+ /** @type {SqlPrimitive} */
265
+ let min = null
266
+ /** @type {SqlPrimitive} */
267
+ let max = null
268
+
269
+ for await (const chunk of values) {
270
+ if (signal?.aborted) return null
271
+ for (let i = 0; i < chunk.length; i++) {
272
+ const v = chunk[i]
273
+ if (v == null) continue
274
+ if (min === null || v < min) min = v
275
+ if (max === null || v > max) max = v
276
+ const num = Number(v)
277
+ if (!Number.isFinite(num)) continue
278
+ sum += num
279
+ count++
280
+ }
281
+ }
282
+
283
+ if (spec.funcName === 'SUM') return count === 0 ? null : sum
284
+ if (spec.funcName === 'AVG') return count === 0 ? null : sum / count
285
+ if (spec.funcName === 'MIN') return min
286
+ if (spec.funcName === 'MAX') return max
287
+ return null
288
+ }
@@ -3,15 +3,15 @@ import { derivedAlias } from '../expression/alias.js'
3
3
  import { evaluateExpr } from '../expression/evaluate.js'
4
4
  import { parseSql } from '../parse/parse.js'
5
5
  import { planSql } from '../plan/plan.js'
6
- import { tableNotFoundError } from '../validation/planErrors.js'
6
+ import { TableNotFoundError } from '../validation/planErrors.js'
7
7
  import { executeHashAggregate, executeScalarAggregate } from './aggregates.js'
8
8
  import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from './join.js'
9
9
  import { executeSort } from './sort.js'
10
10
  import { stableRowKey } from './utils.js'
11
11
 
12
12
  /**
13
- * @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteContext, ExecuteSqlOptions, ExprNode, SelectStatement } from '../types.js'
14
- * @import { CountNode, DistinctNode, FilterNode, LimitNode, ProjectNode, QueryPlan, ScanNode } from '../plan/types.js'
13
+ * @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteContext, ExecuteSqlOptions, ExprNode, Statement } from '../types.js'
14
+ * @import { CountNode, DistinctNode, FilterNode, LimitNode, ProjectNode, QueryPlan, ScanNode, SetOperationNode } from '../plan/types.js'
15
15
  */
16
16
 
17
17
  /**
@@ -21,7 +21,7 @@ import { stableRowKey } from './utils.js'
21
21
  * @yields {AsyncRow}
22
22
  */
23
23
  export async function* executeSql({ tables, query, functions, signal }) {
24
- const select = typeof query === 'string' ? parseSql({ query, functions }) : query
24
+ const parsed = typeof query === 'string' ? parseSql({ query, functions }) : query
25
25
 
26
26
  // Normalize tables: convert arrays to AsyncDataSource
27
27
  /** @type {Record<string, AsyncDataSource>} */
@@ -34,19 +34,19 @@ export async function* executeSql({ tables, query, functions, signal }) {
34
34
  }
35
35
  }
36
36
 
37
- yield* executeSelect({ select, context: { tables: normalizedTables, functions, signal } })
37
+ yield* executeStatement({ query: parsed, context: { tables: normalizedTables, functions, signal } })
38
38
  }
39
39
 
40
40
  /**
41
- * Executes a SELECT query against the provided tables
41
+ * Executes a statement against the provided tables
42
42
  *
43
43
  * @param {Object} options
44
- * @param {SelectStatement} options.select
44
+ * @param {Statement} options.query
45
45
  * @param {ExecuteContext} options.context
46
46
  * @yields {AsyncRow}
47
47
  */
48
- export async function* executeSelect({ select, context }) {
49
- const plan = planSql({ query: select, functions: context.functions, tables: context.tables })
48
+ export async function* executeStatement({ query, context }) {
49
+ const plan = planSql({ query, functions: context.functions, tables: context.tables })
50
50
  yield* executePlan({ plan, context })
51
51
  }
52
52
 
@@ -83,12 +83,12 @@ export async function* executePlan({ plan, context }) {
83
83
  yield* executeDistinct(plan, context)
84
84
  } else if (plan.type === 'Limit') {
85
85
  yield* executeLimit(plan, context)
86
+ } else if (plan.type === 'SetOperation') {
87
+ yield* executeSetOperation(plan, context)
86
88
  }
87
89
  }
88
90
 
89
91
  /**
90
- * Executes a table scan
91
- *
92
92
  * @param {ScanNode} plan
93
93
  * @param {ExecuteContext} context
94
94
  * @yields {AsyncRow}
@@ -98,7 +98,7 @@ async function* executeScan(plan, context) {
98
98
  // check table
99
99
  const table = tables[plan.table]
100
100
  if (!table) {
101
- throw tableNotFoundError({ table: plan.table, tables })
101
+ throw new TableNotFoundError({ table: plan.table, tables })
102
102
  }
103
103
  // check columns
104
104
  const missingColumn = plan.hints.columns?.find(col => !table.columns.includes(col))
@@ -140,12 +140,12 @@ async function* executeScan(plan, context) {
140
140
  async function* executeCount(plan, { tables, signal }) {
141
141
  const table = tables[plan.table]
142
142
  if (!table) {
143
- throw tableNotFoundError({ table: plan.table, tables })
143
+ throw new TableNotFoundError({ table: plan.table, tables })
144
144
  }
145
145
 
146
146
  // Use source numRows if available
147
147
  let count = table.numRows
148
- if (table.numRows === undefined) {
148
+ if (count === undefined) {
149
149
  // Fall back to counting rows via scan
150
150
  count = 0
151
151
  const { rows } = table.scan({ signal })
@@ -272,12 +272,12 @@ async function* executeProject(plan, context) {
272
272
  const cells = {}
273
273
 
274
274
  for (const col of plan.columns) {
275
- if (col.kind === 'star') {
275
+ if (col.type === 'star') {
276
276
  for (const key of row.columns) {
277
277
  columns.push(key)
278
278
  cells[key] = row.cells[key]
279
279
  }
280
- } else if (col.kind === 'derived') {
280
+ } else {
281
281
  const alias = col.alias ?? derivedAlias(col.expr)
282
282
  columns.push(alias)
283
283
  cells[alias] = () => evaluateExpr({
@@ -304,7 +304,6 @@ async function* executeDistinct(plan, context) {
304
304
  const { signal } = context
305
305
  const MAX_CHUNK = 256
306
306
 
307
- /** @type {Set<string>} */
308
307
  const seen = new Set()
309
308
 
310
309
  /** @type {AsyncRow[]} */
@@ -315,10 +314,11 @@ async function* executeDistinct(plan, context) {
315
314
  buffer.push(row)
316
315
 
317
316
  if (buffer.length >= MAX_CHUNK) {
318
- const keys = await Promise.all(buffer.map(r => stableRowKey(r.cells)))
317
+ const keys = buffer.map(stableRowKey)
319
318
  for (let i = 0; i < buffer.length; i++) {
320
- if (!seen.has(keys[i])) {
321
- seen.add(keys[i])
319
+ const key = await keys[i]
320
+ if (!seen.has(key)) {
321
+ seen.add(key)
322
322
  yield buffer[i]
323
323
  }
324
324
  }
@@ -328,10 +328,11 @@ async function* executeDistinct(plan, context) {
328
328
 
329
329
  // Flush remaining
330
330
  if (buffer.length > 0) {
331
- const keys = await Promise.all(buffer.map(r => stableRowKey(r.cells)))
331
+ const keys = buffer.map(stableRowKey)
332
332
  for (let i = 0; i < buffer.length; i++) {
333
- if (!seen.has(keys[i])) {
334
- seen.add(keys[i])
333
+ const key = await keys[i]
334
+ if (!seen.has(key)) {
335
+ seen.add(key)
335
336
  yield buffer[i]
336
337
  }
337
338
  }
@@ -348,3 +349,108 @@ async function* executeDistinct(plan, context) {
348
349
  async function* executeLimit(plan, context) {
349
350
  yield* limitRows(executePlan({ plan: plan.child, context }), plan.limit, plan.offset, context.signal)
350
351
  }
352
+
353
+ /**
354
+ * Executes a set operation (UNION, INTERSECT, EXCEPT)
355
+ *
356
+ * @param {SetOperationNode} plan
357
+ * @param {ExecuteContext} context
358
+ * @yields {AsyncRow}
359
+ */
360
+ async function* executeSetOperation(plan, context) {
361
+ const { signal } = context
362
+
363
+ if (plan.operator === 'UNION') {
364
+ if (plan.all) {
365
+ // UNION ALL: yield all rows from both sides
366
+ yield* executePlan({ plan: plan.left, context })
367
+ yield* executePlan({ plan: plan.right, context })
368
+ } else {
369
+ // UNION: yield deduplicated rows from both sides
370
+ const seen = new Set()
371
+ for await (const row of executePlan({ plan: plan.left, context })) {
372
+ if (signal?.aborted) return
373
+ const key = await stableRowKey(row)
374
+ if (!seen.has(key)) {
375
+ seen.add(key)
376
+ yield row
377
+ }
378
+ }
379
+ for await (const row of executePlan({ plan: plan.right, context })) {
380
+ if (signal?.aborted) return
381
+ const key = await stableRowKey(row)
382
+ if (!seen.has(key)) {
383
+ seen.add(key)
384
+ yield row
385
+ }
386
+ }
387
+ }
388
+ } else if (plan.operator === 'INTERSECT') {
389
+ // Materialize right side keys
390
+ /** @type {Map<any, number>} */
391
+ const rightKeys = new Map()
392
+ for await (const row of executePlan({ plan: plan.right, context })) {
393
+ if (signal?.aborted) return
394
+ const key = await stableRowKey(row)
395
+ rightKeys.set(key, (rightKeys.get(key) ?? 0) + 1)
396
+ }
397
+
398
+ if (plan.all) {
399
+ // INTERSECT ALL: yield each left row that matches, consuming right counts
400
+ for await (const row of executePlan({ plan: plan.left, context })) {
401
+ if (signal?.aborted) return
402
+ const key = await stableRowKey(row)
403
+ const count = rightKeys.get(key)
404
+ if (count) {
405
+ rightKeys.set(key, count - 1)
406
+ yield row
407
+ }
408
+ }
409
+ } else {
410
+ // INTERSECT: yield deduplicated rows present in both
411
+ const seen = new Set()
412
+ for await (const row of executePlan({ plan: plan.left, context })) {
413
+ if (signal?.aborted) return
414
+ const key = await stableRowKey(row)
415
+ if (rightKeys.has(key) && !seen.has(key)) {
416
+ seen.add(key)
417
+ yield row
418
+ }
419
+ }
420
+ }
421
+ } else if (plan.operator === 'EXCEPT') {
422
+ // Materialize right side keys
423
+ /** @type {Map<any, number>} */
424
+ const rightKeys = new Map()
425
+ for await (const row of executePlan({ plan: plan.right, context })) {
426
+ if (signal?.aborted) return
427
+ const key = await stableRowKey(row)
428
+ rightKeys.set(key, (rightKeys.get(key) ?? 0) + 1)
429
+ }
430
+
431
+ if (plan.all) {
432
+ // EXCEPT ALL: yield left rows, consuming right counts
433
+ for await (const row of executePlan({ plan: plan.left, context })) {
434
+ if (signal?.aborted) return
435
+ const key = await stableRowKey(row)
436
+ const count = rightKeys.get(key)
437
+ if (count) {
438
+ rightKeys.set(key, count - 1)
439
+ } else {
440
+ yield row
441
+ }
442
+ }
443
+ } else {
444
+ // EXCEPT: yield deduplicated left rows not in right
445
+ const seen = new Set()
446
+ for await (const row of executePlan({ plan: plan.left, context })) {
447
+ if (signal?.aborted) return
448
+ const key = await stableRowKey(row)
449
+ if (!rightKeys.has(key) && !seen.has(key)) {
450
+ seen.add(key)
451
+ yield row
452
+ }
453
+ }
454
+ }
455
+ }
456
+ }