squirreling 0.12.21 → 0.12.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -141,7 +141,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
141
141
  - `SELECT` statements with `DISTINCT`, `WHERE`, `ORDER BY`, `LIMIT`, `OFFSET`
142
142
  - `WITH` clause for Common Table Expressions (CTEs)
143
143
  - Subqueries in `SELECT`, `FROM`, `WHERE`, and correlated subqueries
144
- - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`, `LATERAL VIEW [OUTER] EXPLODE(...)`
144
+ - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`, `LATERAL VIEW [OUTER] EXPLODE(...)`, with `ON` or `USING (col, ...)` conditions
145
145
  - `GROUP BY` and `HAVING` clauses
146
146
  - Set operations: `UNION`, `UNION ALL`, `INTERSECT`, `INTERSECT ALL`, `EXCEPT`, `EXCEPT ALL`
147
147
  - Expressions: `CASE`, `CAST`, `BETWEEN`, `IN`, `LIKE`, `IS NULL`, `IS NOT NULL`
@@ -160,7 +160,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
160
160
  - Math: `ABS`, `SIGN`, `CEIL`, `FLOOR`, `ROUND`, `MOD`, `RAND`, `RANDOM`, `LN`, `LOG10`, `EXP`, `POWER`, `SQRT`
161
161
  - Trig: `SIN`, `COS`, `TAN`, `COT`, `ASIN`, `ACOS`, `ATAN`, `ATAN2`, `DEGREES`, `RADIANS`, `PI`
162
162
  - Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_DIFF`, `DATEDIFF`, `DATE_PART`, `DATE_TRUNC`, `EPOCH`, `EXTRACT`, `INTERVAL`
163
- - Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`, `JSON_ARRAY_LENGTH`, `JSON_VALID`, `JSON_TYPE`
163
+ - Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`, `JSON_ARRAY_LENGTH`, `JSON_VALID`, `JSON_TYPE`, `JSON_KEYS`
164
164
  - Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_CONTAINS`, `ARRAY_SORT`, `ARRAY_APPEND`, `ARRAY_CONCAT`, `LEN`, `CARDINALITY`, `SIZE`
165
165
  - Table functions: `UNNEST`, `EXPLODE`, `JSON_EACH`
166
166
  - Regex: `REGEXP_SUBSTR`, `REGEXP_EXTRACT`, `REGEXP_REPLACE`, `REGEXP_MATCHES`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.12.21",
3
+ "version": "0.12.23",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
package/src/ast.d.ts CHANGED
@@ -210,6 +210,7 @@ export interface JoinClause extends AstBase {
210
210
  table: string
211
211
  alias?: string
212
212
  on?: ExprNode
213
+ using?: string[]
213
214
  fromFunction?: FromFunction
214
215
  }
215
216
 
@@ -3,6 +3,7 @@ import { evaluateExpr } from '../expression/evaluate.js'
3
3
  import { executePlan, selectColumnNames } from './execute.js'
4
4
  import { sortEntriesByTerms } from './sort.js'
5
5
  import { keyify } from './utils.js'
6
+ import { yieldToEventLoop } from './yield.js'
6
7
 
7
8
  /**
8
9
  * @import { AsyncCells, AsyncDataSource, AsyncRow, DerivedColumn, ExecuteContext, QueryResults, SelectColumn, SqlPrimitive } from '../types.js'
@@ -89,29 +90,45 @@ export function executeHashAggregate(plan, context) {
89
90
  let collectCount = 0
90
91
  for await (const row of child.rows()) {
91
92
  if (++collectCount % YIELD_INTERVAL === 0) {
92
- await new Promise(resolve => setTimeout(resolve, 0))
93
+ await yieldToEventLoop()
93
94
  if (context.signal?.aborted) return
94
95
  }
95
96
  allRows.push(row)
96
97
  }
97
98
 
98
- // Group rows by GROUP BY keys
99
+ // Group rows by GROUP BY keys.
100
+ // Each chunk dispatches all per-row key evaluations in parallel so
101
+ // async cells (e.g. lazy parquet decode) overlap; the await is at the
102
+ // chunk boundary. Synchronous cells stay cheap because we skip the
103
+ // inner Promise.all wrapper when there's a single GROUP BY expression.
99
104
  /** @type {Map<any, AsyncRow[]>} */
100
105
  const groups = new Map()
106
+ const { groupBy } = plan
107
+ const singleKey = groupBy.length === 1
108
+ const singleExpr = singleKey ? groupBy[0] : null
101
109
 
102
110
  for (let chunkStart = 0; chunkStart < allRows.length; chunkStart += YIELD_INTERVAL) {
103
111
  if (chunkStart > 0) {
104
- await new Promise(resolve => setTimeout(resolve, 0))
112
+ await yieldToEventLoop()
105
113
  if (context.signal?.aborted) return
106
114
  }
107
115
  const chunkEnd = Math.min(chunkStart + YIELD_INTERVAL, allRows.length)
108
- const chunkKeys = await Promise.all(
109
- allRows.slice(chunkStart, chunkEnd).map(row =>
110
- Promise.all(plan.groupBy.map(expr => evaluateExpr({ node: expr, row, context })))
111
- )
112
- )
113
- for (let j = 0; j < chunkKeys.length; j++) {
114
- const key = keyify(...chunkKeys[j])
116
+ const chunkLen = chunkEnd - chunkStart
117
+ /** @type {Promise<any>[]} */
118
+ const pending = new Array(chunkLen)
119
+ if (singleKey) {
120
+ for (let j = 0; j < chunkLen; j++) {
121
+ pending[j] = evaluateExpr({ node: singleExpr, row: allRows[chunkStart + j], context })
122
+ }
123
+ } else {
124
+ for (let j = 0; j < chunkLen; j++) {
125
+ const row = allRows[chunkStart + j]
126
+ pending[j] = Promise.all(groupBy.map(expr => evaluateExpr({ node: expr, row, context })))
127
+ }
128
+ }
129
+ const chunkKeys = await Promise.all(pending)
130
+ for (let j = 0; j < chunkLen; j++) {
131
+ const key = singleKey ? keyify(chunkKeys[j]) : keyify(...chunkKeys[j])
115
132
  const row = allRows[chunkStart + j]
116
133
  let group = groups.get(key)
117
134
  if (!group) {
@@ -189,7 +206,7 @@ export function executeScalarAggregate(plan, context) {
189
206
  let collectCount = 0
190
207
  for await (const row of child.rows()) {
191
208
  if (++collectCount % YIELD_INTERVAL === 0) {
192
- await new Promise(resolve => setTimeout(resolve, 0))
209
+ await yieldToEventLoop()
193
210
  if (context.signal?.aborted) return
194
211
  }
195
212
  group.push(row)
@@ -10,6 +10,7 @@ import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from '.
10
10
  import { executeSort } from './sort.js'
11
11
  import { addBounds, minBounds, stableRowKey } from './utils.js'
12
12
  import { executeWindow } from './window.js'
13
+ import { yieldToEventLoop } from './yield.js'
13
14
 
14
15
  /**
15
16
  * @import { AsyncCells, AsyncDataSource, AsyncRow, DerivedColumn, ExecuteContext, ExecuteSqlOptions, ExprNode, IdentifierNode, QueryResults, SelectColumn, SqlPrimitive, Statement } from '../types.js'
@@ -417,7 +418,7 @@ async function* filterRows(rows, condition, context, limit) {
417
418
  for await (const row of rows) {
418
419
  if (context.signal?.aborted) return
419
420
  if (++innerCount % YIELD_INTERVAL === 0) {
420
- await new Promise(resolve => setTimeout(resolve, 0))
421
+ await yieldToEventLoop()
421
422
  if (context.signal?.aborted) return
422
423
  }
423
424
  rowIndex++
@@ -463,7 +464,7 @@ async function* limitRows(rows, limit = Infinity, offset = 0, signal) {
463
464
  for await (const row of rows) {
464
465
  if (signal?.aborted) return
465
466
  if (++innerCount % YIELD_INTERVAL === 0) {
466
- await new Promise(resolve => setTimeout(resolve, 0))
467
+ await yieldToEventLoop()
467
468
  if (signal?.aborted) return
468
469
  }
469
470
  if (skipped < offset) {
@@ -518,7 +519,7 @@ function executeProject(plan, context) {
518
519
  for await (const row of child.rows()) {
519
520
  if (context.signal?.aborted) return
520
521
  if (++innerCount % YIELD_INTERVAL === 0) {
521
- await new Promise(resolve => setTimeout(resolve, 0))
522
+ await yieldToEventLoop()
522
523
  if (context.signal?.aborted) return
523
524
  }
524
525
  rowIndex++
@@ -611,7 +612,7 @@ function executeDistinct(plan, context) {
611
612
  for await (const row of child.rows()) {
612
613
  if (signal?.aborted) return
613
614
  if (++innerCount % YIELD_INTERVAL === 0) {
614
- await new Promise(resolve => setTimeout(resolve, 0))
615
+ await yieldToEventLoop()
615
616
  if (signal?.aborted) return
616
617
  }
617
618
  buffer.push(row)
@@ -698,7 +699,7 @@ function executeSetOperation(plan, context) {
698
699
  for await (const row of left.rows()) {
699
700
  if (signal?.aborted) return
700
701
  if (++count % YIELD_INTERVAL === 0) {
701
- await new Promise(resolve => setTimeout(resolve, 0))
702
+ await yieldToEventLoop()
702
703
  if (signal?.aborted) return
703
704
  }
704
705
  const key = await stableRowKey(row)
@@ -710,7 +711,7 @@ function executeSetOperation(plan, context) {
710
711
  for await (const row of right.rows()) {
711
712
  if (signal?.aborted) return
712
713
  if (++count % YIELD_INTERVAL === 0) {
713
- await new Promise(resolve => setTimeout(resolve, 0))
714
+ await yieldToEventLoop()
714
715
  if (signal?.aborted) return
715
716
  }
716
717
  const key = await stableRowKey(row)
@@ -736,7 +737,7 @@ function executeSetOperation(plan, context) {
736
737
  for await (const row of right.rows()) {
737
738
  if (signal?.aborted) return
738
739
  if (++tick % YIELD_INTERVAL === 0) {
739
- await new Promise(resolve => setTimeout(resolve, 0))
740
+ await yieldToEventLoop()
740
741
  if (signal?.aborted) return
741
742
  }
742
743
  const key = await stableRowKey(row)
@@ -748,7 +749,7 @@ function executeSetOperation(plan, context) {
748
749
  for await (const row of left.rows()) {
749
750
  if (signal?.aborted) return
750
751
  if (++tick % YIELD_INTERVAL === 0) {
751
- await new Promise(resolve => setTimeout(resolve, 0))
752
+ await yieldToEventLoop()
752
753
  if (signal?.aborted) return
753
754
  }
754
755
  const key = await stableRowKey(row)
@@ -764,7 +765,7 @@ function executeSetOperation(plan, context) {
764
765
  for await (const row of left.rows()) {
765
766
  if (signal?.aborted) return
766
767
  if (++tick % YIELD_INTERVAL === 0) {
767
- await new Promise(resolve => setTimeout(resolve, 0))
768
+ await yieldToEventLoop()
768
769
  if (signal?.aborted) return
769
770
  }
770
771
  const key = await stableRowKey(row)
@@ -791,7 +792,7 @@ function executeSetOperation(plan, context) {
791
792
  for await (const row of right.rows()) {
792
793
  if (signal?.aborted) return
793
794
  if (++tick % YIELD_INTERVAL === 0) {
794
- await new Promise(resolve => setTimeout(resolve, 0))
795
+ await yieldToEventLoop()
795
796
  if (signal?.aborted) return
796
797
  }
797
798
  const key = await stableRowKey(row)
@@ -803,7 +804,7 @@ function executeSetOperation(plan, context) {
803
804
  for await (const row of left.rows()) {
804
805
  if (signal?.aborted) return
805
806
  if (++tick % YIELD_INTERVAL === 0) {
806
- await new Promise(resolve => setTimeout(resolve, 0))
807
+ await yieldToEventLoop()
807
808
  if (signal?.aborted) return
808
809
  }
809
810
  const key = await stableRowKey(row)
@@ -820,7 +821,7 @@ function executeSetOperation(plan, context) {
820
821
  for await (const row of left.rows()) {
821
822
  if (signal?.aborted) return
822
823
  if (++tick % YIELD_INTERVAL === 0) {
823
- await new Promise(resolve => setTimeout(resolve, 0))
824
+ await yieldToEventLoop()
824
825
  if (signal?.aborted) return
825
826
  }
826
827
  const key = await stableRowKey(row)
@@ -1,6 +1,7 @@
1
1
  import { evaluateExpr } from '../expression/evaluate.js'
2
2
  import { keyify, maxBounds } from './utils.js'
3
3
  import { executePlan } from './execute.js'
4
+ import { yieldToEventLoop } from './yield.js'
4
5
 
5
6
  /**
6
7
  * @import { AsyncCells, AsyncRow, ExecuteContext, QueryResults } from '../types.js'
@@ -56,7 +57,7 @@ export function executeNestedLoopJoin(plan, context) {
56
57
 
57
58
  for (const rightRow of rightRows) {
58
59
  if (++innerCount % YIELD_INTERVAL === 0) {
59
- await new Promise(resolve => setTimeout(resolve, 0))
60
+ await yieldToEventLoop()
60
61
  if (context.signal?.aborted) return
61
62
  }
62
63
  const tempMerged = mergeRows(leftRow, rightRow, leftTable, rightTable)
@@ -263,7 +264,7 @@ export function executeHashJoin(plan, context) {
263
264
  if (candidates?.length) {
264
265
  for (const rightRow of candidates) {
265
266
  if (++innerCount % YIELD_INTERVAL === 0) {
266
- await new Promise(resolve => setTimeout(resolve, 0))
267
+ await yieldToEventLoop()
267
268
  if (context.signal?.aborted) return
268
269
  }
269
270
  const merged = mergeRows(leftRow, rightRow, leftTable, rightTable)
@@ -1,6 +1,7 @@
1
1
  import { evaluateExpr } from '../expression/evaluate.js'
2
2
  import { executePlan } from './execute.js'
3
3
  import { compareForTerm, keyify } from './utils.js'
4
+ import { yieldToEventLoop } from './yield.js'
4
5
 
5
6
  /**
6
7
  * @import { AsyncRow, ExecuteContext, QueryResults, SqlPrimitive } from '../types.js'
@@ -40,7 +41,7 @@ export function executeWindow(plan, context) {
40
41
  let i = 0
41
42
  for await (const row of child.rows()) {
42
43
  if (++i % YIELD_INTERVAL === 0) {
43
- await new Promise(resolve => setTimeout(resolve, 0))
44
+ await yieldToEventLoop()
44
45
  if (context.signal?.aborted) return
45
46
  }
46
47
  const cells = { ...row.cells }
@@ -67,7 +68,7 @@ export function executeWindow(plan, context) {
67
68
  let collectCount = 0
68
69
  for await (const row of child.rows()) {
69
70
  if (++collectCount % YIELD_INTERVAL === 0) {
70
- await new Promise(resolve => setTimeout(resolve, 0))
71
+ await yieldToEventLoop()
71
72
  if (context.signal?.aborted) return
72
73
  }
73
74
  rows.push(row)
@@ -86,7 +87,7 @@ export function executeWindow(plan, context) {
86
87
  let emitCount = 0
87
88
  for (let i = 0; i < rows.length; i++) {
88
89
  if (++emitCount % YIELD_INTERVAL === 0) {
89
- await new Promise(resolve => setTimeout(resolve, 0))
90
+ await yieldToEventLoop()
90
91
  if (context.signal?.aborted) return
91
92
  }
92
93
  const row = rows[i]
@@ -120,7 +121,7 @@ async function computeWindow(spec, rows, output, context) {
120
121
  const partitions = new Map()
121
122
  for (let chunkStart = 0; chunkStart < rows.length; chunkStart += YIELD_INTERVAL) {
122
123
  if (chunkStart > 0) {
123
- await new Promise(resolve => setTimeout(resolve, 0))
124
+ await yieldToEventLoop()
124
125
  if (context.signal?.aborted) return
125
126
  }
126
127
  const chunkEnd = Math.min(chunkStart + YIELD_INTERVAL, rows.length)
@@ -151,7 +152,7 @@ async function computeWindow(spec, rows, output, context) {
151
152
  const entries = new Array(bucket.length)
152
153
  for (let chunkStart = 0; chunkStart < bucket.length; chunkStart += YIELD_INTERVAL) {
153
154
  if (chunkStart > 0) {
154
- await new Promise(resolve => setTimeout(resolve, 0))
155
+ await yieldToEventLoop()
155
156
  if (context.signal?.aborted) return
156
157
  }
157
158
  const chunkEnd = Math.min(chunkStart + YIELD_INTERVAL, bucket.length)
@@ -203,7 +204,7 @@ async function applyWindowFunction(spec, ordered, rows, output, context) {
203
204
  let tick = 0
204
205
  for (let k = 0; k < ordered.length; k++) {
205
206
  if (++tick % YIELD_INTERVAL === 0) {
206
- await new Promise(resolve => setTimeout(resolve, 0))
207
+ await yieldToEventLoop()
207
208
  if (context.signal?.aborted) return
208
209
  }
209
210
  const idx = ordered[k]
@@ -0,0 +1,30 @@
1
+ // Yield to the event loop so that timer-based aborts can fire.
2
+ //
3
+ // In Node, setTimeout(fn, 0) is clamped to a 1ms minimum, so a tight loop
4
+ // that yields every few thousand iterations can spend hundreds of ms in
5
+ // scheduling overhead alone. setImmediate (Node) and MessageChannel
6
+ // (browsers) provide the same macrotask boundary at a fraction of the cost.
7
+ //
8
+ // We need a macrotask boundary (not just a microtask) because the abort
9
+ // timer itself is a macrotask; microtasks alone cannot let it fire.
10
+
11
+ /** Returns a promise that resolves after one macrotask boundary. The scheduling strategy is selected once, when this module is evaluated, and reused by every call. @type {() => Promise<void>} */
12
+ export const yieldToEventLoop = (() => {
13
+ if (typeof setImmediate === 'function') { // first choice whenever the global exists (e.g. Node)
14
+ return () => new Promise(resolve => setImmediate(resolve))
15
+ }
16
+ if (typeof MessageChannel !== 'undefined') { // second choice: no setImmediate, but MessageChannel is available (e.g. browsers)
17
+ const channel = new MessageChannel() // a single channel, created once, serves every caller
18
+ /** Resolvers of the yields that are still waiting, in call order. @type {Array<() => void>} */
19
+ const queue = []
20
+ channel.port1.onmessage = () => { // each delivered message releases exactly one waiter
21
+ const resolve = queue.shift() // oldest waiter first (FIFO)
22
+ if (resolve) resolve() // guard: nothing to do if the queue is already empty
23
+ }
24
+ return () => new Promise(resolve => {
25
+ queue.push(resolve) // enqueue before posting so the handler always finds this resolver
26
+ channel.port2.postMessage(0) // the payload is never read; the message only triggers the port1 handler
27
+ })
28
+ }
29
+ return () => new Promise(resolve => setTimeout(resolve, 0)) // last resort: plain timer-based yield
30
+ })()
@@ -1,5 +1,6 @@
1
1
  import { executeStatement } from '../execute/execute.js'
2
2
  import { isPlainObject, keyify, sqlEquals, stringify } from '../execute/utils.js'
3
+ import { yieldToEventLoop } from '../execute/yield.js'
3
4
  import { ArgValueError, ExecutionError } from '../validation/executionErrors.js'
4
5
  import { isAggregateFunc, isMathFunc, isRegexpFunc, isSpatialFunc, isStringFunc } from '../validation/functions.js'
5
6
  import { UnknownFunctionError } from '../validation/parseErrors.js'
@@ -31,16 +32,21 @@ const YIELD_INTERVAL = 4000
31
32
  async function evaluateAll(node, rows, context) {
32
33
  /** @type {SqlPrimitive[]} */
33
34
  const results = new Array(rows.length)
35
+ /** @type {Promise<SqlPrimitive>[]} */
36
+ const pending = new Array(Math.min(YIELD_INTERVAL, rows.length))
34
37
  for (let i = 0; i < rows.length; i += YIELD_INTERVAL) {
35
38
  if (i > 0) {
36
- await new Promise(resolve => setTimeout(resolve, 0))
39
+ await yieldToEventLoop()
37
40
  context.signal?.throwIfAborted()
38
41
  }
39
42
  const end = Math.min(i + YIELD_INTERVAL, rows.length)
40
- const chunk = await Promise.all(rows.slice(i, end).map(row =>
41
- evaluateExpr({ node, row, context })
42
- ))
43
- for (let j = 0; j < chunk.length; j++) results[i + j] = chunk[j]
43
+ const chunkLen = end - i
44
+ pending.length = chunkLen
45
+ for (let j = 0; j < chunkLen; j++) {
46
+ pending[j] = evaluateExpr({ node, row: rows[i + j], context })
47
+ }
48
+ const chunk = await Promise.all(pending)
49
+ for (let j = 0; j < chunkLen; j++) results[i + j] = chunk[j]
44
50
  }
45
51
  return results
46
52
  }
@@ -326,7 +332,7 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
326
332
  return values[lower] + (values[upper] - values[lower]) * (pos - lower)
327
333
  }
328
334
 
329
- if (funcName === 'JSON_ARRAYAGG' || funcName === 'ARRAY_AGG') {
335
+ if (funcName === 'JSON_ARRAYAGG' || funcName === 'ARRAY_AGG' || funcName === 'LIST') {
330
336
  const allValues = await evaluateAll(argNode, filteredRows, context)
331
337
  if (node.distinct) {
332
338
  /** @type {SqlPrimitive[]} */
@@ -510,6 +516,25 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
510
516
  return typeof value
511
517
  }
512
518
 
519
+ if (funcName === 'JSON_KEYS') {
520
+ let value = args[0]
521
+ if (value == null) return null
522
+ if (typeof value === 'string') {
523
+ try {
524
+ value = JSON.parse(value)
525
+ } catch {
526
+ throw new ArgValueError({
527
+ ...node,
528
+ message: 'invalid JSON string',
529
+ hint: 'Argument must be valid JSON.',
530
+ rowIndex,
531
+ })
532
+ }
533
+ }
534
+ if (typeof value !== 'object' || value === null || Array.isArray(value) || value instanceof Date) return null
535
+ return Object.keys(value)
536
+ }
537
+
513
538
  if (funcName === 'JSON_ARRAY_LENGTH') {
514
539
  let arr = args[0]
515
540
  if (arr == null) return null
@@ -703,7 +728,7 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
703
728
  let innerCount = 0
704
729
  for await (const resRow of subResult.rows()) {
705
730
  if (++innerCount % YIELD_INTERVAL === 0) {
706
- await new Promise(resolve => setTimeout(resolve, 0))
731
+ await yieldToEventLoop()
707
732
  context.signal?.throwIfAborted()
708
733
  }
709
734
  const value = await resRow.cells[resRow.columns[0]]()
@@ -222,13 +222,26 @@ export function parseJoins(state) {
222
222
  const tableTok = expect(state, 'identifier')
223
223
  const tableAlias = parseTableAlias(state)
224
224
 
225
- // Parse ON condition (not for POSITIONAL joins)
225
+ // Parse ON condition or USING column list (not for POSITIONAL joins)
226
226
  /** @type {ExprNode | undefined} */
227
227
  let condition
228
+ /** @type {string[] | undefined} */
229
+ let using
228
230
  if (joinType !== 'POSITIONAL') {
229
- expect(state, 'keyword', 'ON')
230
- condition = parseExpression(state)
231
- expectNoAggregate(condition, 'JOIN ON')
231
+ if (match(state, 'keyword', 'USING')) {
232
+ expect(state, 'paren', '(')
233
+ using = []
234
+ while (true) {
235
+ const colTok = expect(state, 'identifier')
236
+ using.push(colTok.value)
237
+ if (!match(state, 'comma')) break
238
+ }
239
+ expect(state, 'paren', ')')
240
+ } else {
241
+ expect(state, 'keyword', 'ON')
242
+ condition = parseExpression(state)
243
+ expectNoAggregate(condition, 'JOIN ON')
244
+ }
232
245
  }
233
246
 
234
247
  joins.push({
@@ -236,6 +249,7 @@ export function parseJoins(state) {
236
249
  table: tableTok.value,
237
250
  alias: tableAlias,
238
251
  on: condition,
252
+ using,
239
253
  positionStart: tok.positionStart,
240
254
  positionEnd: tableTok.positionEnd,
241
255
  })
@@ -153,6 +153,13 @@ export function extractColumns({ select, parentColumns }) {
153
153
  if (sourceAlias !== undefined) visibleLateralAliases.push(sourceAlias)
154
154
  for (const join of select.joins) {
155
155
  collectColumnsFromExpr(join.on, identifiers)
156
+ // USING columns are equi-join keys on both sides; keep them in every
157
+ // table's needed set so projection pushdown can't prune the join key.
158
+ if (join.using) {
159
+ for (const col of join.using) {
160
+ for (const [, set] of perTable) set?.add(col)
161
+ }
162
+ }
156
163
  const joinAlias = join.alias ?? join.table
157
164
  if (join.fromFunction) {
158
165
  /** @type {IdentifierNode[]} */
package/src/plan/plan.js CHANGED
@@ -463,7 +463,10 @@ function planJoin({ left, joins, leftTable, ctePlans, cteColumns, perTableColumn
463
463
  if (join.joinType === 'POSITIONAL') {
464
464
  plan = { type: 'PositionalJoin', leftAlias: currentLeftTable, rightAlias: rightTable, left: plan, right: rightScan }
465
465
  } else {
466
- const keys = join.on && extractEquiKeys({ condition: join.on, leftTable: currentLeftTable, rightTable })
466
+ // `USING (cols)` desugars to an AND of `col = col` equalities, one per column, built by
467
+ // buildUsingCondition with unqualified identifiers on both sides; it routes through the hash-join path like any other ON.
468
+ const condition = join.on ?? (join.using && buildUsingCondition(join.using, join))
469
+ const keys = condition && extractEquiKeys({ condition, leftTable: currentLeftTable, rightTable })
467
470
  if (keys) {
468
471
  /** @type {HashJoinNode} */
469
472
  const hashJoin = {
@@ -484,7 +487,7 @@ function planJoin({ left, joins, leftTable, ctePlans, cteColumns, perTableColumn
484
487
  joinType: join.joinType,
485
488
  leftAlias: currentLeftTable,
486
489
  rightAlias: rightTable,
487
- condition: join.on,
490
+ condition,
488
491
  left: plan,
489
492
  right: rightScan,
490
493
  }
@@ -623,6 +626,37 @@ function normalizeIdentifiers(node, sourceColumns) {
623
626
  return node
624
627
  }
625
628
 
629
+ /**
630
+ * Builds the join condition for a `JOIN ... USING (cols)` clause: an AND of
631
+ * `col = col` equalities using unprefixed identifiers. The hash-join path
632
+ * evaluates the left key against the left row and the right key against the
633
+ * right row, so each unqualified name resolves unambiguously on its own side.
634
+ * NOTE(review): the rationale above covers the hash-join path only; confirm how an unqualified `col = col` is resolved if the planner cannot extract equi-keys and the condition is evaluated some other way.
635
+ * @param {string[]} using - shared column names from the USING clause
636
+ * @param {{ positionStart: number, positionEnd: number }} pos - position info for the synthesized exprs
637
+ * @returns {ExprNode | undefined} the equalities AND-ed together (left-nested), or undefined when `using` is empty
638
+ */
639
+ function buildUsingCondition(using, pos) {
640
+ const { positionStart, positionEnd } = pos // every synthesized node is stamped with this same span
641
+ /** @type {ExprNode | undefined} */
642
+ let condition // remains undefined if the loop below never runs
643
+ for (const col of using) {
644
+ /** @type {ExprNode} */
645
+ const eq = {
646
+ type: 'binary',
647
+ op: '=',
648
+ left: { type: 'identifier', name: col, positionStart, positionEnd }, // same unqualified name on both sides
649
+ right: { type: 'identifier', name: col, positionStart, positionEnd },
650
+ positionStart,
651
+ positionEnd,
652
+ }
653
+ condition = condition === undefined // left fold: the first equality stands alone, later ones are AND-ed onto the right
654
+ ? eq
655
+ : { type: 'binary', op: 'AND', left: condition, right: eq, positionStart, positionEnd }
656
+ }
657
+ return condition
658
+ }
659
+
626
660
  /**
627
661
  * Splits a join ON expression into equi-key pairs and a residual predicate so
628
662
  * the planner can route AND-of-equis (with optional range/inequality
package/src/types.d.ts CHANGED
@@ -129,7 +129,7 @@ export interface UserDefinedFunction {
129
129
  arguments: FunctionSignature
130
130
  }
131
131
 
132
- export type AggregateFunc = 'COUNT' | 'COUNTIF' | 'SUM' | 'AVG' | 'MIN' | 'MAX' | 'ARRAY_AGG' | 'JSON_ARRAYAGG' | 'STDDEV_SAMP' | 'STDDEV_POP' | 'MEDIAN' | 'PERCENTILE_CONT' | 'APPROX_QUANTILE' | 'STRING_AGG'
132
+ export type AggregateFunc = 'COUNT' | 'COUNTIF' | 'SUM' | 'AVG' | 'MIN' | 'MAX' | 'ARRAY_AGG' | 'LIST' | 'JSON_ARRAYAGG' | 'STDDEV_SAMP' | 'STDDEV_POP' | 'MEDIAN' | 'PERCENTILE_CONT' | 'APPROX_QUANTILE' | 'STRING_AGG'
133
133
 
134
134
  export type RegExpFunction = 'REGEXP_SUBSTR' | 'REGEXP_EXTRACT' | 'REGEXP_REPLACE' | 'REGEXP_MATCHES'
135
135
 
@@ -11,7 +11,7 @@ export const niladicFuncs = ['CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP'
11
11
  * @returns {name is AggregateFunc}
12
12
  */
13
13
  export function isAggregateFunc(name) {
14
- return ['COUNT', 'COUNTIF', 'SUM', 'AVG', 'MIN', 'MAX', 'ARRAY_AGG', 'JSON_ARRAYAGG', 'STDDEV_SAMP', 'STDDEV_POP', 'MEDIAN', 'PERCENTILE_CONT', 'APPROX_QUANTILE', 'STRING_AGG'].includes(name)
14
+ return ['COUNT', 'COUNTIF', 'SUM', 'AVG', 'MIN', 'MAX', 'ARRAY_AGG', 'LIST', 'JSON_ARRAYAGG', 'STDDEV_SAMP', 'STDDEV_POP', 'MEDIAN', 'PERCENTILE_CONT', 'APPROX_QUANTILE', 'STRING_AGG'].includes(name)
15
15
  }
16
16
 
17
17
  /**
@@ -179,8 +179,10 @@ export const FUNCTION_SIGNATURES = {
179
179
  JSON_ARRAY_LENGTH: { min: 1, max: 1, signature: 'array' },
180
180
  JSON_VALID: { min: 1, max: 1, signature: 'value' },
181
181
  JSON_TYPE: { min: 1, max: 1, signature: 'value' },
182
+ JSON_KEYS: { min: 1, max: 1, signature: 'value' },
182
183
  JSON_ARRAYAGG: { min: 1, max: 1, signature: 'expression' },
183
184
  ARRAY_AGG: { min: 1, max: 1, signature: 'expression' },
185
+ LIST: { min: 1, max: 1, signature: 'expression' },
184
186
 
185
187
  // Array functions
186
188
  ARRAY_LENGTH: { min: 1, max: 2, signature: 'array[, dimension]' },
@@ -3,7 +3,7 @@ export const KEYWORDS = new Set([
3
3
  'HAVING', 'ORDER', 'ASC', 'DESC', 'NULLS', 'LIMIT', 'OFFSET', 'AS', 'ALL',
4
4
  'DISTINCT', 'TRUE', 'FALSE', 'NULL', 'LIKE', 'IN', 'EXISTS', 'BETWEEN',
5
5
  'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'JOIN', 'INNER', 'LEFT', 'RIGHT',
6
- 'FULL', 'OUTER', 'CROSS', 'POSITIONAL', 'LATERAL', 'VIEW', 'ON', 'INTERVAL', 'DAY', 'MONTH', 'YEAR',
6
+ 'FULL', 'OUTER', 'CROSS', 'POSITIONAL', 'LATERAL', 'VIEW', 'ON', 'USING', 'INTERVAL', 'DAY', 'MONTH', 'YEAR',
7
7
  'HOUR', 'MINUTE', 'SECOND', 'FILTER', 'WITHIN',
8
8
  'UNION', 'INTERSECT', 'EXCEPT',
9
9
  ])
@@ -17,7 +17,7 @@ export const RESERVED_KEYWORDS = new Set([
17
17
  'EXISTS', 'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'INTERVAL',
18
18
  'GROUP', 'BY', 'HAVING', 'ORDER', 'LIMIT', 'OFFSET',
19
19
  'AS', 'ALL', 'DISTINCT',
20
- 'JOIN', 'INNER', 'LEFT', 'RIGHT', 'FULL', 'OUTER', 'ON',
20
+ 'JOIN', 'INNER', 'LEFT', 'RIGHT', 'FULL', 'OUTER', 'ON', 'USING',
21
21
  'UNION', 'INTERSECT', 'EXCEPT',
22
22
  ])
23
23
 
@@ -30,6 +30,6 @@ export const RESERVED_AFTER_COLUMN = new Set([
30
30
  // Keywords that cannot be used as table aliases
31
31
  export const RESERVED_AFTER_TABLE = new Set([
32
32
  'WHERE', 'GROUP', 'HAVING', 'ORDER', 'LIMIT', 'OFFSET', 'JOIN', 'INNER',
33
- 'LEFT', 'RIGHT', 'FULL', 'CROSS', 'ON', 'POSITIONAL', 'LATERAL',
33
+ 'LEFT', 'RIGHT', 'FULL', 'CROSS', 'ON', 'USING', 'POSITIONAL', 'LATERAL',
34
34
  'UNION', 'INTERSECT', 'EXCEPT',
35
35
  ])