squirreling 0.12.21 → 0.12.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/ast.d.ts +1 -0
- package/src/execute/aggregates.js +28 -11
- package/src/execute/execute.js +13 -12
- package/src/execute/join.js +3 -2
- package/src/execute/window.js +7 -6
- package/src/execute/yield.js +30 -0
- package/src/expression/evaluate.js +32 -7
- package/src/parse/joins.js +18 -4
- package/src/plan/columns.js +7 -0
- package/src/plan/plan.js +36 -2
- package/src/types.d.ts +1 -1
- package/src/validation/functions.js +3 -1
- package/src/validation/keywords.js +3 -3
package/README.md
CHANGED
|
@@ -141,7 +141,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
|
|
|
141
141
|
- `SELECT` statements with `DISTINCT`, `WHERE`, `ORDER BY`, `LIMIT`, `OFFSET`
|
|
142
142
|
- `WITH` clause for Common Table Expressions (CTEs)
|
|
143
143
|
- Subqueries in `SELECT`, `FROM`, `WHERE`, and correlated subqueries
|
|
144
|
-
- `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`, `LATERAL VIEW [OUTER] EXPLODE(...)`
|
|
144
|
+
- `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `CROSS JOIN`, `POSITIONAL JOIN`, `LATERAL VIEW [OUTER] EXPLODE(...)`, with `ON` or `USING (col, ...)` conditions
|
|
145
145
|
- `GROUP BY` and `HAVING` clauses
|
|
146
146
|
- Set operations: `UNION`, `UNION ALL`, `INTERSECT`, `INTERSECT ALL`, `EXCEPT`, `EXCEPT ALL`
|
|
147
147
|
- Expressions: `CASE`, `CAST`, `BETWEEN`, `IN`, `LIKE`, `IS NULL`, `IS NOT NULL`
|
|
@@ -160,7 +160,7 @@ Squirreling mostly follows the SQL standard. The following features are supporte
|
|
|
160
160
|
- Math: `ABS`, `SIGN`, `CEIL`, `FLOOR`, `ROUND`, `MOD`, `RAND`, `RANDOM`, `LN`, `LOG10`, `EXP`, `POWER`, `SQRT`
|
|
161
161
|
- Trig: `SIN`, `COS`, `TAN`, `COT`, `ASIN`, `ACOS`, `ATAN`, `ATAN2`, `DEGREES`, `RADIANS`, `PI`
|
|
162
162
|
- Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_DIFF`, `DATEDIFF`, `DATE_PART`, `DATE_TRUNC`, `EPOCH`, `EXTRACT`, `INTERVAL`
|
|
163
|
-
- Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`, `JSON_ARRAY_LENGTH`, `JSON_VALID`, `JSON_TYPE`
|
|
163
|
+
- Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`, `JSON_ARRAY_LENGTH`, `JSON_VALID`, `JSON_TYPE`, `JSON_KEYS`
|
|
164
164
|
- Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_CONTAINS`, `ARRAY_SORT`, `ARRAY_APPEND`, `ARRAY_CONCAT`, `LEN`, `CARDINALITY`, `SIZE`
|
|
165
165
|
- Table functions: `UNNEST`, `EXPLODE`, `JSON_EACH`
|
|
166
166
|
- Regex: `REGEXP_SUBSTR`, `REGEXP_EXTRACT`, `REGEXP_REPLACE`, `REGEXP_MATCHES`
|
package/package.json
CHANGED
package/src/ast.d.ts
CHANGED
package/src/execute/aggregates.js
CHANGED
|
@@ -3,6 +3,7 @@ import { evaluateExpr } from '../expression/evaluate.js'
|
|
|
3
3
|
import { executePlan, selectColumnNames } from './execute.js'
|
|
4
4
|
import { sortEntriesByTerms } from './sort.js'
|
|
5
5
|
import { keyify } from './utils.js'
|
|
6
|
+
import { yieldToEventLoop } from './yield.js'
|
|
6
7
|
|
|
7
8
|
/**
|
|
8
9
|
* @import { AsyncCells, AsyncDataSource, AsyncRow, DerivedColumn, ExecuteContext, QueryResults, SelectColumn, SqlPrimitive } from '../types.js'
|
|
@@ -89,29 +90,45 @@ export function executeHashAggregate(plan, context) {
|
|
|
89
90
|
let collectCount = 0
|
|
90
91
|
for await (const row of child.rows()) {
|
|
91
92
|
if (++collectCount % YIELD_INTERVAL === 0) {
|
|
92
|
-
await
|
|
93
|
+
await yieldToEventLoop()
|
|
93
94
|
if (context.signal?.aborted) return
|
|
94
95
|
}
|
|
95
96
|
allRows.push(row)
|
|
96
97
|
}
|
|
97
98
|
|
|
98
|
-
// Group rows by GROUP BY keys
|
|
99
|
+
// Group rows by GROUP BY keys.
|
|
100
|
+
// Each chunk dispatches all per-row key evaluations in parallel so
|
|
101
|
+
// async cells (e.g. lazy parquet decode) overlap; the await is at the
|
|
102
|
+
// chunk boundary. Synchronous cells stay cheap because we skip the
|
|
103
|
+
// inner Promise.all wrapper when there's a single GROUP BY expression.
|
|
99
104
|
/** @type {Map<any, AsyncRow[]>} */
|
|
100
105
|
const groups = new Map()
|
|
106
|
+
const { groupBy } = plan
|
|
107
|
+
const singleKey = groupBy.length === 1
|
|
108
|
+
const singleExpr = singleKey ? groupBy[0] : null
|
|
101
109
|
|
|
102
110
|
for (let chunkStart = 0; chunkStart < allRows.length; chunkStart += YIELD_INTERVAL) {
|
|
103
111
|
if (chunkStart > 0) {
|
|
104
|
-
await
|
|
112
|
+
await yieldToEventLoop()
|
|
105
113
|
if (context.signal?.aborted) return
|
|
106
114
|
}
|
|
107
115
|
const chunkEnd = Math.min(chunkStart + YIELD_INTERVAL, allRows.length)
|
|
108
|
-
const
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
116
|
+
const chunkLen = chunkEnd - chunkStart
|
|
117
|
+
/** @type {Promise<any>[]} */
|
|
118
|
+
const pending = new Array(chunkLen)
|
|
119
|
+
if (singleKey) {
|
|
120
|
+
for (let j = 0; j < chunkLen; j++) {
|
|
121
|
+
pending[j] = evaluateExpr({ node: singleExpr, row: allRows[chunkStart + j], context })
|
|
122
|
+
}
|
|
123
|
+
} else {
|
|
124
|
+
for (let j = 0; j < chunkLen; j++) {
|
|
125
|
+
const row = allRows[chunkStart + j]
|
|
126
|
+
pending[j] = Promise.all(groupBy.map(expr => evaluateExpr({ node: expr, row, context })))
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
const chunkKeys = await Promise.all(pending)
|
|
130
|
+
for (let j = 0; j < chunkLen; j++) {
|
|
131
|
+
const key = singleKey ? keyify(chunkKeys[j]) : keyify(...chunkKeys[j])
|
|
115
132
|
const row = allRows[chunkStart + j]
|
|
116
133
|
let group = groups.get(key)
|
|
117
134
|
if (!group) {
|
|
@@ -189,7 +206,7 @@ export function executeScalarAggregate(plan, context) {
|
|
|
189
206
|
let collectCount = 0
|
|
190
207
|
for await (const row of child.rows()) {
|
|
191
208
|
if (++collectCount % YIELD_INTERVAL === 0) {
|
|
192
|
-
await
|
|
209
|
+
await yieldToEventLoop()
|
|
193
210
|
if (context.signal?.aborted) return
|
|
194
211
|
}
|
|
195
212
|
group.push(row)
|
package/src/execute/execute.js
CHANGED
|
@@ -10,6 +10,7 @@ import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from '.
|
|
|
10
10
|
import { executeSort } from './sort.js'
|
|
11
11
|
import { addBounds, minBounds, stableRowKey } from './utils.js'
|
|
12
12
|
import { executeWindow } from './window.js'
|
|
13
|
+
import { yieldToEventLoop } from './yield.js'
|
|
13
14
|
|
|
14
15
|
/**
|
|
15
16
|
* @import { AsyncCells, AsyncDataSource, AsyncRow, DerivedColumn, ExecuteContext, ExecuteSqlOptions, ExprNode, IdentifierNode, QueryResults, SelectColumn, SqlPrimitive, Statement } from '../types.js'
|
|
@@ -417,7 +418,7 @@ async function* filterRows(rows, condition, context, limit) {
|
|
|
417
418
|
for await (const row of rows) {
|
|
418
419
|
if (context.signal?.aborted) return
|
|
419
420
|
if (++innerCount % YIELD_INTERVAL === 0) {
|
|
420
|
-
await
|
|
421
|
+
await yieldToEventLoop()
|
|
421
422
|
if (context.signal?.aborted) return
|
|
422
423
|
}
|
|
423
424
|
rowIndex++
|
|
@@ -463,7 +464,7 @@ async function* limitRows(rows, limit = Infinity, offset = 0, signal) {
|
|
|
463
464
|
for await (const row of rows) {
|
|
464
465
|
if (signal?.aborted) return
|
|
465
466
|
if (++innerCount % YIELD_INTERVAL === 0) {
|
|
466
|
-
await
|
|
467
|
+
await yieldToEventLoop()
|
|
467
468
|
if (signal?.aborted) return
|
|
468
469
|
}
|
|
469
470
|
if (skipped < offset) {
|
|
@@ -518,7 +519,7 @@ function executeProject(plan, context) {
|
|
|
518
519
|
for await (const row of child.rows()) {
|
|
519
520
|
if (context.signal?.aborted) return
|
|
520
521
|
if (++innerCount % YIELD_INTERVAL === 0) {
|
|
521
|
-
await
|
|
522
|
+
await yieldToEventLoop()
|
|
522
523
|
if (context.signal?.aborted) return
|
|
523
524
|
}
|
|
524
525
|
rowIndex++
|
|
@@ -611,7 +612,7 @@ function executeDistinct(plan, context) {
|
|
|
611
612
|
for await (const row of child.rows()) {
|
|
612
613
|
if (signal?.aborted) return
|
|
613
614
|
if (++innerCount % YIELD_INTERVAL === 0) {
|
|
614
|
-
await
|
|
615
|
+
await yieldToEventLoop()
|
|
615
616
|
if (signal?.aborted) return
|
|
616
617
|
}
|
|
617
618
|
buffer.push(row)
|
|
@@ -698,7 +699,7 @@ function executeSetOperation(plan, context) {
|
|
|
698
699
|
for await (const row of left.rows()) {
|
|
699
700
|
if (signal?.aborted) return
|
|
700
701
|
if (++count % YIELD_INTERVAL === 0) {
|
|
701
|
-
await
|
|
702
|
+
await yieldToEventLoop()
|
|
702
703
|
if (signal?.aborted) return
|
|
703
704
|
}
|
|
704
705
|
const key = await stableRowKey(row)
|
|
@@ -710,7 +711,7 @@ function executeSetOperation(plan, context) {
|
|
|
710
711
|
for await (const row of right.rows()) {
|
|
711
712
|
if (signal?.aborted) return
|
|
712
713
|
if (++count % YIELD_INTERVAL === 0) {
|
|
713
|
-
await
|
|
714
|
+
await yieldToEventLoop()
|
|
714
715
|
if (signal?.aborted) return
|
|
715
716
|
}
|
|
716
717
|
const key = await stableRowKey(row)
|
|
@@ -736,7 +737,7 @@ function executeSetOperation(plan, context) {
|
|
|
736
737
|
for await (const row of right.rows()) {
|
|
737
738
|
if (signal?.aborted) return
|
|
738
739
|
if (++tick % YIELD_INTERVAL === 0) {
|
|
739
|
-
await
|
|
740
|
+
await yieldToEventLoop()
|
|
740
741
|
if (signal?.aborted) return
|
|
741
742
|
}
|
|
742
743
|
const key = await stableRowKey(row)
|
|
@@ -748,7 +749,7 @@ function executeSetOperation(plan, context) {
|
|
|
748
749
|
for await (const row of left.rows()) {
|
|
749
750
|
if (signal?.aborted) return
|
|
750
751
|
if (++tick % YIELD_INTERVAL === 0) {
|
|
751
|
-
await
|
|
752
|
+
await yieldToEventLoop()
|
|
752
753
|
if (signal?.aborted) return
|
|
753
754
|
}
|
|
754
755
|
const key = await stableRowKey(row)
|
|
@@ -764,7 +765,7 @@ function executeSetOperation(plan, context) {
|
|
|
764
765
|
for await (const row of left.rows()) {
|
|
765
766
|
if (signal?.aborted) return
|
|
766
767
|
if (++tick % YIELD_INTERVAL === 0) {
|
|
767
|
-
await
|
|
768
|
+
await yieldToEventLoop()
|
|
768
769
|
if (signal?.aborted) return
|
|
769
770
|
}
|
|
770
771
|
const key = await stableRowKey(row)
|
|
@@ -791,7 +792,7 @@ function executeSetOperation(plan, context) {
|
|
|
791
792
|
for await (const row of right.rows()) {
|
|
792
793
|
if (signal?.aborted) return
|
|
793
794
|
if (++tick % YIELD_INTERVAL === 0) {
|
|
794
|
-
await
|
|
795
|
+
await yieldToEventLoop()
|
|
795
796
|
if (signal?.aborted) return
|
|
796
797
|
}
|
|
797
798
|
const key = await stableRowKey(row)
|
|
@@ -803,7 +804,7 @@ function executeSetOperation(plan, context) {
|
|
|
803
804
|
for await (const row of left.rows()) {
|
|
804
805
|
if (signal?.aborted) return
|
|
805
806
|
if (++tick % YIELD_INTERVAL === 0) {
|
|
806
|
-
await
|
|
807
|
+
await yieldToEventLoop()
|
|
807
808
|
if (signal?.aborted) return
|
|
808
809
|
}
|
|
809
810
|
const key = await stableRowKey(row)
|
|
@@ -820,7 +821,7 @@ function executeSetOperation(plan, context) {
|
|
|
820
821
|
for await (const row of left.rows()) {
|
|
821
822
|
if (signal?.aborted) return
|
|
822
823
|
if (++tick % YIELD_INTERVAL === 0) {
|
|
823
|
-
await
|
|
824
|
+
await yieldToEventLoop()
|
|
824
825
|
if (signal?.aborted) return
|
|
825
826
|
}
|
|
826
827
|
const key = await stableRowKey(row)
|
package/src/execute/join.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { evaluateExpr } from '../expression/evaluate.js'
|
|
2
2
|
import { keyify, maxBounds } from './utils.js'
|
|
3
3
|
import { executePlan } from './execute.js'
|
|
4
|
+
import { yieldToEventLoop } from './yield.js'
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* @import { AsyncCells, AsyncRow, ExecuteContext, QueryResults } from '../types.js'
|
|
@@ -56,7 +57,7 @@ export function executeNestedLoopJoin(plan, context) {
|
|
|
56
57
|
|
|
57
58
|
for (const rightRow of rightRows) {
|
|
58
59
|
if (++innerCount % YIELD_INTERVAL === 0) {
|
|
59
|
-
await
|
|
60
|
+
await yieldToEventLoop()
|
|
60
61
|
if (context.signal?.aborted) return
|
|
61
62
|
}
|
|
62
63
|
const tempMerged = mergeRows(leftRow, rightRow, leftTable, rightTable)
|
|
@@ -263,7 +264,7 @@ export function executeHashJoin(plan, context) {
|
|
|
263
264
|
if (candidates?.length) {
|
|
264
265
|
for (const rightRow of candidates) {
|
|
265
266
|
if (++innerCount % YIELD_INTERVAL === 0) {
|
|
266
|
-
await
|
|
267
|
+
await yieldToEventLoop()
|
|
267
268
|
if (context.signal?.aborted) return
|
|
268
269
|
}
|
|
269
270
|
const merged = mergeRows(leftRow, rightRow, leftTable, rightTable)
|
package/src/execute/window.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { evaluateExpr } from '../expression/evaluate.js'
|
|
2
2
|
import { executePlan } from './execute.js'
|
|
3
3
|
import { compareForTerm, keyify } from './utils.js'
|
|
4
|
+
import { yieldToEventLoop } from './yield.js'
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* @import { AsyncRow, ExecuteContext, QueryResults, SqlPrimitive } from '../types.js'
|
|
@@ -40,7 +41,7 @@ export function executeWindow(plan, context) {
|
|
|
40
41
|
let i = 0
|
|
41
42
|
for await (const row of child.rows()) {
|
|
42
43
|
if (++i % YIELD_INTERVAL === 0) {
|
|
43
|
-
await
|
|
44
|
+
await yieldToEventLoop()
|
|
44
45
|
if (context.signal?.aborted) return
|
|
45
46
|
}
|
|
46
47
|
const cells = { ...row.cells }
|
|
@@ -67,7 +68,7 @@ export function executeWindow(plan, context) {
|
|
|
67
68
|
let collectCount = 0
|
|
68
69
|
for await (const row of child.rows()) {
|
|
69
70
|
if (++collectCount % YIELD_INTERVAL === 0) {
|
|
70
|
-
await
|
|
71
|
+
await yieldToEventLoop()
|
|
71
72
|
if (context.signal?.aborted) return
|
|
72
73
|
}
|
|
73
74
|
rows.push(row)
|
|
@@ -86,7 +87,7 @@ export function executeWindow(plan, context) {
|
|
|
86
87
|
let emitCount = 0
|
|
87
88
|
for (let i = 0; i < rows.length; i++) {
|
|
88
89
|
if (++emitCount % YIELD_INTERVAL === 0) {
|
|
89
|
-
await
|
|
90
|
+
await yieldToEventLoop()
|
|
90
91
|
if (context.signal?.aborted) return
|
|
91
92
|
}
|
|
92
93
|
const row = rows[i]
|
|
@@ -120,7 +121,7 @@ async function computeWindow(spec, rows, output, context) {
|
|
|
120
121
|
const partitions = new Map()
|
|
121
122
|
for (let chunkStart = 0; chunkStart < rows.length; chunkStart += YIELD_INTERVAL) {
|
|
122
123
|
if (chunkStart > 0) {
|
|
123
|
-
await
|
|
124
|
+
await yieldToEventLoop()
|
|
124
125
|
if (context.signal?.aborted) return
|
|
125
126
|
}
|
|
126
127
|
const chunkEnd = Math.min(chunkStart + YIELD_INTERVAL, rows.length)
|
|
@@ -151,7 +152,7 @@ async function computeWindow(spec, rows, output, context) {
|
|
|
151
152
|
const entries = new Array(bucket.length)
|
|
152
153
|
for (let chunkStart = 0; chunkStart < bucket.length; chunkStart += YIELD_INTERVAL) {
|
|
153
154
|
if (chunkStart > 0) {
|
|
154
|
-
await
|
|
155
|
+
await yieldToEventLoop()
|
|
155
156
|
if (context.signal?.aborted) return
|
|
156
157
|
}
|
|
157
158
|
const chunkEnd = Math.min(chunkStart + YIELD_INTERVAL, bucket.length)
|
|
@@ -203,7 +204,7 @@ async function applyWindowFunction(spec, ordered, rows, output, context) {
|
|
|
203
204
|
let tick = 0
|
|
204
205
|
for (let k = 0; k < ordered.length; k++) {
|
|
205
206
|
if (++tick % YIELD_INTERVAL === 0) {
|
|
206
|
-
await
|
|
207
|
+
await yieldToEventLoop()
|
|
207
208
|
if (context.signal?.aborted) return
|
|
208
209
|
}
|
|
209
210
|
const idx = ordered[k]
|
package/src/execute/yield.js
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
// Cooperative yield helper: hands control back to the event loop so that
// timer-based aborts get a chance to fire between batches of work.
//
// The boundary has to be a macrotask. The abort timer is itself a
// macrotask, so resolving on a microtask alone would never let it run.
//
// Scheduler preference, decided once when the module is loaded:
//   1. setImmediate   (Node)
//   2. MessageChannel (browsers)
//   3. setTimeout(0)  (last resort)
// Node clamps setTimeout(fn, 0) to a 1ms minimum, so a tight loop that
// yields every few thousand iterations can spend hundreds of ms on
// scheduling overhead alone; the first two options provide the same
// macrotask boundary at a fraction of that cost.

/** @type {() => Promise<void>} */
export const yieldToEventLoop = (() => {
  // Node: setImmediate queues the resolver for the next loop turn.
  if (typeof setImmediate === 'function') {
    return () => new Promise(done => {
      setImmediate(done)
    })
  }

  // Browsers: one shared channel; each posted message releases exactly one
  // waiter, in the order the waiters were queued.
  if (typeof MessageChannel !== 'undefined') {
    const { port1: receiver, port2: sender } = new MessageChannel()
    /** @type {Array<() => void>} */
    const waiters = []
    receiver.onmessage = () => {
      waiters.shift()?.()
    }
    return () => new Promise(done => {
      waiters.push(done)
      sender.postMessage(0)
    })
  }

  // Neither primitive is available: fall back to a zero-delay timer.
  return () => new Promise(done => {
    setTimeout(done, 0)
  })
})()
|
package/src/expression/evaluate.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { executeStatement } from '../execute/execute.js'
|
|
2
2
|
import { isPlainObject, keyify, sqlEquals, stringify } from '../execute/utils.js'
|
|
3
|
+
import { yieldToEventLoop } from '../execute/yield.js'
|
|
3
4
|
import { ArgValueError, ExecutionError } from '../validation/executionErrors.js'
|
|
4
5
|
import { isAggregateFunc, isMathFunc, isRegexpFunc, isSpatialFunc, isStringFunc } from '../validation/functions.js'
|
|
5
6
|
import { UnknownFunctionError } from '../validation/parseErrors.js'
|
|
@@ -31,16 +32,21 @@ const YIELD_INTERVAL = 4000
|
|
|
31
32
|
async function evaluateAll(node, rows, context) {
|
|
32
33
|
/** @type {SqlPrimitive[]} */
|
|
33
34
|
const results = new Array(rows.length)
|
|
35
|
+
/** @type {Promise<SqlPrimitive>[]} */
|
|
36
|
+
const pending = new Array(Math.min(YIELD_INTERVAL, rows.length))
|
|
34
37
|
for (let i = 0; i < rows.length; i += YIELD_INTERVAL) {
|
|
35
38
|
if (i > 0) {
|
|
36
|
-
await
|
|
39
|
+
await yieldToEventLoop()
|
|
37
40
|
context.signal?.throwIfAborted()
|
|
38
41
|
}
|
|
39
42
|
const end = Math.min(i + YIELD_INTERVAL, rows.length)
|
|
40
|
-
const
|
|
41
|
-
|
|
42
|
-
)
|
|
43
|
-
|
|
43
|
+
const chunkLen = end - i
|
|
44
|
+
pending.length = chunkLen
|
|
45
|
+
for (let j = 0; j < chunkLen; j++) {
|
|
46
|
+
pending[j] = evaluateExpr({ node, row: rows[i + j], context })
|
|
47
|
+
}
|
|
48
|
+
const chunk = await Promise.all(pending)
|
|
49
|
+
for (let j = 0; j < chunkLen; j++) results[i + j] = chunk[j]
|
|
44
50
|
}
|
|
45
51
|
return results
|
|
46
52
|
}
|
|
@@ -326,7 +332,7 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
|
|
|
326
332
|
return values[lower] + (values[upper] - values[lower]) * (pos - lower)
|
|
327
333
|
}
|
|
328
334
|
|
|
329
|
-
if (funcName === 'JSON_ARRAYAGG' || funcName === 'ARRAY_AGG') {
|
|
335
|
+
if (funcName === 'JSON_ARRAYAGG' || funcName === 'ARRAY_AGG' || funcName === 'LIST') {
|
|
330
336
|
const allValues = await evaluateAll(argNode, filteredRows, context)
|
|
331
337
|
if (node.distinct) {
|
|
332
338
|
/** @type {SqlPrimitive[]} */
|
|
@@ -510,6 +516,25 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
|
|
|
510
516
|
return typeof value
|
|
511
517
|
}
|
|
512
518
|
|
|
519
|
+
if (funcName === 'JSON_KEYS') {
|
|
520
|
+
let value = args[0]
|
|
521
|
+
if (value == null) return null
|
|
522
|
+
if (typeof value === 'string') {
|
|
523
|
+
try {
|
|
524
|
+
value = JSON.parse(value)
|
|
525
|
+
} catch {
|
|
526
|
+
throw new ArgValueError({
|
|
527
|
+
...node,
|
|
528
|
+
message: 'invalid JSON string',
|
|
529
|
+
hint: 'Argument must be valid JSON.',
|
|
530
|
+
rowIndex,
|
|
531
|
+
})
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
if (typeof value !== 'object' || value === null || Array.isArray(value) || value instanceof Date) return null
|
|
535
|
+
return Object.keys(value)
|
|
536
|
+
}
|
|
537
|
+
|
|
513
538
|
if (funcName === 'JSON_ARRAY_LENGTH') {
|
|
514
539
|
let arr = args[0]
|
|
515
540
|
if (arr == null) return null
|
|
@@ -703,7 +728,7 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
|
|
|
703
728
|
let innerCount = 0
|
|
704
729
|
for await (const resRow of subResult.rows()) {
|
|
705
730
|
if (++innerCount % YIELD_INTERVAL === 0) {
|
|
706
|
-
await
|
|
731
|
+
await yieldToEventLoop()
|
|
707
732
|
context.signal?.throwIfAborted()
|
|
708
733
|
}
|
|
709
734
|
const value = await resRow.cells[resRow.columns[0]]()
|
package/src/parse/joins.js
CHANGED
|
@@ -222,13 +222,26 @@ export function parseJoins(state) {
|
|
|
222
222
|
const tableTok = expect(state, 'identifier')
|
|
223
223
|
const tableAlias = parseTableAlias(state)
|
|
224
224
|
|
|
225
|
-
// Parse ON condition (not for POSITIONAL joins)
|
|
225
|
+
// Parse ON condition or USING column list (not for POSITIONAL joins)
|
|
226
226
|
/** @type {ExprNode | undefined} */
|
|
227
227
|
let condition
|
|
228
|
+
/** @type {string[] | undefined} */
|
|
229
|
+
let using
|
|
228
230
|
if (joinType !== 'POSITIONAL') {
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
231
|
+
if (match(state, 'keyword', 'USING')) {
|
|
232
|
+
expect(state, 'paren', '(')
|
|
233
|
+
using = []
|
|
234
|
+
while (true) {
|
|
235
|
+
const colTok = expect(state, 'identifier')
|
|
236
|
+
using.push(colTok.value)
|
|
237
|
+
if (!match(state, 'comma')) break
|
|
238
|
+
}
|
|
239
|
+
expect(state, 'paren', ')')
|
|
240
|
+
} else {
|
|
241
|
+
expect(state, 'keyword', 'ON')
|
|
242
|
+
condition = parseExpression(state)
|
|
243
|
+
expectNoAggregate(condition, 'JOIN ON')
|
|
244
|
+
}
|
|
232
245
|
}
|
|
233
246
|
|
|
234
247
|
joins.push({
|
|
@@ -236,6 +249,7 @@ export function parseJoins(state) {
|
|
|
236
249
|
table: tableTok.value,
|
|
237
250
|
alias: tableAlias,
|
|
238
251
|
on: condition,
|
|
252
|
+
using,
|
|
239
253
|
positionStart: tok.positionStart,
|
|
240
254
|
positionEnd: tableTok.positionEnd,
|
|
241
255
|
})
|
package/src/plan/columns.js
CHANGED
|
@@ -153,6 +153,13 @@ export function extractColumns({ select, parentColumns }) {
|
|
|
153
153
|
if (sourceAlias !== undefined) visibleLateralAliases.push(sourceAlias)
|
|
154
154
|
for (const join of select.joins) {
|
|
155
155
|
collectColumnsFromExpr(join.on, identifiers)
|
|
156
|
+
// USING columns are equi-join keys on both sides; keep them in every
|
|
157
|
+
// table's needed set so projection pushdown can't prune the join key.
|
|
158
|
+
if (join.using) {
|
|
159
|
+
for (const col of join.using) {
|
|
160
|
+
for (const [, set] of perTable) set?.add(col)
|
|
161
|
+
}
|
|
162
|
+
}
|
|
156
163
|
const joinAlias = join.alias ?? join.table
|
|
157
164
|
if (join.fromFunction) {
|
|
158
165
|
/** @type {IdentifierNode[]} */
|
package/src/plan/plan.js
CHANGED
|
@@ -463,7 +463,10 @@ function planJoin({ left, joins, leftTable, ctePlans, cteColumns, perTableColumn
|
|
|
463
463
|
if (join.joinType === 'POSITIONAL') {
|
|
464
464
|
plan = { type: 'PositionalJoin', leftAlias: currentLeftTable, rightAlias: rightTable, left: plan, right: rightScan }
|
|
465
465
|
} else {
|
|
466
|
-
|
|
466
|
+
// `USING (cols)` desugars to an equi-condition `left.col = right.col` per
|
|
467
|
+
// column, which routes through the hash-join path like any other ON.
|
|
468
|
+
const condition = join.on ?? (join.using && buildUsingCondition(join.using, join))
|
|
469
|
+
const keys = condition && extractEquiKeys({ condition, leftTable: currentLeftTable, rightTable })
|
|
467
470
|
if (keys) {
|
|
468
471
|
/** @type {HashJoinNode} */
|
|
469
472
|
const hashJoin = {
|
|
@@ -484,7 +487,7 @@ function planJoin({ left, joins, leftTable, ctePlans, cteColumns, perTableColumn
|
|
|
484
487
|
joinType: join.joinType,
|
|
485
488
|
leftAlias: currentLeftTable,
|
|
486
489
|
rightAlias: rightTable,
|
|
487
|
-
condition
|
|
490
|
+
condition,
|
|
488
491
|
left: plan,
|
|
489
492
|
right: rightScan,
|
|
490
493
|
}
|
|
@@ -623,6 +626,37 @@ function normalizeIdentifiers(node, sourceColumns) {
|
|
|
623
626
|
return node
|
|
624
627
|
}
|
|
625
628
|
|
|
629
|
+
/**
 * Synthesizes the join predicate for `JOIN ... USING (cols)`.
 *
 * Every listed column becomes a `col = col` comparison whose two operands are
 * unprefixed identifier nodes of the same name; the comparisons are then
 * chained left-to-right with `AND`. Per the planner's design, the hash-join
 * path evaluates the left key against the left row and the right key against
 * the right row, so each unqualified name resolves on its own side.
 *
 * All synthesized nodes (identifiers, equalities, and ANDs) carry the
 * `positionStart` / `positionEnd` taken from `pos`.
 *
 * @param {string[]} using - shared column names from the USING clause
 * @param {{ positionStart: number, positionEnd: number }} pos - position info for the synthesized exprs
 * @returns {ExprNode | undefined} the combined condition, or undefined when `using` is empty
 */
function buildUsingCondition(using, pos) {
  const span = { positionStart: pos.positionStart, positionEnd: pos.positionEnd }

  /**
   * @param {string} name
   * @returns {ExprNode}
   */
  const identifier = name => ({ type: 'identifier', name, ...span })

  /** @type {ExprNode[]} */
  const equalities = using.map(col => ({
    type: 'binary',
    op: '=',
    left: identifier(col),
    right: identifier(col),
    ...span,
  }))

  // Seed the fold with the first equality so a single column yields a bare
  // `=` node and an empty list yields undefined.
  const [first, ...rest] = equalities
  return rest.reduce(
    (combined, eq) => ({ type: 'binary', op: 'AND', left: combined, right: eq, ...span }),
    first
  )
}
|
|
659
|
+
|
|
626
660
|
/**
|
|
627
661
|
* Splits a join ON expression into equi-key pairs and a residual predicate so
|
|
628
662
|
* the planner can route AND-of-equis (with optional range/inequality
|
package/src/types.d.ts
CHANGED
|
@@ -129,7 +129,7 @@ export interface UserDefinedFunction {
|
|
|
129
129
|
arguments: FunctionSignature
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
-
export type AggregateFunc = 'COUNT' | 'COUNTIF' | 'SUM' | 'AVG' | 'MIN' | 'MAX' | 'ARRAY_AGG' | 'JSON_ARRAYAGG' | 'STDDEV_SAMP' | 'STDDEV_POP' | 'MEDIAN' | 'PERCENTILE_CONT' | 'APPROX_QUANTILE' | 'STRING_AGG'
|
|
132
|
+
export type AggregateFunc = 'COUNT' | 'COUNTIF' | 'SUM' | 'AVG' | 'MIN' | 'MAX' | 'ARRAY_AGG' | 'LIST' | 'JSON_ARRAYAGG' | 'STDDEV_SAMP' | 'STDDEV_POP' | 'MEDIAN' | 'PERCENTILE_CONT' | 'APPROX_QUANTILE' | 'STRING_AGG'
|
|
133
133
|
|
|
134
134
|
export type RegExpFunction = 'REGEXP_SUBSTR' | 'REGEXP_EXTRACT' | 'REGEXP_REPLACE' | 'REGEXP_MATCHES'
|
|
135
135
|
|
package/src/validation/functions.js
CHANGED
|
@@ -11,7 +11,7 @@ export const niladicFuncs = ['CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP'
|
|
|
11
11
|
* @returns {name is AggregateFunc}
|
|
12
12
|
*/
|
|
13
13
|
export function isAggregateFunc(name) {
|
|
14
|
-
return ['COUNT', 'COUNTIF', 'SUM', 'AVG', 'MIN', 'MAX', 'ARRAY_AGG', 'JSON_ARRAYAGG', 'STDDEV_SAMP', 'STDDEV_POP', 'MEDIAN', 'PERCENTILE_CONT', 'APPROX_QUANTILE', 'STRING_AGG'].includes(name)
|
|
14
|
+
return ['COUNT', 'COUNTIF', 'SUM', 'AVG', 'MIN', 'MAX', 'ARRAY_AGG', 'LIST', 'JSON_ARRAYAGG', 'STDDEV_SAMP', 'STDDEV_POP', 'MEDIAN', 'PERCENTILE_CONT', 'APPROX_QUANTILE', 'STRING_AGG'].includes(name)
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
/**
|
|
@@ -179,8 +179,10 @@ export const FUNCTION_SIGNATURES = {
|
|
|
179
179
|
JSON_ARRAY_LENGTH: { min: 1, max: 1, signature: 'array' },
|
|
180
180
|
JSON_VALID: { min: 1, max: 1, signature: 'value' },
|
|
181
181
|
JSON_TYPE: { min: 1, max: 1, signature: 'value' },
|
|
182
|
+
JSON_KEYS: { min: 1, max: 1, signature: 'value' },
|
|
182
183
|
JSON_ARRAYAGG: { min: 1, max: 1, signature: 'expression' },
|
|
183
184
|
ARRAY_AGG: { min: 1, max: 1, signature: 'expression' },
|
|
185
|
+
LIST: { min: 1, max: 1, signature: 'expression' },
|
|
184
186
|
|
|
185
187
|
// Array functions
|
|
186
188
|
ARRAY_LENGTH: { min: 1, max: 2, signature: 'array[, dimension]' },
|
package/src/validation/keywords.js
CHANGED
|
@@ -3,7 +3,7 @@ export const KEYWORDS = new Set([
|
|
|
3
3
|
'HAVING', 'ORDER', 'ASC', 'DESC', 'NULLS', 'LIMIT', 'OFFSET', 'AS', 'ALL',
|
|
4
4
|
'DISTINCT', 'TRUE', 'FALSE', 'NULL', 'LIKE', 'IN', 'EXISTS', 'BETWEEN',
|
|
5
5
|
'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'JOIN', 'INNER', 'LEFT', 'RIGHT',
|
|
6
|
-
'FULL', 'OUTER', 'CROSS', 'POSITIONAL', 'LATERAL', 'VIEW', 'ON', 'INTERVAL', 'DAY', 'MONTH', 'YEAR',
|
|
6
|
+
'FULL', 'OUTER', 'CROSS', 'POSITIONAL', 'LATERAL', 'VIEW', 'ON', 'USING', 'INTERVAL', 'DAY', 'MONTH', 'YEAR',
|
|
7
7
|
'HOUR', 'MINUTE', 'SECOND', 'FILTER', 'WITHIN',
|
|
8
8
|
'UNION', 'INTERSECT', 'EXCEPT',
|
|
9
9
|
])
|
|
@@ -17,7 +17,7 @@ export const RESERVED_KEYWORDS = new Set([
|
|
|
17
17
|
'EXISTS', 'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'INTERVAL',
|
|
18
18
|
'GROUP', 'BY', 'HAVING', 'ORDER', 'LIMIT', 'OFFSET',
|
|
19
19
|
'AS', 'ALL', 'DISTINCT',
|
|
20
|
-
'JOIN', 'INNER', 'LEFT', 'RIGHT', 'FULL', 'OUTER', 'ON',
|
|
20
|
+
'JOIN', 'INNER', 'LEFT', 'RIGHT', 'FULL', 'OUTER', 'ON', 'USING',
|
|
21
21
|
'UNION', 'INTERSECT', 'EXCEPT',
|
|
22
22
|
])
|
|
23
23
|
|
|
@@ -30,6 +30,6 @@ export const RESERVED_AFTER_COLUMN = new Set([
|
|
|
30
30
|
// Keywords that cannot be used as table aliases
|
|
31
31
|
export const RESERVED_AFTER_TABLE = new Set([
|
|
32
32
|
'WHERE', 'GROUP', 'HAVING', 'ORDER', 'LIMIT', 'OFFSET', 'JOIN', 'INNER',
|
|
33
|
-
'LEFT', 'RIGHT', 'FULL', 'CROSS', 'ON', 'POSITIONAL', 'LATERAL',
|
|
33
|
+
'LEFT', 'RIGHT', 'FULL', 'CROSS', 'ON', 'USING', 'POSITIONAL', 'LATERAL',
|
|
34
34
|
'UNION', 'INTERSECT', 'EXCEPT',
|
|
35
35
|
])
|