squirreling 0.12.14 → 0.12.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/package.json +1 -1
- package/src/execute/window.js +50 -17
- package/src/expression/date.js +26 -0
- package/src/expression/evaluate.js +29 -4
- package/src/types.d.ts +1 -1
- package/src/validation/functions.js +10 -2
package/README.md
CHANGED
|
@@ -154,13 +154,14 @@ Squirreling mostly follows the SQL standard. The following features are supporte
|
|
|
154
154
|
|
|
155
155
|
### Functions
|
|
156
156
|
|
|
157
|
-
- Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `MEDIAN`, `PERCENTILE_CONT`, `APPROX_QUANTILE`, `STDDEV_POP`, `STDDEV_SAMP`, `ARRAY_AGG`, `JSON_ARRAYAGG`, `STRING_AGG`
|
|
157
|
+
- Aggregate: `COUNT`, `COUNTIF`, `SUM`, `AVG`, `MIN`, `MAX`, `MEDIAN`, `PERCENTILE_CONT`, `APPROX_QUANTILE`, `STDDEV_POP`, `STDDEV_SAMP`, `ARRAY_AGG`, `JSON_ARRAYAGG`, `STRING_AGG`
|
|
158
|
+
- Window: `ROW_NUMBER`, `LAG`, `LEAD`
|
|
158
159
|
- String: `CONCAT`, `SUBSTRING`, `REPLACE`, `LENGTH`, `UPPER`, `LOWER`, `TRIM`, `LEFT`, `RIGHT`, `INSTR`, `POSITION`, `STRPOS`
|
|
159
160
|
- Math: `ABS`, `SIGN`, `CEIL`, `FLOOR`, `ROUND`, `MOD`, `RAND`, `RANDOM`, `LN`, `LOG10`, `EXP`, `POWER`, `SQRT`
|
|
160
161
|
- Trig: `SIN`, `COS`, `TAN`, `COT`, `ASIN`, `ACOS`, `ATAN`, `ATAN2`, `DEGREES`, `RADIANS`, `PI`
|
|
161
|
-
- Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_PART`, `DATE_TRUNC`, `EXTRACT`, `INTERVAL`
|
|
162
|
+
- Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_DIFF`, `DATEDIFF`, `DATE_PART`, `DATE_TRUNC`, `EPOCH`, `EXTRACT`, `INTERVAL`
|
|
162
163
|
- Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`, `JSON_ARRAY_LENGTH`, `JSON_VALID`, `JSON_TYPE`
|
|
163
|
-
- Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_SORT`, `CARDINALITY`
|
|
164
|
+
- Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_CONTAINS`, `ARRAY_SORT`, `CARDINALITY`, `SIZE`
|
|
164
165
|
- Table functions: `UNNEST`, `EXPLODE`, `JSON_EACH`
|
|
165
166
|
- Regex: `REGEXP_SUBSTR`, `REGEXP_EXTRACT`, `REGEXP_REPLACE`, `REGEXP_MATCHES`
|
|
166
167
|
- Spatial: `ST_GeomFromText`, `ST_MakeEnvelope`, `ST_AsText`, `ST_Intersects`, `ST_Contains`, `ST_ContainsProperly`, `ST_Within`, `ST_Overlaps`, `ST_Touches`, `ST_Equals`, `ST_Crosses`, `ST_Covers`, `ST_CoveredBy`, `ST_DWithin`
|
package/package.json
CHANGED
package/src/execute/window.js
CHANGED
|
@@ -20,10 +20,13 @@ export function executeWindow(plan, context) {
|
|
|
20
20
|
const child = executePlan({ plan: plan.child, context })
|
|
21
21
|
const extraColumns = plan.windows.map(w => w.alias)
|
|
22
22
|
|
|
23
|
-
// Streaming fast path: every window is
|
|
24
|
-
//
|
|
25
|
-
//
|
|
26
|
-
|
|
23
|
+
// Streaming fast path: every window is a positional function (e.g.
|
|
24
|
+
// ROW_NUMBER) with OVER () — no partition/order — so each row's output
|
|
25
|
+
// depends only on its index in the input stream. Avoids buffering, which
|
|
26
|
+
// matters for large scans (e.g. parquet).
|
|
27
|
+
const streamable = plan.windows.every(w =>
|
|
28
|
+
w.funcName === 'ROW_NUMBER' && w.partitionBy.length === 0 && w.orderBy.length === 0
|
|
29
|
+
)
|
|
27
30
|
|
|
28
31
|
if (streamable) {
|
|
29
32
|
return {
|
|
@@ -37,7 +40,7 @@ export function executeWindow(plan, context) {
|
|
|
37
40
|
i++
|
|
38
41
|
const cells = { ...row.cells }
|
|
39
42
|
for (const w of plan.windows) {
|
|
40
|
-
const value =
|
|
43
|
+
const value = i
|
|
41
44
|
cells[w.alias] = () => Promise.resolve(value)
|
|
42
45
|
}
|
|
43
46
|
yield {
|
|
@@ -119,6 +122,8 @@ async function computeWindow(spec, rows, output, context) {
|
|
|
119
122
|
if (context.signal?.aborted) return
|
|
120
123
|
|
|
121
124
|
// Order within the partition. Empty ORDER BY → input order.
|
|
125
|
+
/** @type {number[]} */
|
|
126
|
+
let ordered
|
|
122
127
|
if (spec.orderBy.length) {
|
|
123
128
|
const orderValues = await Promise.all(bucket.map(idx =>
|
|
124
129
|
Promise.all(spec.orderBy.map(term => evaluateExpr({ node: term.expr, row: rows[idx], context })))
|
|
@@ -132,23 +137,51 @@ async function computeWindow(spec, rows, output, context) {
|
|
|
132
137
|
}
|
|
133
138
|
return a.pos - b.pos
|
|
134
139
|
})
|
|
135
|
-
|
|
136
|
-
output[entries[k].idx] = assignRowNumber(spec.funcName, k)
|
|
137
|
-
}
|
|
140
|
+
ordered = entries.map(e => e.idx)
|
|
138
141
|
} else {
|
|
139
|
-
|
|
140
|
-
output[bucket[k]] = assignRowNumber(spec.funcName, k)
|
|
141
|
-
}
|
|
142
|
+
ordered = bucket
|
|
142
143
|
}
|
|
144
|
+
|
|
145
|
+
await applyWindowFunction(spec, ordered, rows, output, context)
|
|
143
146
|
}
|
|
144
147
|
}
|
|
145
148
|
|
|
146
149
|
/**
 * Computes window function values for a single partition's rows in order.
 *
 * ROW_NUMBER writes the 1-based position of each row within `ordered`.
 * LAG / LEAD evaluate the value expression against the row that sits
 * `offset` positions before (LAG) or after (LEAD) the current row in
 * `ordered`. The offset expression is evaluated per row and defaults to 1.
 * When the target position falls outside the partition, the default
 * expression (evaluated against the current row) is used, or null if no
 * default was given. A NULL offset produces NULL.
 *
 * @param {WindowSpec} spec
 * @param {number[]} ordered - row indices in window order
 * @param {AsyncRow[]} rows
 * @param {SqlPrimitive[]} output - written in place, indexed by row index
 * @param {ExecuteContext} context
 * @returns {Promise<void>}
 * @throws {Error} if the offset is not an integer, or the function is unsupported
 */
async function applyWindowFunction(spec, ordered, rows, output, context) {
  const { funcName } = spec

  if (funcName === 'ROW_NUMBER') {
    for (let k = 0; k < ordered.length; k++) {
      output[ordered[k]] = k + 1
    }
    return
  }

  if (funcName === 'LAG' || funcName === 'LEAD') {
    const direction = funcName === 'LAG' ? -1 : 1
    const [valueExpr, offsetExpr, defaultExpr] = spec.args
    for (let k = 0; k < ordered.length; k++) {
      // Stop early on cancellation; remaining output slots are left untouched
      if (context.signal?.aborted) return
      const idx = ordered[k]
      const row = rows[idx]

      let offset = 1
      if (offsetExpr) {
        const rawOffset = await evaluateExpr({ node: offsetExpr, row, context })
        // Number(null) is 0, which would silently return the current row's
        // own value. A NULL offset yields NULL instead.
        if (rawOffset == null) {
          output[idx] = null
          continue
        }
        offset = Number(rawOffset)
        // A fractional or NaN offset cannot address a row position
        if (!Number.isInteger(offset)) {
          throw new Error(`${funcName} offset must be an integer, got ${String(rawOffset)}`)
        }
      }

      const target = k + direction * offset
      if (target >= 0 && target < ordered.length) {
        output[idx] = await evaluateExpr({ node: valueExpr, row: rows[ordered[target]], context })
      } else if (defaultExpr) {
        output[idx] = await evaluateExpr({ node: defaultExpr, row, context })
      } else {
        output[idx] = null
      }
    }
    return
  }

  throw new Error(`Unsupported window function: ${spec.funcName}`)
}
|
package/src/expression/date.js
CHANGED
|
@@ -100,6 +100,32 @@ export function extractField(field, dateVal) {
|
|
|
100
100
|
return null
|
|
101
101
|
}
|
|
102
102
|
|
|
103
|
+
/**
 * Compute the signed difference between two dates (end - start) in a unit.
 *
 * YEAR and MONTH compare UTC calendar fields, so they count calendar
 * boundaries crossed (2023-12-31 to 2024-01-01 is 1 YEAR and 1 MONTH).
 * DAY, HOUR, MINUTE and SECOND count whole elapsed units computed from the
 * millisecond difference, truncated toward zero.
 *
 * @param {SqlPrimitive} unit - unit name, matched case-insensitively
 * @param {SqlPrimitive} startVal
 * @param {SqlPrimitive} endVal
 * @returns {number | null} null when any argument is null, when toDate
 *   returns null for either date, or when the unit is not recognized
 */
export function dateDiff(unit, startVal, endVal) {
  if (unit == null || startVal == null || endVal == null) return null
  const start = toDate(startVal)
  const end = toDate(endVal)
  if (start == null || end == null) return null

  const yearDelta = end.getUTCFullYear() - start.getUTCFullYear()

  /** @type {number} */
  let msPerUnit
  switch (String(unit).toUpperCase()) {
    case 'YEAR':
      return yearDelta
    case 'MONTH':
      return yearDelta * 12 + (end.getUTCMonth() - start.getUTCMonth())
    case 'DAY':
      msPerUnit = 86400000
      break
    case 'HOUR':
      msPerUnit = 3600000
      break
    case 'MINUTE':
      msPerUnit = 60000
      break
    case 'SECOND':
      msPerUnit = 1000
      break
    default:
      return null
  }

  const whole = Math.trunc((end.getTime() - start.getTime()) / msPerUnit)
  // Math.trunc returns -0 when end is less than one unit before start;
  // report a plain 0 so callers never observe negative zero.
  return whole === 0 ? 0 : whole
}
|
|
128
|
+
|
|
103
129
|
/**
|
|
104
130
|
* @param {SqlPrimitive} val
|
|
105
131
|
* @returns {Date | null}
|
|
package/src/expression/evaluate.js
CHANGED
|
@@ -6,7 +6,7 @@ import { UnknownFunctionError } from '../validation/parseErrors.js'
|
|
|
6
6
|
import { ColumnNotFoundError } from '../validation/tables.js'
|
|
7
7
|
import { derivedAlias } from './alias.js'
|
|
8
8
|
import { applyBinaryOp } from './binary.js'
|
|
9
|
-
import { applyIntervalToDate, dateTrunc, extractField } from './date.js'
|
|
9
|
+
import { applyIntervalToDate, dateDiff, dateTrunc, extractField } from './date.js'
|
|
10
10
|
import { evaluateMathFunc } from './math.js'
|
|
11
11
|
import { evaluateRegexpFunc } from './regexp.js'
|
|
12
12
|
import { evaluateSpatialFunc } from '../spatial/spatial.js'
|
|
@@ -208,6 +208,17 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
|
|
|
208
208
|
return count
|
|
209
209
|
}
|
|
210
210
|
|
|
211
|
+
if (funcName === 'COUNTIF') {
|
|
212
|
+
const values = await Promise.all(filteredRows.map(row =>
|
|
213
|
+
evaluateExpr({ node: argNode, row, context })
|
|
214
|
+
))
|
|
215
|
+
let count = 0
|
|
216
|
+
for (const v of values) {
|
|
217
|
+
if (v) count++
|
|
218
|
+
}
|
|
219
|
+
return count
|
|
220
|
+
}
|
|
221
|
+
|
|
211
222
|
if (funcName === 'SUM' || funcName === 'AVG' || funcName === 'MIN' || funcName === 'MAX') {
|
|
212
223
|
const rawValues = await Promise.all(filteredRows.map(row =>
|
|
213
224
|
evaluateExpr({ node: argNode, row, context })
|
|
@@ -406,6 +417,14 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
|
|
|
406
417
|
return extractField(args[0], args[1])
|
|
407
418
|
}
|
|
408
419
|
|
|
420
|
+
if (funcName === 'EPOCH') {
|
|
421
|
+
return extractField('EPOCH', args[0])
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
if (funcName === 'DATE_DIFF' || funcName === 'DATEDIFF') {
|
|
425
|
+
return dateDiff(args[0], args[1], args[2])
|
|
426
|
+
}
|
|
427
|
+
|
|
409
428
|
if (funcName === 'CURRENT_DATE') {
|
|
410
429
|
return new Date().toISOString().split('T')[0]
|
|
411
430
|
}
|
|
@@ -497,7 +516,7 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
|
|
|
497
516
|
return arr.length
|
|
498
517
|
}
|
|
499
518
|
|
|
500
|
-
if (funcName === 'ARRAY_LENGTH' || funcName === 'CARDINALITY') {
|
|
519
|
+
if (funcName === 'ARRAY_LENGTH' || funcName === 'CARDINALITY' || funcName === 'SIZE') {
|
|
501
520
|
const arr = args[0]
|
|
502
521
|
if (!Array.isArray(arr)) return null
|
|
503
522
|
if (funcName === 'ARRAY_LENGTH' && args.length === 2) {
|
|
@@ -527,6 +546,12 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
|
|
|
527
546
|
return index === -1 ? null : index + 1
|
|
528
547
|
}
|
|
529
548
|
|
|
549
|
+
if (funcName === 'ARRAY_CONTAINS') {
|
|
550
|
+
const [arr, target] = args
|
|
551
|
+
if (!Array.isArray(arr)) return null
|
|
552
|
+
return arr.includes(target)
|
|
553
|
+
}
|
|
554
|
+
|
|
530
555
|
if (funcName === 'ARRAY_SORT') {
|
|
531
556
|
const arr = args[0]
|
|
532
557
|
if (!Array.isArray(arr)) return null
|
|
@@ -612,8 +637,8 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
|
|
|
612
637
|
if (typeof val === 'object') return stringify(val)
|
|
613
638
|
return String(val)
|
|
614
639
|
}
|
|
615
|
-
// Can only cast primitives to other primitive types
|
|
616
|
-
if (typeof val === 'object') {
|
|
640
|
+
// Can only cast primitives (and Dates) to other primitive types
|
|
641
|
+
if (typeof val === 'object' && !(val instanceof Date)) {
|
|
617
642
|
throw new ExecutionError({ message: `Cannot CAST object to ${toType}`, rowIndex, ...node })
|
|
618
643
|
}
|
|
619
644
|
if (toType === 'INTEGER' || toType === 'INT') {
|
package/src/types.d.ts
CHANGED
|
@@ -129,7 +129,7 @@ export interface UserDefinedFunction {
|
|
|
129
129
|
arguments: FunctionSignature
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
-
export type AggregateFunc = 'COUNT' | 'SUM' | 'AVG' | 'MIN' | 'MAX' | 'ARRAY_AGG' | 'JSON_ARRAYAGG' | 'STDDEV_SAMP' | 'STDDEV_POP' | 'MEDIAN' | 'PERCENTILE_CONT' | 'APPROX_QUANTILE' | 'STRING_AGG'
|
|
132
|
+
export type AggregateFunc = 'COUNT' | 'COUNTIF' | 'SUM' | 'AVG' | 'MIN' | 'MAX' | 'ARRAY_AGG' | 'JSON_ARRAYAGG' | 'STDDEV_SAMP' | 'STDDEV_POP' | 'MEDIAN' | 'PERCENTILE_CONT' | 'APPROX_QUANTILE' | 'STRING_AGG'
|
|
133
133
|
|
|
134
134
|
export type RegExpFunction = 'REGEXP_SUBSTR' | 'REGEXP_EXTRACT' | 'REGEXP_REPLACE' | 'REGEXP_MATCHES'
|
|
135
135
|
|
|
package/src/validation/functions.js
CHANGED
|
@@ -11,7 +11,7 @@ export const niladicFuncs = ['CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP'
|
|
|
11
11
|
* @returns {name is AggregateFunc}
|
|
12
12
|
*/
|
|
13
13
|
export function isAggregateFunc(name) {
  // Names recognized as aggregate functions (exact, case-sensitive match)
  const aggregateNames = [
    'COUNT',
    'COUNTIF',
    'SUM',
    'AVG',
    'MIN',
    'MAX',
    'ARRAY_AGG',
    'JSON_ARRAYAGG',
    'STDDEV_SAMP',
    'STDDEV_POP',
    'MEDIAN',
    'PERCENTILE_CONT',
    'APPROX_QUANTILE',
    'STRING_AGG',
  ]
  return aggregateNames.some(candidate => candidate === name)
}
|
|
16
16
|
|
|
17
17
|
/**
|
|
@@ -31,7 +31,7 @@ export function isMathFunc(name) {
|
|
|
31
31
|
* @returns {boolean}
|
|
32
32
|
*/
|
|
33
33
|
export function isWindowFunc(name) {
  // Exact, case-sensitive match against the recognized window function names
  return name === 'ROW_NUMBER' || name === 'LAG' || name === 'LEAD'
}
|
|
36
36
|
|
|
37
37
|
/**
|
|
@@ -140,7 +140,10 @@ export const FUNCTION_SIGNATURES = {
|
|
|
140
140
|
CURRENT_TIMESTAMP: { min: 0, max: 0, signature: '' },
|
|
141
141
|
DATE_TRUNC: { min: 2, max: 2, signature: 'unit, date' },
|
|
142
142
|
DATE_PART: { min: 2, max: 2, signature: 'field, date' },
|
|
143
|
+
DATE_DIFF: { min: 3, max: 3, signature: 'unit, start, end' },
|
|
144
|
+
DATEDIFF: { min: 3, max: 3, signature: 'unit, start, end' },
|
|
143
145
|
EXTRACT: { min: 2, max: 2, signature: 'field FROM date' },
|
|
146
|
+
EPOCH: { min: 1, max: 1, signature: 'date' },
|
|
144
147
|
|
|
145
148
|
// Math functions
|
|
146
149
|
FLOOR: { min: 1, max: 1, signature: 'number' },
|
|
@@ -182,8 +185,10 @@ export const FUNCTION_SIGNATURES = {
|
|
|
182
185
|
// Array functions
|
|
183
186
|
ARRAY_LENGTH: { min: 1, max: 2, signature: 'array[, dimension]' },
|
|
184
187
|
ARRAY_POSITION: { min: 2, max: 2, signature: 'array, element' },
|
|
188
|
+
ARRAY_CONTAINS: { min: 2, max: 2, signature: 'array, element' },
|
|
185
189
|
ARRAY_SORT: { min: 1, max: 1, signature: 'array' },
|
|
186
190
|
CARDINALITY: { min: 1, max: 1, signature: 'array' },
|
|
191
|
+
SIZE: { min: 1, max: 1, signature: 'array' },
|
|
187
192
|
|
|
188
193
|
// Table functions (used in FROM clause)
|
|
189
194
|
UNNEST: { min: 1, max: 1, signature: 'array' },
|
|
@@ -198,6 +203,7 @@ export const FUNCTION_SIGNATURES = {
|
|
|
198
203
|
|
|
199
204
|
// Aggregate functions
|
|
200
205
|
COUNT: { min: 1, max: 1, signature: 'expression' },
|
|
206
|
+
COUNTIF: { min: 1, max: 1, signature: 'condition' },
|
|
201
207
|
SUM: { min: 1, max: 1, signature: 'expression' },
|
|
202
208
|
AVG: { min: 1, max: 1, signature: 'expression' },
|
|
203
209
|
MIN: { min: 1, max: 1, signature: 'expression' },
|
|
@@ -211,6 +217,8 @@ export const FUNCTION_SIGNATURES = {
|
|
|
211
217
|
|
|
212
218
|
// Window functions
|
|
213
219
|
ROW_NUMBER: { min: 0, max: 0, signature: '' },
|
|
220
|
+
LAG: { min: 1, max: 3, signature: 'value[, offset[, default]]' },
|
|
221
|
+
LEAD: { min: 1, max: 3, signature: 'value[, offset[, default]]' },
|
|
214
222
|
|
|
215
223
|
// Spatial functions
|
|
216
224
|
ST_INTERSECTS: { min: 2, max: 2, signature: 'geometry, geometry' },
|