squirreling 0.12.14 → 0.12.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -154,13 +154,14 @@ Squirreling mostly follows the SQL standard. The following features are supporte
154
154
 
155
155
  ### Functions
156
156
 
157
- - Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `MEDIAN`, `PERCENTILE_CONT`, `APPROX_QUANTILE`, `STDDEV_POP`, `STDDEV_SAMP`, `ARRAY_AGG`, `JSON_ARRAYAGG`, `STRING_AGG`
157
+ - Aggregate: `COUNT`, `COUNTIF`, `SUM`, `AVG`, `MIN`, `MAX`, `MEDIAN`, `PERCENTILE_CONT`, `APPROX_QUANTILE`, `STDDEV_POP`, `STDDEV_SAMP`, `ARRAY_AGG`, `JSON_ARRAYAGG`, `STRING_AGG`
158
+ - Window: `ROW_NUMBER`, `LAG`, `LEAD`
158
159
  - String: `CONCAT`, `SUBSTRING`, `REPLACE`, `LENGTH`, `UPPER`, `LOWER`, `TRIM`, `LEFT`, `RIGHT`, `INSTR`, `POSITION`, `STRPOS`
159
160
  - Math: `ABS`, `SIGN`, `CEIL`, `FLOOR`, `ROUND`, `MOD`, `RAND`, `RANDOM`, `LN`, `LOG10`, `EXP`, `POWER`, `SQRT`
160
161
  - Trig: `SIN`, `COS`, `TAN`, `COT`, `ASIN`, `ACOS`, `ATAN`, `ATAN2`, `DEGREES`, `RADIANS`, `PI`
161
- - Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_PART`, `DATE_TRUNC`, `EXTRACT`, `INTERVAL`
162
+ - Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_DIFF`, `DATEDIFF`, `DATE_PART`, `DATE_TRUNC`, `EPOCH`, `EXTRACT`, `INTERVAL`
162
163
  - Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`, `JSON_ARRAY_LENGTH`, `JSON_VALID`, `JSON_TYPE`
163
- - Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_SORT`, `CARDINALITY`
164
+ - Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_CONTAINS`, `ARRAY_SORT`, `CARDINALITY`, `SIZE`
164
165
  - Table functions: `UNNEST`, `EXPLODE`, `JSON_EACH`
165
166
  - Regex: `REGEXP_SUBSTR`, `REGEXP_EXTRACT`, `REGEXP_REPLACE`, `REGEXP_MATCHES`
166
167
  - Spatial: `ST_GeomFromText`, `ST_MakeEnvelope`, `ST_AsText`, `ST_Intersects`, `ST_Contains`, `ST_ContainsProperly`, `ST_Within`, `ST_Overlaps`, `ST_Touches`, `ST_Equals`, `ST_Crosses`, `ST_Covers`, `ST_CoveredBy`, `ST_DWithin`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.12.14",
3
+ "version": "0.12.16",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -20,10 +20,13 @@ export function executeWindow(plan, context) {
20
20
  const child = executePlan({ plan: plan.child, context })
21
21
  const extraColumns = plan.windows.map(w => w.alias)
22
22
 
23
- // Streaming fast path: every window is OVER () with no partition/order, so
24
- // each row's output depends only on its position in the input stream. Avoids
25
- // buffering critical for large scans (e.g. parquet).
26
- const streamable = plan.windows.every(w => w.partitionBy.length === 0 && w.orderBy.length === 0)
23
+ // Streaming fast path: every window is a positional function (e.g.
24
+ // ROW_NUMBER) with an empty OVER () (no partition/order), so each row's output
25
+ // depends only on its index in the input stream. Avoids buffering, which
26
+ // matters for large scans (e.g. parquet).
27
+ const streamable = plan.windows.every(w =>
28
+ w.funcName === 'ROW_NUMBER' && w.partitionBy.length === 0 && w.orderBy.length === 0
29
+ )
27
30
 
28
31
  if (streamable) {
29
32
  return {
@@ -37,7 +40,7 @@ export function executeWindow(plan, context) {
37
40
  i++
38
41
  const cells = { ...row.cells }
39
42
  for (const w of plan.windows) {
40
- const value = assignRowNumber(w.funcName, i - 1)
43
+ const value = i
41
44
  cells[w.alias] = () => Promise.resolve(value)
42
45
  }
43
46
  yield {
@@ -119,6 +122,8 @@ async function computeWindow(spec, rows, output, context) {
119
122
  if (context.signal?.aborted) return
120
123
 
121
124
  // Order within the partition. Empty ORDER BY → input order.
125
+ /** @type {number[]} */
126
+ let ordered
122
127
  if (spec.orderBy.length) {
123
128
  const orderValues = await Promise.all(bucket.map(idx =>
124
129
  Promise.all(spec.orderBy.map(term => evaluateExpr({ node: term.expr, row: rows[idx], context })))
@@ -132,23 +137,51 @@ async function computeWindow(spec, rows, output, context) {
132
137
  }
133
138
  return a.pos - b.pos
134
139
  })
135
- for (let k = 0; k < entries.length; k++) {
136
- output[entries[k].idx] = assignRowNumber(spec.funcName, k)
137
- }
140
+ ordered = entries.map(e => e.idx)
138
141
  } else {
139
- for (let k = 0; k < bucket.length; k++) {
140
- output[bucket[k]] = assignRowNumber(spec.funcName, k)
141
- }
142
+ ordered = bucket
142
143
  }
144
+
145
+ await applyWindowFunction(spec, ordered, rows, output, context)
143
146
  }
144
147
  }
145
148
 
146
149
  /**
147
- * @param {string} funcName
148
- * @param {number} rank - 0-based rank within the partition
149
- * @returns {SqlPrimitive}
150
+ * Computes window function values for a single partition's rows in order.
151
+ *
152
+ * @param {WindowSpec} spec
153
+ * @param {number[]} ordered - row indices in window order
154
+ * @param {AsyncRow[]} rows
155
+ * @param {SqlPrimitive[]} output
156
+ * @param {ExecuteContext} context
150
157
  */
151
- function assignRowNumber(funcName, rank) {
152
- if (funcName === 'ROW_NUMBER') return rank + 1
153
- throw new Error(`Unsupported window function: ${funcName}`)
158
+ async function applyWindowFunction(spec, ordered, rows, output, context) {
159
+ if (spec.funcName === 'ROW_NUMBER') {
160
+ for (let k = 0; k < ordered.length; k++) {
161
+ output[ordered[k]] = k + 1
162
+ }
163
+ return
164
+ }
165
+ if (spec.funcName === 'LAG' || spec.funcName === 'LEAD') {
166
+ const direction = spec.funcName === 'LAG' ? -1 : 1
167
+ const [valueExpr, offsetExpr, defaultExpr] = spec.args
168
+ for (let k = 0; k < ordered.length; k++) {
169
+ if (context.signal?.aborted) return
170
+ const idx = ordered[k]
171
+ const row = rows[idx]
172
+ const offset = offsetExpr
173
+ ? Number(await evaluateExpr({ node: offsetExpr, row, context }))
174
+ : 1
175
+ const target = k + direction * offset
176
+ if (target >= 0 && target < ordered.length) {
177
+ output[idx] = await evaluateExpr({ node: valueExpr, row: rows[ordered[target]], context })
178
+ } else if (defaultExpr) {
179
+ output[idx] = await evaluateExpr({ node: defaultExpr, row, context })
180
+ } else {
181
+ output[idx] = null
182
+ }
183
+ }
184
+ return
185
+ }
186
+ throw new Error(`Unsupported window function: ${spec.funcName}`)
154
187
  }
@@ -100,6 +100,32 @@ export function extractField(field, dateVal) {
100
100
  return null
101
101
  }
102
102
 
103
+ /**
104
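+ * Compute the difference between two dates (end - start) in the given unit: UTC calendar-field difference (boundaries crossed) for YEAR and MONTH; whole elapsed units, truncated toward zero, for DAY, HOUR, MINUTE and SECOND. Returns null for null or unparseable inputs and for any other unit.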
105
+ * @param {SqlPrimitive} unit
106
+ * @param {SqlPrimitive} startVal
107
+ * @param {SqlPrimitive} endVal
108
+ * @returns {number | null}
109
+ */
110
+ export function dateDiff(unit, startVal, endVal) {
111
+ if (unit == null || startVal == null || endVal == null) return null
112
+ const start = toDate(startVal)
113
+ const end = toDate(endVal)
114
+ if (start == null || end == null) return null
115
+
116
+ const u = String(unit).toUpperCase()
117
+ if (u === 'YEAR') return end.getUTCFullYear() - start.getUTCFullYear()
118
+ if (u === 'MONTH') {
119
+ return (end.getUTCFullYear() - start.getUTCFullYear()) * 12 + (end.getUTCMonth() - start.getUTCMonth())
120
+ }
121
+ const ms = end.getTime() - start.getTime()
122
+ if (u === 'DAY') return Math.trunc(ms / 86400000)
123
+ if (u === 'HOUR') return Math.trunc(ms / 3600000)
124
+ if (u === 'MINUTE') return Math.trunc(ms / 60000)
125
+ if (u === 'SECOND') return Math.trunc(ms / 1000)
126
+ return null
127
+ }
128
+
103
129
  /**
104
130
  * @param {SqlPrimitive} val
105
131
  * @returns {Date | null}
@@ -6,7 +6,7 @@ import { UnknownFunctionError } from '../validation/parseErrors.js'
6
6
  import { ColumnNotFoundError } from '../validation/tables.js'
7
7
  import { derivedAlias } from './alias.js'
8
8
  import { applyBinaryOp } from './binary.js'
9
- import { applyIntervalToDate, dateTrunc, extractField } from './date.js'
9
+ import { applyIntervalToDate, dateDiff, dateTrunc, extractField } from './date.js'
10
10
  import { evaluateMathFunc } from './math.js'
11
11
  import { evaluateRegexpFunc } from './regexp.js'
12
12
  import { evaluateSpatialFunc } from '../spatial/spatial.js'
@@ -208,6 +208,17 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
208
208
  return count
209
209
  }
210
210
 
211
+ if (funcName === 'COUNTIF') {
212
+ const values = await Promise.all(filteredRows.map(row =>
213
+ evaluateExpr({ node: argNode, row, context })
214
+ ))
215
+ let count = 0
216
+ for (const v of values) {
217
+ if (v) count++
218
+ }
219
+ return count
220
+ }
221
+
211
222
  if (funcName === 'SUM' || funcName === 'AVG' || funcName === 'MIN' || funcName === 'MAX') {
212
223
  const rawValues = await Promise.all(filteredRows.map(row =>
213
224
  evaluateExpr({ node: argNode, row, context })
@@ -406,6 +417,14 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
406
417
  return extractField(args[0], args[1])
407
418
  }
408
419
 
420
+ if (funcName === 'EPOCH') {
421
+ return extractField('EPOCH', args[0])
422
+ }
423
+
424
+ if (funcName === 'DATE_DIFF' || funcName === 'DATEDIFF') {
425
+ return dateDiff(args[0], args[1], args[2])
426
+ }
427
+
409
428
  if (funcName === 'CURRENT_DATE') {
410
429
  return new Date().toISOString().split('T')[0]
411
430
  }
@@ -497,7 +516,7 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
497
516
  return arr.length
498
517
  }
499
518
 
500
- if (funcName === 'ARRAY_LENGTH' || funcName === 'CARDINALITY') {
519
+ if (funcName === 'ARRAY_LENGTH' || funcName === 'CARDINALITY' || funcName === 'SIZE') {
501
520
  const arr = args[0]
502
521
  if (!Array.isArray(arr)) return null
503
522
  if (funcName === 'ARRAY_LENGTH' && args.length === 2) {
@@ -527,6 +546,12 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
527
546
  return index === -1 ? null : index + 1
528
547
  }
529
548
 
549
+ if (funcName === 'ARRAY_CONTAINS') {
550
+ const [arr, target] = args
551
+ if (!Array.isArray(arr)) return null
552
+ return arr.includes(target)
553
+ }
554
+
530
555
  if (funcName === 'ARRAY_SORT') {
531
556
  const arr = args[0]
532
557
  if (!Array.isArray(arr)) return null
@@ -612,8 +637,8 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
612
637
  if (typeof val === 'object') return stringify(val)
613
638
  return String(val)
614
639
  }
615
- // Can only cast primitives to other primitive types
616
- if (typeof val === 'object') {
640
+ // Can only cast primitives (and Dates) to other primitive types
641
+ if (typeof val === 'object' && !(val instanceof Date)) {
617
642
  throw new ExecutionError({ message: `Cannot CAST object to ${toType}`, rowIndex, ...node })
618
643
  }
619
644
  if (toType === 'INTEGER' || toType === 'INT') {
package/src/types.d.ts CHANGED
@@ -129,7 +129,7 @@ export interface UserDefinedFunction {
129
129
  arguments: FunctionSignature
130
130
  }
131
131
 
132
- export type AggregateFunc = 'COUNT' | 'SUM' | 'AVG' | 'MIN' | 'MAX' | 'ARRAY_AGG' | 'JSON_ARRAYAGG' | 'STDDEV_SAMP' | 'STDDEV_POP' | 'MEDIAN' | 'PERCENTILE_CONT' | 'APPROX_QUANTILE' | 'STRING_AGG'
132
+ export type AggregateFunc = 'COUNT' | 'COUNTIF' | 'SUM' | 'AVG' | 'MIN' | 'MAX' | 'ARRAY_AGG' | 'JSON_ARRAYAGG' | 'STDDEV_SAMP' | 'STDDEV_POP' | 'MEDIAN' | 'PERCENTILE_CONT' | 'APPROX_QUANTILE' | 'STRING_AGG'
133
133
 
134
134
  export type RegExpFunction = 'REGEXP_SUBSTR' | 'REGEXP_EXTRACT' | 'REGEXP_REPLACE' | 'REGEXP_MATCHES'
135
135
 
@@ -11,7 +11,7 @@ export const niladicFuncs = ['CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP'
11
11
  * @returns {name is AggregateFunc}
12
12
  */
13
13
  export function isAggregateFunc(name) {
14
- return ['COUNT', 'SUM', 'AVG', 'MIN', 'MAX', 'ARRAY_AGG', 'JSON_ARRAYAGG', 'STDDEV_SAMP', 'STDDEV_POP', 'MEDIAN', 'PERCENTILE_CONT', 'APPROX_QUANTILE', 'STRING_AGG'].includes(name)
14
+ return ['COUNT', 'COUNTIF', 'SUM', 'AVG', 'MIN', 'MAX', 'ARRAY_AGG', 'JSON_ARRAYAGG', 'STDDEV_SAMP', 'STDDEV_POP', 'MEDIAN', 'PERCENTILE_CONT', 'APPROX_QUANTILE', 'STRING_AGG'].includes(name)
15
15
  }
16
16
 
17
17
  /**
@@ -31,7 +31,7 @@ export function isMathFunc(name) {
31
31
  * @returns {boolean}
32
32
  */
33
33
  export function isWindowFunc(name) {
34
- return ['ROW_NUMBER'].includes(name)
34
+ return ['ROW_NUMBER', 'LAG', 'LEAD'].includes(name)
35
35
  }
36
36
 
37
37
  /**
@@ -140,7 +140,10 @@ export const FUNCTION_SIGNATURES = {
140
140
  CURRENT_TIMESTAMP: { min: 0, max: 0, signature: '' },
141
141
  DATE_TRUNC: { min: 2, max: 2, signature: 'unit, date' },
142
142
  DATE_PART: { min: 2, max: 2, signature: 'field, date' },
143
+ DATE_DIFF: { min: 3, max: 3, signature: 'unit, start, end' },
144
+ DATEDIFF: { min: 3, max: 3, signature: 'unit, start, end' },
143
145
  EXTRACT: { min: 2, max: 2, signature: 'field FROM date' },
146
+ EPOCH: { min: 1, max: 1, signature: 'date' },
144
147
 
145
148
  // Math functions
146
149
  FLOOR: { min: 1, max: 1, signature: 'number' },
@@ -182,8 +185,10 @@ export const FUNCTION_SIGNATURES = {
182
185
  // Array functions
183
186
  ARRAY_LENGTH: { min: 1, max: 2, signature: 'array[, dimension]' },
184
187
  ARRAY_POSITION: { min: 2, max: 2, signature: 'array, element' },
188
+ ARRAY_CONTAINS: { min: 2, max: 2, signature: 'array, element' },
185
189
  ARRAY_SORT: { min: 1, max: 1, signature: 'array' },
186
190
  CARDINALITY: { min: 1, max: 1, signature: 'array' },
191
+ SIZE: { min: 1, max: 1, signature: 'array' },
187
192
 
188
193
  // Table functions (used in FROM clause)
189
194
  UNNEST: { min: 1, max: 1, signature: 'array' },
@@ -198,6 +203,7 @@ export const FUNCTION_SIGNATURES = {
198
203
 
199
204
  // Aggregate functions
200
205
  COUNT: { min: 1, max: 1, signature: 'expression' },
206
+ COUNTIF: { min: 1, max: 1, signature: 'condition' },
201
207
  SUM: { min: 1, max: 1, signature: 'expression' },
202
208
  AVG: { min: 1, max: 1, signature: 'expression' },
203
209
  MIN: { min: 1, max: 1, signature: 'expression' },
@@ -211,6 +217,8 @@ export const FUNCTION_SIGNATURES = {
211
217
 
212
218
  // Window functions
213
219
  ROW_NUMBER: { min: 0, max: 0, signature: '' },
220
+ LAG: { min: 1, max: 3, signature: 'value[, offset[, default]]' },
221
+ LEAD: { min: 1, max: 3, signature: 'value[, offset[, default]]' },
214
222
 
215
223
  // Spatial functions
216
224
  ST_INTERSECTS: { min: 2, max: 2, signature: 'geometry, geometry' },