squirreling 0.12.15 → 0.12.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -155,10 +155,11 @@ Squirreling mostly follows the SQL standard. The following features are supporte
155
155
  ### Functions
156
156
 
157
157
  - Aggregate: `COUNT`, `COUNTIF`, `SUM`, `AVG`, `MIN`, `MAX`, `MEDIAN`, `PERCENTILE_CONT`, `APPROX_QUANTILE`, `STDDEV_POP`, `STDDEV_SAMP`, `ARRAY_AGG`, `JSON_ARRAYAGG`, `STRING_AGG`
158
+ - Window: `ROW_NUMBER`, `LAG`, `LEAD`
158
159
  - String: `CONCAT`, `SUBSTRING`, `REPLACE`, `LENGTH`, `UPPER`, `LOWER`, `TRIM`, `LEFT`, `RIGHT`, `INSTR`, `POSITION`, `STRPOS`
159
160
  - Math: `ABS`, `SIGN`, `CEIL`, `FLOOR`, `ROUND`, `MOD`, `RAND`, `RANDOM`, `LN`, `LOG10`, `EXP`, `POWER`, `SQRT`
160
161
  - Trig: `SIN`, `COS`, `TAN`, `COT`, `ASIN`, `ACOS`, `ATAN`, `ATAN2`, `DEGREES`, `RADIANS`, `PI`
161
- - Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_PART`, `DATE_TRUNC`, `EXTRACT`, `INTERVAL`
162
+ - Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `DATE_DIFF`, `DATEDIFF`, `DATE_PART`, `DATE_TRUNC`, `EPOCH`, `EXTRACT`, `INTERVAL`
162
163
  - Json: `JSON_VALUE`, `JSON_QUERY`, `JSON_EXTRACT`, `JSON_OBJECT`, `JSON_ARRAY_LENGTH`, `JSON_VALID`, `JSON_TYPE`
163
164
  - Array: `ARRAY_LENGTH`, `ARRAY_POSITION`, `ARRAY_CONTAINS`, `ARRAY_SORT`, `CARDINALITY`, `SIZE`
164
165
  - Table functions: `UNNEST`, `EXPLODE`, `JSON_EACH`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.12.15",
3
+ "version": "0.12.17",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -381,7 +381,11 @@ function computeScanRows(tableNumRows, limit, offset) {
381
381
  */
382
382
  async function* filterRows(rows, condition, context, limit) {
383
383
  const MAX_CHUNK = 256
384
- let chunkSize = limit ?? Infinity
384
+ // Without a LIMIT hint, evaluate row-by-row to preserve streaming.
385
+ // With a LIMIT hint, batch in growing chunks to parallelize async cell
386
+ // evaluation across rows that may be discarded anyway.
387
+ let chunkSize = limit ?? 1
388
+ const grow = limit !== undefined
385
389
  let rowIndex = 0
386
390
 
387
391
  /** @type {{ row: AsyncRow, rowIndex: number }[]} */
@@ -400,7 +404,7 @@ async function* filterRows(rows, condition, context, limit) {
400
404
  if (results[i]) yield buffer[i].row
401
405
  }
402
406
  buffer = []
403
- chunkSize = Math.min(chunkSize * 2, MAX_CHUNK)
407
+ if (grow) chunkSize = Math.min(chunkSize * 2, MAX_CHUNK)
404
408
  }
405
409
  }
406
410
 
@@ -20,10 +20,13 @@ export function executeWindow(plan, context) {
20
20
  const child = executePlan({ plan: plan.child, context })
21
21
  const extraColumns = plan.windows.map(w => w.alias)
22
22
 
23
- // Streaming fast path: every window is OVER () with no partition/order, so
24
- // each row's output depends only on its position in the input stream. Avoids
25
- // buffering critical for large scans (e.g. parquet).
26
- const streamable = plan.windows.every(w => w.partitionBy.length === 0 && w.orderBy.length === 0)
23
+ // Streaming fast path: every window is a positional function (e.g.
24
+ // ROW_NUMBER) with OVER () and no partition/order, so each row's output
25
+ // depends only on its index in the input stream. Avoids buffering, which
26
+ // matters for large scans (e.g. parquet).
27
+ const streamable = plan.windows.every(w =>
28
+ w.funcName === 'ROW_NUMBER' && w.partitionBy.length === 0 && w.orderBy.length === 0
29
+ )
27
30
 
28
31
  if (streamable) {
29
32
  return {
@@ -37,7 +40,7 @@ export function executeWindow(plan, context) {
37
40
  i++
38
41
  const cells = { ...row.cells }
39
42
  for (const w of plan.windows) {
40
- const value = assignRowNumber(w.funcName, i - 1)
43
+ const value = i
41
44
  cells[w.alias] = () => Promise.resolve(value)
42
45
  }
43
46
  yield {
@@ -119,6 +122,8 @@ async function computeWindow(spec, rows, output, context) {
119
122
  if (context.signal?.aborted) return
120
123
 
121
124
  // Order within the partition. Empty ORDER BY → input order.
125
+ /** @type {number[]} */
126
+ let ordered
122
127
  if (spec.orderBy.length) {
123
128
  const orderValues = await Promise.all(bucket.map(idx =>
124
129
  Promise.all(spec.orderBy.map(term => evaluateExpr({ node: term.expr, row: rows[idx], context })))
@@ -132,23 +137,51 @@ async function computeWindow(spec, rows, output, context) {
132
137
  }
133
138
  return a.pos - b.pos
134
139
  })
135
- for (let k = 0; k < entries.length; k++) {
136
- output[entries[k].idx] = assignRowNumber(spec.funcName, k)
137
- }
140
+ ordered = entries.map(e => e.idx)
138
141
  } else {
139
- for (let k = 0; k < bucket.length; k++) {
140
- output[bucket[k]] = assignRowNumber(spec.funcName, k)
141
- }
142
+ ordered = bucket
142
143
  }
144
+
145
+ await applyWindowFunction(spec, ordered, rows, output, context)
143
146
  }
144
147
  }
145
148
 
146
149
  /**
147
- * @param {string} funcName
148
- * @param {number} rank - 0-based rank within the partition
149
- * @returns {SqlPrimitive}
150
+ * Computes window function values for a single partition's rows in order.
151
+ *
152
+ * @param {WindowSpec} spec
153
+ * @param {number[]} ordered - row indices in window order
154
+ * @param {AsyncRow[]} rows
155
+ * @param {SqlPrimitive[]} output
156
+ * @param {ExecuteContext} context
150
157
  */
151
- function assignRowNumber(funcName, rank) {
152
- if (funcName === 'ROW_NUMBER') return rank + 1
153
- throw new Error(`Unsupported window function: ${funcName}`)
158
+ async function applyWindowFunction(spec, ordered, rows, output, context) {
159
+ if (spec.funcName === 'ROW_NUMBER') {
160
+ for (let k = 0; k < ordered.length; k++) {
161
+ output[ordered[k]] = k + 1
162
+ }
163
+ return
164
+ }
165
+ if (spec.funcName === 'LAG' || spec.funcName === 'LEAD') {
166
+ const direction = spec.funcName === 'LAG' ? -1 : 1
167
+ const [valueExpr, offsetExpr, defaultExpr] = spec.args
168
+ for (let k = 0; k < ordered.length; k++) {
169
+ if (context.signal?.aborted) return
170
+ const idx = ordered[k]
171
+ const row = rows[idx]
172
+ const offset = offsetExpr
173
+ ? Number(await evaluateExpr({ node: offsetExpr, row, context }))
174
+ : 1
175
+ const target = k + direction * offset
176
+ if (target >= 0 && target < ordered.length) {
177
+ output[idx] = await evaluateExpr({ node: valueExpr, row: rows[ordered[target]], context })
178
+ } else if (defaultExpr) {
179
+ output[idx] = await evaluateExpr({ node: defaultExpr, row, context })
180
+ } else {
181
+ output[idx] = null
182
+ }
183
+ }
184
+ return
185
+ }
186
+ throw new Error(`Unsupported window function: ${spec.funcName}`)
154
187
  }
@@ -100,6 +100,32 @@ export function extractField(field, dateVal) {
100
100
  return null
101
101
  }
102
102
 
103
+ /**
104
+ * Compute end - start in the given unit: calendar boundaries crossed for YEAR and MONTH, whole elapsed units (truncated toward zero) for DAY, HOUR, MINUTE and SECOND.
105
+ * @param {SqlPrimitive} unit
106
+ * @param {SqlPrimitive} startVal
107
+ * @param {SqlPrimitive} endVal
108
+ * @returns {number | null}
109
+ */
110
+ export function dateDiff(unit, startVal, endVal) {
111
+ if (unit == null || startVal == null || endVal == null) return null
112
+ const start = toDate(startVal)
113
+ const end = toDate(endVal)
114
+ if (start == null || end == null) return null
115
+
116
+ const u = String(unit).toUpperCase()
117
+ if (u === 'YEAR') return end.getUTCFullYear() - start.getUTCFullYear()
118
+ if (u === 'MONTH') {
119
+ return (end.getUTCFullYear() - start.getUTCFullYear()) * 12 + (end.getUTCMonth() - start.getUTCMonth())
120
+ }
121
+ const ms = end.getTime() - start.getTime()
122
+ if (u === 'DAY') return Math.trunc(ms / 86400000)
123
+ if (u === 'HOUR') return Math.trunc(ms / 3600000)
124
+ if (u === 'MINUTE') return Math.trunc(ms / 60000)
125
+ if (u === 'SECOND') return Math.trunc(ms / 1000)
126
+ return null
127
+ }
128
+
103
129
  /**
104
130
  * @param {SqlPrimitive} val
105
131
  * @returns {Date | null}
@@ -6,7 +6,7 @@ import { UnknownFunctionError } from '../validation/parseErrors.js'
6
6
  import { ColumnNotFoundError } from '../validation/tables.js'
7
7
  import { derivedAlias } from './alias.js'
8
8
  import { applyBinaryOp } from './binary.js'
9
- import { applyIntervalToDate, dateTrunc, extractField } from './date.js'
9
+ import { applyIntervalToDate, dateDiff, dateTrunc, extractField } from './date.js'
10
10
  import { evaluateMathFunc } from './math.js'
11
11
  import { evaluateRegexpFunc } from './regexp.js'
12
12
  import { evaluateSpatialFunc } from '../spatial/spatial.js'
@@ -417,6 +417,14 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
417
417
  return extractField(args[0], args[1])
418
418
  }
419
419
 
420
+ if (funcName === 'EPOCH') {
421
+ return extractField('EPOCH', args[0])
422
+ }
423
+
424
+ if (funcName === 'DATE_DIFF' || funcName === 'DATEDIFF') {
425
+ return dateDiff(args[0], args[1], args[2])
426
+ }
427
+
420
428
  if (funcName === 'CURRENT_DATE') {
421
429
  return new Date().toISOString().split('T')[0]
422
430
  }
@@ -629,8 +637,8 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
629
637
  if (typeof val === 'object') return stringify(val)
630
638
  return String(val)
631
639
  }
632
- // Can only cast primitives to other primitive types
633
- if (typeof val === 'object') {
640
+ // Can only cast primitives (and Dates) to other primitive types
641
+ if (typeof val === 'object' && !(val instanceof Date)) {
634
642
  throw new ExecutionError({ message: `Cannot CAST object to ${toType}`, rowIndex, ...node })
635
643
  }
636
644
  if (toType === 'INTEGER' || toType === 'INT') {
@@ -31,7 +31,7 @@ export function isMathFunc(name) {
31
31
  * @returns {boolean}
32
32
  */
33
33
  export function isWindowFunc(name) {
34
- return ['ROW_NUMBER'].includes(name)
34
+ return ['ROW_NUMBER', 'LAG', 'LEAD'].includes(name)
35
35
  }
36
36
 
37
37
  /**
@@ -140,7 +140,10 @@ export const FUNCTION_SIGNATURES = {
140
140
  CURRENT_TIMESTAMP: { min: 0, max: 0, signature: '' },
141
141
  DATE_TRUNC: { min: 2, max: 2, signature: 'unit, date' },
142
142
  DATE_PART: { min: 2, max: 2, signature: 'field, date' },
143
+ DATE_DIFF: { min: 3, max: 3, signature: 'unit, start, end' },
144
+ DATEDIFF: { min: 3, max: 3, signature: 'unit, start, end' },
143
145
  EXTRACT: { min: 2, max: 2, signature: 'field FROM date' },
146
+ EPOCH: { min: 1, max: 1, signature: 'date' },
144
147
 
145
148
  // Math functions
146
149
  FLOOR: { min: 1, max: 1, signature: 'number' },
@@ -214,6 +217,8 @@ export const FUNCTION_SIGNATURES = {
214
217
 
215
218
  // Window functions
216
219
  ROW_NUMBER: { min: 0, max: 0, signature: '' },
220
+ LAG: { min: 1, max: 3, signature: 'value[, offset[, default]]' },
221
+ LEAD: { min: 1, max: 3, signature: 'value[, offset[, default]]' },
217
222
 
218
223
  // Spatial functions
219
224
  ST_INTERSECTS: { min: 2, max: 2, signature: 'geometry, geometry' },