squirreling 0.6.0 → 0.7.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -19,6 +19,8 @@ Squirreling is a streaming async SQL engine for JavaScript. It is designed to pr
19
19
  - Lets you move query execution closer to your users
20
20
  - Supports standard SQL queries
21
21
  - Async streaming for large datasets
22
+ - Native JavaScript Promises, AsyncGenerators, AbortSignals
23
+ - Async user-defined functions (UDFs)
22
24
  - Constant memory usage for simple queries with LIMIT
23
25
  - Robust error handling and validation designed for LLM tool use
24
26
  - In-memory data option for simple use cases
@@ -27,12 +29,12 @@ Squirreling is a streaming async SQL engine for JavaScript. It is designed to pr
27
29
 
28
30
  ## Usage
29
31
 
30
- Squirreling returns an async generator, allowing you to process rows one at a time without loading everything into memory.
32
+ Squirreling returns an AsyncGenerator of AsyncRows, allowing you to process rows one at a time without loading everything into memory. AsyncRows are made up of AsyncCells, allowing for late materialization of values.
31
33
 
32
34
  ```typescript
33
35
  import { executeSql } from 'squirreling'
34
36
 
35
- // In-memory table
37
+ // Input table (in-memory for this example)
36
38
  const users = [
37
39
  { id: 1, name: 'Alice', active: true },
38
40
  { id: 2, name: 'Bob', active: false },
@@ -40,35 +42,37 @@ const users = [
40
42
  // ...more rows
41
43
  ]
42
44
 
45
+ // Squirreling return types
43
46
  interface AsyncRow {
44
47
  columns: string[]
45
48
  cells: Record<string, AsyncCell>
46
49
  }
47
50
  type AsyncCell = () => Promise<SqlPrimitive>
48
51
 
49
- // Returns an async iterable of rows with async cells
52
+ // Returns an AsyncIterable of rows with async cell loading
50
53
  const asyncRows: AsyncIterable<AsyncRow> = executeSql({
51
54
  tables: { users },
52
- query: 'SELECT count(*) as cnt FROM users WHERE active = TRUE LIMIT 10',
55
+ query: 'SELECT * FROM users',
53
56
  })
54
57
 
55
58
  // Process rows as they arrive (streaming)
56
- for await (const { cnt } of asyncRows) {
57
- console.log('Count', await cnt())
59
+ for await (const { id, name } of asyncRows) {
60
+ console.log(`User id=${await id()}, name=${await name()}`)
58
61
  }
59
62
  ```
60
63
 
61
- There is an exported helper function `collect` to gather all rows into an array if needed:
64
+ Squirreling exports a helper function `collect` to gather all rows into an array:
62
65
 
63
66
  ```typescript
64
67
  import { collect, executeSql } from 'squirreling'
65
68
 
66
69
  // Collect all rows and cells into a materialized array
67
- const allUsers: Record<string, SqlPrimitive>[] = await collect(executeSql({
70
+ const rows: Record<string, SqlPrimitive>[] = await collect(executeSql({
68
71
  tables: { users },
69
- query: 'SELECT * FROM users',
72
+ query: 'SELECT active, count(*) as cnt FROM users GROUP BY active',
70
73
  }))
71
- console.log(allUsers)
74
+ console.log(`Collected rows:`, rows)
75
+ // Collected rows: [ { active: true, cnt: 2 }, { active: false, cnt: 1 } ]
72
76
  ```
73
77
 
74
78
  ## Supported SQL Features
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.6.0",
3
+ "version": "0.7.0",
4
4
  "description": "Squirreling SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -37,11 +37,11 @@
37
37
  "test": "vitest run"
38
38
  },
39
39
  "devDependencies": {
40
- "@types/node": "24.10.2",
41
- "@vitest/coverage-v8": "4.0.15",
42
- "eslint": "9.39.1",
40
+ "@types/node": "24.10.4",
41
+ "@vitest/coverage-v8": "4.0.16",
42
+ "eslint": "9.39.2",
43
43
  "eslint-plugin-jsdoc": "61.5.0",
44
44
  "typescript": "5.9.3",
45
- "vitest": "4.0.15"
45
+ "vitest": "4.0.16"
46
46
  }
47
47
  }
@@ -1,8 +1,7 @@
1
1
  /**
2
- * @import { AsyncCell, AsyncCells, AsyncDataSource, AsyncRow, SqlPrimitive } from '../types.js'
2
+ * @import { AsyncCell, AsyncCells, AsyncDataSource, AsyncRow, ScanOptions, SqlPrimitive } from '../types.js'
3
3
  */
4
4
 
5
-
6
5
  /**
7
6
  * Wraps an async generator of plain objects into an AsyncDataSource
8
7
  *
@@ -11,8 +10,11 @@
11
10
  */
12
11
  export function generatorSource(gen) {
13
12
  return {
14
- async *scan() {
15
- yield* gen
13
+ async *scan({ signal }) {
14
+ for await (const row of gen) {
15
+ if (signal?.aborted) break
16
+ yield row
17
+ }
16
18
  },
17
19
  }
18
20
  }
@@ -40,8 +42,9 @@ function asyncRow(obj) {
40
42
  */
41
43
  export function memorySource(data) {
42
44
  return {
43
- async *scan() {
45
+ async *scan({ signal }) {
44
46
  for (const item of data) {
47
+ if (signal?.aborted) break
45
48
  yield asyncRow(item)
46
49
  }
47
50
  },
@@ -58,11 +61,14 @@ export function cachedDataSource(source) {
58
61
  const cache = new Map()
59
62
  return {
60
63
  /**
64
+ * @param {ScanOptions} options
61
65
  * @yields {AsyncRow}
62
66
  */
63
- async *scan() {
67
+ async *scan(options) {
68
+ const { signal } = options
64
69
  let index = 0
65
- for await (const row of source.scan()) {
70
+ for await (const row of source.scan(options)) {
71
+ if (signal?.aborted) break
66
72
  const rowIndex = index
67
73
  /** @type {AsyncCells} */
68
74
  const cells = {}
@@ -1,7 +1,46 @@
1
+ import { isAggregateFunc } from '../validation.js'
2
+
1
3
  /**
2
4
  * @import { ExprNode, SelectStatement, SelectColumn } from '../types.js'
3
5
  */
4
6
 
7
+ /**
8
+ * Checks if an expression contains any aggregate function calls
9
+ *
10
+ * @param {ExprNode | undefined} expr
11
+ * @returns {boolean}
12
+ */
13
+ export function containsAggregate(expr) {
14
+ if (!expr) return false
15
+ if (expr.type === 'function' && isAggregateFunc(expr.name.toUpperCase())) {
16
+ return true
17
+ }
18
+ if (expr.type === 'binary') {
19
+ return containsAggregate(expr.left) || containsAggregate(expr.right)
20
+ }
21
+ if (expr.type === 'unary') {
22
+ return containsAggregate(expr.argument)
23
+ }
24
+ if (expr.type === 'cast') {
25
+ return containsAggregate(expr.expr)
26
+ }
27
+ if (expr.type === 'case') {
28
+ if (expr.caseExpr && containsAggregate(expr.caseExpr)) return true
29
+ for (const when of expr.whenClauses) {
30
+ if (containsAggregate(when.condition) || containsAggregate(when.result)) return true
31
+ }
32
+ if (containsAggregate(expr.elseResult)) return true
33
+ }
34
+ if (expr.type === 'in valuelist') {
35
+ if (containsAggregate(expr.expr)) return true
36
+ for (const val of expr.values) {
37
+ if (containsAggregate(val)) return true
38
+ }
39
+ }
40
+ // Note: Don't recurse into subqueries - they have their own aggregate scope
41
+ return false
42
+ }
43
+
5
44
  /**
6
45
  * Extracts column names needed from a SELECT statement.
7
46
  *
@@ -50,11 +89,6 @@ export function extractColumns(select) {
50
89
  function collectColumnsFromSelectColumn(col, columns) {
51
90
  if (col.kind === 'derived') {
52
91
  collectColumnsFromExpr(col.expr, columns)
53
- } else if (col.kind === 'aggregate') {
54
- if (col.arg.kind === 'expression') {
55
- collectColumnsFromExpr(col.arg.expr, columns)
56
- }
57
- // 'star' aggregate (COUNT(*)) doesn't reference specific columns
58
92
  }
59
93
  // 'star' columns handled separately (returns undefined for all columns)
60
94
  }
@@ -2,15 +2,14 @@ import { missingClauseError } from '../parseErrors.js'
2
2
  import { tableNotFoundError, unsupportedOperationError } from '../executionErrors.js'
3
3
  import { generatorSource, memorySource } from '../backend/dataSource.js'
4
4
  import { parseSql } from '../parse/parse.js'
5
- import { defaultAggregateAlias, evaluateAggregate } from './aggregates.js'
6
- import { extractColumns } from './columns.js'
5
+ import { containsAggregate, extractColumns } from './columns.js'
7
6
  import { evaluateExpr } from './expression.js'
8
7
  import { evaluateHavingExpr } from './having.js'
9
8
  import { executeJoins } from './join.js'
10
9
  import { compareForTerm, defaultDerivedAlias, stringify } from './utils.js'
11
10
 
12
11
  /**
13
- * @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteSqlOptions, OrderByItem, QueryHints, SelectStatement, SqlPrimitive } from '../types.js'
12
+ * @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteSqlOptions, OrderByItem, QueryHints, SelectStatement, SqlPrimitive, UserDefinedFunction } from '../types.js'
14
13
  */
15
14
 
16
15
  /**
@@ -19,8 +18,8 @@ import { compareForTerm, defaultDerivedAlias, stringify } from './utils.js'
19
18
  * @param {ExecuteSqlOptions} options - the execution options
20
19
  * @yields {AsyncRow} async generator yielding result rows
21
20
  */
22
- export async function* executeSql({ tables, query }) {
23
- const select = typeof query === 'string' ? parseSql(query) : query
21
+ export async function* executeSql({ tables, query, functions, signal }) {
22
+ const select = typeof query === 'string' ? parseSql({ query, functions }) : query
24
23
 
25
24
  // Check for unsupported operations
26
25
  if (!select.from) {
@@ -41,41 +40,44 @@ export async function* executeSql({ tables, query }) {
41
40
  }
42
41
  }
43
42
 
44
- yield* executeSelect(select, normalizedTables)
43
+ yield* executeSelect({ select, tables: normalizedTables, functions, signal })
45
44
  }
46
45
 
47
46
  /**
48
47
  * Executes a SELECT query against the provided tables
49
48
  *
50
- * @param {SelectStatement} select
51
- * @param {Record<string, AsyncDataSource>} tables
49
+ * @param {Object} options
50
+ * @param {SelectStatement} options.select
51
+ * @param {Record<string, AsyncDataSource>} options.tables
52
+ * @param {Record<string, UserDefinedFunction>} [options.functions]
53
+ * @param {AbortSignal} [options.signal]
52
54
  * @yields {AsyncRow}
53
55
  */
54
- export async function* executeSelect(select, tables) {
56
+ export async function* executeSelect({ select, tables, functions, signal }) {
55
57
  /** @type {AsyncDataSource} */
56
58
  let dataSource
57
59
  /** @type {string} */
58
- let fromTableName
60
+ let leftTable
59
61
 
60
62
  if (select.from.kind === 'table') {
61
63
  // Use alias for column prefixing, but look up the actual table name
62
- fromTableName = select.from.alias ?? select.from.table
64
+ leftTable = select.from.alias ?? select.from.table
63
65
  dataSource = tables[select.from.table]
64
66
  if (dataSource === undefined) {
65
67
  throw tableNotFoundError({ tableName: select.from.table })
66
68
  }
67
69
  } else {
68
70
  // Nested subquery - recursively resolve
69
- fromTableName = select.from.alias
70
- dataSource = generatorSource(executeSelect(select.from.query, tables))
71
+ leftTable = select.from.alias
72
+ dataSource = generatorSource(executeSelect({ select: select.from.query, tables, functions, signal }))
71
73
  }
72
74
 
73
75
  // Execute JOINs if present
74
76
  if (select.joins.length) {
75
- dataSource = await executeJoins(dataSource, select.joins, fromTableName, tables)
77
+ dataSource = await executeJoins({ leftSource: dataSource, joins: select.joins, leftTable, tables, functions })
76
78
  }
77
79
 
78
- yield* evaluateSelectAst(select, dataSource, tables)
80
+ yield* evaluateSelectAst({ select, dataSource, tables, functions, signal })
79
81
  }
80
82
 
81
83
  /**
@@ -116,17 +118,20 @@ async function applyDistinct(rows, distinct) {
116
118
  }
117
119
  return result
118
120
  }
121
+
119
122
  /**
120
123
  * Applies ORDER BY sorting to rows using multi-pass lazy evaluation.
121
124
  * Secondary ORDER BY columns are only evaluated for rows that tie on
122
125
  * previous columns, reducing expensive cell evaluations.
123
126
  *
124
- * @param {AsyncRow[]} rows - the input rows
125
- * @param {OrderByItem[]} orderBy - the sort specifications
126
- * @param {Record<string, AsyncDataSource>} tables
127
+ * @param {Object} options
128
+ * @param {AsyncRow[]} options.rows - the input rows
129
+ * @param {OrderByItem[]} options.orderBy - the sort specifications
130
+ * @param {Record<string, AsyncDataSource>} options.tables
131
+ * @param {Record<string, UserDefinedFunction>} [options.functions]
127
132
  * @returns {Promise<AsyncRow[]>} the sorted rows
128
133
  */
129
- async function sortRows(rows, orderBy, tables) {
134
+ async function sortRows({ rows, orderBy, tables, functions }) {
130
135
  if (!orderBy.length) return rows
131
136
 
132
137
  // Cache for evaluated values: evaluatedValues[rowIdx][colIdx]
@@ -157,6 +162,7 @@ async function sortRows(rows, orderBy, tables) {
157
162
  node: term.expr,
158
163
  row: rows[idx],
159
164
  tables,
165
+ functions,
160
166
  })
161
167
  }
162
168
  }
@@ -204,24 +210,27 @@ async function sortRows(rows, orderBy, tables) {
204
210
  /**
205
211
  * Evaluates a select with a resolved FROM data source
206
212
  *
207
- * @param {SelectStatement} select
208
- * @param {AsyncDataSource} dataSource
209
- * @param {Record<string, AsyncDataSource>} tables
213
+ * @param {Object} options
214
+ * @param {SelectStatement} options.select
215
+ * @param {AsyncDataSource} options.dataSource
216
+ * @param {Record<string, AsyncDataSource>} options.tables
217
+ * @param {Record<string, UserDefinedFunction>} [options.functions]
218
+ * @param {AbortSignal} [options.signal]
210
219
  * @yields {AsyncRow}
211
220
  */
212
- async function* evaluateSelectAst(select, dataSource, tables) {
221
+ async function* evaluateSelectAst({ select, dataSource, tables, functions, signal }) {
213
222
  // SQL priority: from, where, group by, having, select, order by, offset, limit
214
223
 
215
- const hasAggregate = select.columns.some(col => col.kind === 'aggregate')
224
+ const hasAggregate = select.columns.some(col => col.kind === 'derived' && containsAggregate(col.expr))
216
225
  const useGrouping = hasAggregate || select.groupBy.length > 0
217
226
  const needsBuffering = useGrouping || select.orderBy.length > 0
218
227
 
219
228
  if (needsBuffering) {
220
229
  // BUFFERING PATH: Collect all rows, process, then yield
221
- yield* evaluateBuffered(select, dataSource, tables, hasAggregate, useGrouping)
230
+ yield* evaluateBuffered({ select, dataSource, tables, functions, hasAggregate, useGrouping, signal })
222
231
  } else {
223
232
  // STREAMING PATH: Yield rows one by one
224
- yield* evaluateStreaming(select, dataSource, tables)
233
+ yield* evaluateStreaming({ select, dataSource, tables, functions, signal })
225
234
  }
226
235
  }
227
236
 
@@ -229,12 +238,15 @@ async function* evaluateSelectAst(select, dataSource, tables) {
229
238
  * Streaming evaluation for simple queries (no ORDER BY or GROUP BY)
230
239
  * Supports DISTINCT by tracking seen row keys without buffering full rows
231
240
  *
232
- * @param {SelectStatement} select
233
- * @param {AsyncDataSource} dataSource
234
- * @param {Record<string, AsyncDataSource>} tables
241
+ * @param {Object} options
242
+ * @param {SelectStatement} options.select
243
+ * @param {AsyncDataSource} options.dataSource
244
+ * @param {Record<string, AsyncDataSource>} options.tables
245
+ * @param {Record<string, UserDefinedFunction>} [options.functions]
246
+ * @param {AbortSignal} [options.signal]
235
247
  * @yields {AsyncRow}
236
248
  */
237
- async function* evaluateStreaming(select, dataSource, tables) {
249
+ async function* evaluateStreaming({ select, dataSource, tables, functions, signal }) {
238
250
  let rowsYielded = 0
239
251
  let rowsSkipped = 0
240
252
  let rowIndex = 0
@@ -255,11 +267,11 @@ async function* evaluateStreaming(select, dataSource, tables) {
255
267
  offset: select.offset,
256
268
  }
257
269
 
258
- for await (const row of dataSource.scan(hints)) {
270
+ for await (const row of dataSource.scan({ hints, signal })) {
259
271
  rowIndex++
260
272
  // WHERE filter
261
273
  if (select.where) {
262
- const pass = await evaluateExpr({ node: select.where, row, tables, rowIndex })
274
+ const pass = await evaluateExpr({ node: select.where, row, tables, functions, rowIndex })
263
275
  if (!pass) continue
264
276
  }
265
277
 
@@ -284,11 +296,7 @@ async function* evaluateStreaming(select, dataSource, tables) {
284
296
  } else if (col.kind === 'derived') {
285
297
  const alias = col.alias ?? defaultDerivedAlias(col.expr)
286
298
  columns.push(alias)
287
- cells[alias] = () => evaluateExpr({ node: col.expr, row, tables, rowIndex: currentRowIndex })
288
- } else if (col.kind === 'aggregate') {
289
- throw new Error(
290
- 'Aggregate functions require GROUP BY or will act on the whole dataset; add GROUP BY or remove aggregates'
291
- )
299
+ cells[alias] = () => evaluateExpr({ node: col.expr, row, tables, functions, rowIndex: currentRowIndex })
292
300
  }
293
301
  }
294
302
 
@@ -315,14 +323,17 @@ async function* evaluateStreaming(select, dataSource, tables) {
315
323
  /**
316
324
  * Buffered evaluation for complex queries (with ORDER BY or GROUP BY)
317
325
  *
318
- * @param {SelectStatement} select
319
- * @param {AsyncDataSource} dataSource
320
- * @param {Record<string, AsyncDataSource>} tables
321
- * @param {boolean} hasAggregate
322
- * @param {boolean} useGrouping
326
+ * @param {Object} options
327
+ * @param {SelectStatement} options.select
328
+ * @param {AsyncDataSource} options.dataSource
329
+ * @param {Record<string, AsyncDataSource>} options.tables
330
+ * @param {Record<string, UserDefinedFunction>} [options.functions]
331
+ * @param {boolean} options.hasAggregate
332
+ * @param {boolean} options.useGrouping
333
+ * @param {AbortSignal} [options.signal]
323
334
  * @yields {AsyncRow}
324
335
  */
325
- async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGrouping) {
336
+ async function* evaluateBuffered({ select, dataSource, tables, functions, hasAggregate, useGrouping, signal }) {
326
337
  // Build hints for data source optimization
327
338
  // Note: limit/offset not passed here since buffering needs all rows for sorting/grouping
328
339
  /** @type {QueryHints} */
@@ -334,7 +345,7 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
334
345
  // Step 1: Collect all rows from data source
335
346
  /** @type {AsyncRow[]} */
336
347
  const working = []
337
- for await (const row of dataSource.scan(hints)) {
348
+ for await (const row of dataSource.scan({ hints, signal })) {
338
349
  working.push(row)
339
350
  }
340
351
 
@@ -346,7 +357,7 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
346
357
  const row = working[i]
347
358
  const rowIndex = i + 1 // 1-based
348
359
  if (select.where) {
349
- const passes = await evaluateExpr({ node: select.where, row, tables, rowIndex })
360
+ const passes = await evaluateExpr({ node: select.where, row, tables, functions, rowIndex })
350
361
 
351
362
  if (!passes) {
352
363
  continue
@@ -371,7 +382,7 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
371
382
  /** @type {string[]} */
372
383
  const keyParts = []
373
384
  for (const expr of select.groupBy) {
374
- const v = await evaluateExpr({ node: expr, row, tables })
385
+ const v = await evaluateExpr({ node: expr, row, tables, functions })
375
386
  keyParts.push(stringify(v))
376
387
  }
377
388
  const key = keyParts.join('|')
@@ -414,18 +425,9 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
414
425
  if (col.kind === 'derived') {
415
426
  const alias = col.alias ?? defaultDerivedAlias(col.expr)
416
427
  columns.push(alias)
417
- if (group.length > 0) {
418
- cells[alias] = () => evaluateExpr({ node: col.expr, row: group[0], tables })
419
- } else {
420
- delete cells[alias]
421
- }
422
- continue
423
- }
424
-
425
- if (col.kind === 'aggregate') {
426
- const alias = col.alias ?? defaultAggregateAlias(col)
427
- columns.push(alias)
428
- cells[alias] = () => evaluateAggregate({ col, rows: group, tables })
428
+ // Pass group to evaluateExpr so it can handle aggregate functions within expressions
429
+ // For empty groups, still provide an empty row context for aggregates to return appropriate values
430
+ cells[alias] = () => evaluateExpr({ node: col.expr, row: group[0] ?? { columns: [], cells: {} }, tables, functions, rows: group })
429
431
  continue
430
432
  }
431
433
  }
@@ -433,7 +435,7 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
433
435
 
434
436
  // Apply HAVING filter before adding to projected results
435
437
  if (select.having) {
436
- if (!await evaluateHavingExpr(select.having, asyncRow, group, tables)) {
438
+ if (!await evaluateHavingExpr({ expr: select.having, row: asyncRow, group, tables, functions })) {
437
439
  continue
438
440
  }
439
441
  }
@@ -443,7 +445,7 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
443
445
  } else {
444
446
  // No grouping, simple projection
445
447
  // Sort before projection so ORDER BY can access columns not in SELECT
446
- const sorted = await sortRows(filtered, select.orderBy, tables)
448
+ const sorted = await sortRows({ rows: filtered, orderBy: select.orderBy, tables, functions })
447
449
 
448
450
  // OPTIMIZATION: For non-DISTINCT queries, apply OFFSET/LIMIT before projection
449
451
  // to avoid reading expensive cells for rows that won't be in the final result
@@ -467,7 +469,7 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
467
469
  } else if (col.kind === 'derived') {
468
470
  const alias = col.alias ?? defaultDerivedAlias(col.expr)
469
471
  columns.push(alias)
470
- cells[alias] = () => evaluateExpr({ node: col.expr, row, tables })
472
+ cells[alias] = () => evaluateExpr({ node: col.expr, row, tables, functions })
471
473
  }
472
474
  }
473
475
  projected.push({ columns, cells })
@@ -479,7 +481,7 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
479
481
 
480
482
  // Step 5: ORDER BY (final sort for grouped queries)
481
483
  if (useGrouping) {
482
- projected = await sortRows(projected, select.orderBy, tables)
484
+ projected = await sortRows({ rows: projected, orderBy: select.orderBy, tables, functions })
483
485
  }
484
486
 
485
487
  // Step 6: OFFSET and LIMIT