squirreling 0.6.1 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -19,6 +19,8 @@ Squirreling is a streaming async SQL engine for JavaScript. It is designed to pr
19
19
  - Lets you move query execution closer to your users
20
20
  - Supports standard SQL queries
21
21
  - Async streaming for large datasets
22
+ - Native JavaScript Promises, AsyncGenerators, AbortSignals
23
+ - Async user-defined functions (UDFs)
22
24
  - Constant memory usage for simple queries with LIMIT
23
25
  - Robust error handling and validation designed for LLM tool use
24
26
  - In-memory data option for simple use cases
@@ -27,12 +29,12 @@ Squirreling is a streaming async SQL engine for JavaScript. It is designed to pr
27
29
 
28
30
  ## Usage
29
31
 
30
- Squirreling returns an async generator, allowing you to process rows one at a time without loading everything into memory.
32
+ Squirreling returns an AsyncGenerator of AsyncRows, allowing you to process rows one at a time without loading everything into memory. AsyncRows are made up of AsyncCells, allowing for late materialization of values.
31
33
 
32
34
  ```typescript
33
35
  import { executeSql } from 'squirreling'
34
36
 
35
- // In-memory table
37
+ // Input table (in-memory for this example)
36
38
  const users = [
37
39
  { id: 1, name: 'Alice', active: true },
38
40
  { id: 2, name: 'Bob', active: false },
@@ -40,35 +42,37 @@ const users = [
40
42
  // ...more rows
41
43
  ]
42
44
 
45
+ // Squirreling return types
43
46
  interface AsyncRow {
44
47
  columns: string[]
45
48
  cells: Record<string, AsyncCell>
46
49
  }
47
50
  type AsyncCell = () => Promise<SqlPrimitive>
48
51
 
49
- // Returns an async iterable of rows with async cells
52
+ // Returns an AsyncIterable of rows with async cell loading
50
53
  const asyncRows: AsyncIterable<AsyncRow> = executeSql({
51
54
  tables: { users },
52
- query: 'SELECT count(*) as cnt FROM users WHERE active = TRUE LIMIT 10',
55
+ query: 'SELECT * FROM users',
53
56
  })
54
57
 
55
58
  // Process rows as they arrive (streaming)
56
- for await (const { cnt } of asyncRows) {
57
- console.log('Count', await cnt())
59
+ for await (const { id, name } of asyncRows) {
60
+ console.log(`User id=${await id()}, name=${await name()}`)
58
61
  }
59
62
  ```
60
63
 
61
- There is an exported helper function `collect` to gather all rows into an array if needed:
64
+ Squirreling exports a helper function `collect` to gather all rows into an array:
62
65
 
63
66
  ```typescript
64
67
  import { collect, executeSql } from 'squirreling'
65
68
 
66
69
  // Collect all rows and cells into a materialized array
67
- const allUsers: Record<string, SqlPrimitive>[] = await collect(executeSql({
70
+ const rows: Record<string, SqlPrimitive>[] = await collect(executeSql({
68
71
  tables: { users },
69
- query: 'SELECT * FROM users',
72
+ query: 'SELECT active, count(*) as cnt FROM users GROUP BY active',
70
73
  }))
71
- console.log(allUsers)
74
+ console.log(`Collected rows:`, rows)
75
+ // Collected rows: [ { active: true, cnt: 2 }, { active: false, cnt: 1 } ]
72
76
  ```
73
77
 
74
78
  ## Supported SQL Features
@@ -77,9 +81,14 @@ console.log(allUsers)
77
81
  - Subqueries in `SELECT`, `FROM`, and `WHERE` clauses
78
82
  - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`
79
83
  - `GROUP BY` and `HAVING` clauses
80
- - Aggregate functions: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `JSON_ARRAYAGG`
81
- - String functions: `CONCAT`, `SUBSTRING`, `LENGTH`, `UPPER`, `LOWER`
82
- - Math functions: `ABS`, `CEIL`, `FLOOR`, `ROUND`, `MOD`, `RAND`, `RANDOM`, `LN`, `LOG10`, `EXP`, `POWER`, `SQRT`, `SIN`, `COS`, `TAN`, `COT`, `ASIN`, `ACOS`, `ATAN`, `ATAN2`, `DEGREES`, `RADIANS`, `PI`
83
- - Date functions: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `INTERVAL`
84
- - Json functions: `JSON_VALUE`, `JSON_QUERY`, `JSON_OBJECT`
85
- - Basic expressions and arithmetic operations
84
+
85
+ ### Functions
86
+
87
+ - Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `JSON_ARRAYAGG`
88
+ - String: `CONCAT`, `SUBSTRING`, `REPLACE`, `LENGTH`, `UPPER`, `LOWER`, `TRIM`, `LEFT`, `RIGHT`, `INSTR`
89
+ - Math: `ABS`, `CEIL`, `FLOOR`, `ROUND`, `MOD`, `RAND`, `RANDOM`, `LN`, `LOG10`, `EXP`, `POWER`, `SQRT`
90
+ - Trig: `SIN`, `COS`, `TAN`, `COT`, `ASIN`, `ACOS`, `ATAN`, `ATAN2`, `DEGREES`, `RADIANS`, `PI`
91
+ - Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `INTERVAL`
92
+ - JSON: `JSON_VALUE`, `JSON_QUERY`, `JSON_OBJECT`
93
+ - Regex: `REGEXP_SUBSTR`, `REGEXP_REPLACE`
94
+ - User-defined functions (UDFs)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.6.1",
3
+ "version": "0.7.1",
4
4
  "description": "Squirreling SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -9,7 +9,7 @@ import { executeJoins } from './join.js'
9
9
  import { compareForTerm, defaultDerivedAlias, stringify } from './utils.js'
10
10
 
11
11
  /**
12
- * @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteSqlOptions, OrderByItem, QueryHints, SelectStatement, SqlPrimitive } from '../types.js'
12
+ * @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteSqlOptions, OrderByItem, QueryHints, SelectStatement, SqlPrimitive, UserDefinedFunction } from '../types.js'
13
13
  */
14
14
 
15
15
  /**
@@ -18,8 +18,8 @@ import { compareForTerm, defaultDerivedAlias, stringify } from './utils.js'
18
18
  * @param {ExecuteSqlOptions} options - the execution options
19
19
  * @yields {AsyncRow} async generator yielding result rows
20
20
  */
21
- export async function* executeSql({ tables, query, signal }) {
22
- const select = typeof query === 'string' ? parseSql(query) : query
21
+ export async function* executeSql({ tables, query, functions, signal }) {
22
+ const select = typeof query === 'string' ? parseSql({ query, functions }) : query
23
23
 
24
24
  // Check for unsupported operations
25
25
  if (!select.from) {
@@ -40,47 +40,44 @@ export async function* executeSql({ tables, query, signal }) {
40
40
  }
41
41
  }
42
42
 
43
- yield* executeSelect({ select, tables: normalizedTables, signal })
43
+ yield* executeSelect({ select, tables: normalizedTables, functions, signal })
44
44
  }
45
45
 
46
- /**
47
- * @typedef {Object} ExecuteSelectOptions
48
- * @property {SelectStatement} select
49
- * @property {Record<string, AsyncDataSource>} tables
50
- * @property {AbortSignal} [signal]
51
- */
52
-
53
46
  /**
54
47
  * Executes a SELECT query against the provided tables
55
48
  *
56
- * @param {ExecuteSelectOptions} options
49
+ * @param {Object} options
50
+ * @param {SelectStatement} options.select
51
+ * @param {Record<string, AsyncDataSource>} options.tables
52
+ * @param {Record<string, UserDefinedFunction>} [options.functions]
53
+ * @param {AbortSignal} [options.signal]
57
54
  * @yields {AsyncRow}
58
55
  */
59
- export async function* executeSelect({ select, tables, signal }) {
56
+ export async function* executeSelect({ select, tables, functions, signal }) {
60
57
  /** @type {AsyncDataSource} */
61
58
  let dataSource
62
59
  /** @type {string} */
63
- let fromTableName
60
+ let leftTable
64
61
 
65
62
  if (select.from.kind === 'table') {
66
63
  // Use alias for column prefixing, but look up the actual table name
67
- fromTableName = select.from.alias ?? select.from.table
64
+ leftTable = select.from.alias ?? select.from.table
68
65
  dataSource = tables[select.from.table]
69
66
  if (dataSource === undefined) {
70
67
  throw tableNotFoundError({ tableName: select.from.table })
71
68
  }
72
69
  } else {
73
70
  // Nested subquery - recursively resolve
74
- fromTableName = select.from.alias
75
- dataSource = generatorSource(executeSelect({ select: select.from.query, tables, signal }))
71
+ leftTable = select.from.alias
72
+ dataSource = generatorSource(executeSelect({ select: select.from.query, tables, functions, signal }))
76
73
  }
77
74
 
78
75
  // Execute JOINs if present
79
76
  if (select.joins.length) {
80
- dataSource = await executeJoins(dataSource, select.joins, fromTableName, tables)
77
+ dataSource = await executeJoins({ leftSource: dataSource, joins: select.joins, leftTable, tables, functions })
81
78
  }
82
79
 
83
- yield* evaluateSelectAst({ select, dataSource, tables, signal })
80
+ yield* evaluateSelectAst({ select, dataSource, tables, functions, signal })
84
81
  }
85
82
 
86
83
  /**
@@ -121,17 +118,20 @@ async function applyDistinct(rows, distinct) {
121
118
  }
122
119
  return result
123
120
  }
121
+
124
122
  /**
125
123
  * Applies ORDER BY sorting to rows using multi-pass lazy evaluation.
126
124
  * Secondary ORDER BY columns are only evaluated for rows that tie on
127
125
  * previous columns, reducing expensive cell evaluations.
128
126
  *
129
- * @param {AsyncRow[]} rows - the input rows
130
- * @param {OrderByItem[]} orderBy - the sort specifications
131
- * @param {Record<string, AsyncDataSource>} tables
127
+ * @param {Object} options
128
+ * @param {AsyncRow[]} options.rows - the input rows
129
+ * @param {OrderByItem[]} options.orderBy - the sort specifications
130
+ * @param {Record<string, AsyncDataSource>} options.tables
131
+ * @param {Record<string, UserDefinedFunction>} [options.functions]
132
132
  * @returns {Promise<AsyncRow[]>} the sorted rows
133
133
  */
134
- async function sortRows(rows, orderBy, tables) {
134
+ async function sortRows({ rows, orderBy, tables, functions }) {
135
135
  if (!orderBy.length) return rows
136
136
 
137
137
  // Cache for evaluated values: evaluatedValues[rowIdx][colIdx]
@@ -162,6 +162,7 @@ async function sortRows(rows, orderBy, tables) {
162
162
  node: term.expr,
163
163
  row: rows[idx],
164
164
  tables,
165
+ functions,
165
166
  })
166
167
  }
167
168
  }
@@ -206,21 +207,18 @@ async function sortRows(rows, orderBy, tables) {
206
207
  return groups.flat().map(i => rows[i])
207
208
  }
208
209
 
209
- /**
210
- * @typedef {Object} EvaluateSelectAstOptions
211
- * @property {SelectStatement} select
212
- * @property {AsyncDataSource} dataSource
213
- * @property {Record<string, AsyncDataSource>} tables
214
- * @property {AbortSignal} [signal]
215
- */
216
-
217
210
  /**
218
211
  * Evaluates a select with a resolved FROM data source
219
212
  *
220
- * @param {EvaluateSelectAstOptions} options
213
+ * @param {Object} options
214
+ * @param {SelectStatement} options.select
215
+ * @param {AsyncDataSource} options.dataSource
216
+ * @param {Record<string, AsyncDataSource>} options.tables
217
+ * @param {Record<string, UserDefinedFunction>} [options.functions]
218
+ * @param {AbortSignal} [options.signal]
221
219
  * @yields {AsyncRow}
222
220
  */
223
- async function* evaluateSelectAst({ select, dataSource, tables, signal }) {
221
+ async function* evaluateSelectAst({ select, dataSource, tables, functions, signal }) {
224
222
  // SQL priority: from, where, group by, having, select, order by, offset, limit
225
223
 
226
224
  const hasAggregate = select.columns.some(col => col.kind === 'derived' && containsAggregate(col.expr))
@@ -229,29 +227,26 @@ async function* evaluateSelectAst({ select, dataSource, tables, signal }) {
229
227
 
230
228
  if (needsBuffering) {
231
229
  // BUFFERING PATH: Collect all rows, process, then yield
232
- yield* evaluateBuffered({ select, dataSource, tables, hasAggregate, useGrouping, signal })
230
+ yield* evaluateBuffered({ select, dataSource, tables, functions, hasAggregate, useGrouping, signal })
233
231
  } else {
234
232
  // STREAMING PATH: Yield rows one by one
235
- yield* evaluateStreaming({ select, dataSource, tables, signal })
233
+ yield* evaluateStreaming({ select, dataSource, tables, functions, signal })
236
234
  }
237
235
  }
238
236
 
239
- /**
240
- * @typedef {Object} EvaluateStreamingOptions
241
- * @property {SelectStatement} select
242
- * @property {AsyncDataSource} dataSource
243
- * @property {Record<string, AsyncDataSource>} tables
244
- * @property {AbortSignal} [signal]
245
- */
246
-
247
237
  /**
248
238
  * Streaming evaluation for simple queries (no ORDER BY or GROUP BY)
249
239
  * Supports DISTINCT by tracking seen row keys without buffering full rows
250
240
  *
251
- * @param {EvaluateStreamingOptions} options
241
+ * @param {Object} options
242
+ * @param {SelectStatement} options.select
243
+ * @param {AsyncDataSource} options.dataSource
244
+ * @param {Record<string, AsyncDataSource>} options.tables
245
+ * @param {Record<string, UserDefinedFunction>} [options.functions]
246
+ * @param {AbortSignal} [options.signal]
252
247
  * @yields {AsyncRow}
253
248
  */
254
- async function* evaluateStreaming({ select, dataSource, tables, signal }) {
249
+ async function* evaluateStreaming({ select, dataSource, tables, functions, signal }) {
255
250
  let rowsYielded = 0
256
251
  let rowsSkipped = 0
257
252
  let rowIndex = 0
@@ -276,7 +271,7 @@ async function* evaluateStreaming({ select, dataSource, tables, signal }) {
276
271
  rowIndex++
277
272
  // WHERE filter
278
273
  if (select.where) {
279
- const pass = await evaluateExpr({ node: select.where, row, tables, rowIndex })
274
+ const pass = await evaluateExpr({ node: select.where, row, tables, functions, rowIndex })
280
275
  if (!pass) continue
281
276
  }
282
277
 
@@ -301,7 +296,7 @@ async function* evaluateStreaming({ select, dataSource, tables, signal }) {
301
296
  } else if (col.kind === 'derived') {
302
297
  const alias = col.alias ?? defaultDerivedAlias(col.expr)
303
298
  columns.push(alias)
304
- cells[alias] = () => evaluateExpr({ node: col.expr, row, tables, rowIndex: currentRowIndex })
299
+ cells[alias] = () => evaluateExpr({ node: col.expr, row, tables, functions, rowIndex: currentRowIndex })
305
300
  }
306
301
  }
307
302
 
@@ -325,23 +320,20 @@ async function* evaluateStreaming({ select, dataSource, tables, signal }) {
325
320
  }
326
321
  }
327
322
 
328
- /**
329
- * @typedef {Object} EvaluateBufferedOptions
330
- * @property {SelectStatement} select
331
- * @property {AsyncDataSource} dataSource
332
- * @property {Record<string, AsyncDataSource>} tables
333
- * @property {boolean} hasAggregate
334
- * @property {boolean} useGrouping
335
- * @property {AbortSignal} [signal]
336
- */
337
-
338
323
  /**
339
324
  * Buffered evaluation for complex queries (with ORDER BY or GROUP BY)
340
325
  *
341
- * @param {EvaluateBufferedOptions} options
326
+ * @param {Object} options
327
+ * @param {SelectStatement} options.select
328
+ * @param {AsyncDataSource} options.dataSource
329
+ * @param {Record<string, AsyncDataSource>} options.tables
330
+ * @param {Record<string, UserDefinedFunction>} [options.functions]
331
+ * @param {boolean} options.hasAggregate
332
+ * @param {boolean} options.useGrouping
333
+ * @param {AbortSignal} [options.signal]
342
334
  * @yields {AsyncRow}
343
335
  */
344
- async function* evaluateBuffered({ select, dataSource, tables, hasAggregate, useGrouping, signal }) {
336
+ async function* evaluateBuffered({ select, dataSource, tables, functions, hasAggregate, useGrouping, signal }) {
345
337
  // Build hints for data source optimization
346
338
  // Note: limit/offset not passed here since buffering needs all rows for sorting/grouping
347
339
  /** @type {QueryHints} */
@@ -365,7 +357,7 @@ async function* evaluateBuffered({ select, dataSource, tables, hasAggregate, use
365
357
  const row = working[i]
366
358
  const rowIndex = i + 1 // 1-based
367
359
  if (select.where) {
368
- const passes = await evaluateExpr({ node: select.where, row, tables, rowIndex })
360
+ const passes = await evaluateExpr({ node: select.where, row, tables, functions, rowIndex })
369
361
 
370
362
  if (!passes) {
371
363
  continue
@@ -390,7 +382,7 @@ async function* evaluateBuffered({ select, dataSource, tables, hasAggregate, use
390
382
  /** @type {string[]} */
391
383
  const keyParts = []
392
384
  for (const expr of select.groupBy) {
393
- const v = await evaluateExpr({ node: expr, row, tables })
385
+ const v = await evaluateExpr({ node: expr, row, tables, functions })
394
386
  keyParts.push(stringify(v))
395
387
  }
396
388
  const key = keyParts.join('|')
@@ -435,7 +427,7 @@ async function* evaluateBuffered({ select, dataSource, tables, hasAggregate, use
435
427
  columns.push(alias)
436
428
  // Pass group to evaluateExpr so it can handle aggregate functions within expressions
437
429
  // For empty groups, still provide an empty row context for aggregates to return appropriate values
438
- cells[alias] = () => evaluateExpr({ node: col.expr, row: group[0] ?? { columns: [], cells: {} }, tables, rows: group })
430
+ cells[alias] = () => evaluateExpr({ node: col.expr, row: group[0] ?? { columns: [], cells: {} }, tables, functions, rows: group })
439
431
  continue
440
432
  }
441
433
  }
@@ -443,7 +435,7 @@ async function* evaluateBuffered({ select, dataSource, tables, hasAggregate, use
443
435
 
444
436
  // Apply HAVING filter before adding to projected results
445
437
  if (select.having) {
446
- if (!await evaluateHavingExpr(select.having, asyncRow, group, tables)) {
438
+ if (!await evaluateHavingExpr({ expr: select.having, row: asyncRow, group, tables, functions })) {
447
439
  continue
448
440
  }
449
441
  }
@@ -453,7 +445,7 @@ async function* evaluateBuffered({ select, dataSource, tables, hasAggregate, use
453
445
  } else {
454
446
  // No grouping, simple projection
455
447
  // Sort before projection so ORDER BY can access columns not in SELECT
456
- const sorted = await sortRows(filtered, select.orderBy, tables)
448
+ const sorted = await sortRows({ rows: filtered, orderBy: select.orderBy, tables, functions })
457
449
 
458
450
  // OPTIMIZATION: For non-DISTINCT queries, apply OFFSET/LIMIT before projection
459
451
  // to avoid reading expensive cells for rows that won't be in the final result
@@ -477,7 +469,7 @@ async function* evaluateBuffered({ select, dataSource, tables, hasAggregate, use
477
469
  } else if (col.kind === 'derived') {
478
470
  const alias = col.alias ?? defaultDerivedAlias(col.expr)
479
471
  columns.push(alias)
480
- cells[alias] = () => evaluateExpr({ node: col.expr, row, tables })
472
+ cells[alias] = () => evaluateExpr({ node: col.expr, row, tables, functions })
481
473
  }
482
474
  }
483
475
  projected.push({ columns, cells })
@@ -489,7 +481,7 @@ async function* evaluateBuffered({ select, dataSource, tables, hasAggregate, use
489
481
 
490
482
  // Step 5: ORDER BY (final sort for grouped queries)
491
483
  if (useGrouping) {
492
- projected = await sortRows(projected, select.orderBy, tables)
484
+ projected = await sortRows({ rows: projected, orderBy: select.orderBy, tables, functions })
493
485
  }
494
486
 
495
487
  // Step 6: OFFSET and LIMIT