squirreling 0.7.9 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -85,6 +85,45 @@ const rows = await collect(executeSql({
85
85
 
86
86
  Because Squirreling uses lazy cell evaluation, the `AI_SCORE` function only executes for cells that are actually materialized. Combined with `LIMIT` or `WHERE`, this lets you query expensive operations efficiently.
87
87
 
88
+ ### Custom Data Sources
89
+
90
+ Squirreling can work with any data source that implements the `AsyncDataSource` interface.
91
+
92
+ ```typescript
93
+ interface AsyncDataSource {
94
+ scan(options: ScanOptions): ScanResults
95
+ }
96
+
97
+ interface ScanOptions {
98
+ columns?: string[]
99
+ where?: ExprNode
100
+ limit?: number
101
+ offset?: number
102
+ signal?: AbortSignal
103
+ }
104
+
105
+ interface ScanResults {
106
+ rows: AsyncIterable<AsyncRow> // async iterable of rows
107
+ appliedWhere: boolean // WHERE filter applied at scan time?
108
+ appliedLimitOffset: boolean // LIMIT and OFFSET applied at scan time?
109
+ }
110
+ ```
111
+
112
+ The `scan()` method returns a `ScanResults` object containing a row stream and flags indicating which query hints were applied by the data source. This allows optional pushdown optimizations such as filtering, limiting, and offsetting at the data source level when possible. Set `appliedWhere` or `appliedLimitOffset` to `true` if the data source handled the corresponding hint, or to `false` if the engine should apply it.
113
+
114
+ ```typescript
115
+ const customSource: AsyncDataSource = {
116
+ scan({ columns, where, limit, offset, signal }) {
117
+ // Use hints to optimize your scan, or ignore them
118
+ return {
119
+ rows: fetchAllRows({ columns, signal }),
120
+ appliedWhere: false, // source returned all rows, engine will filter
121
+ appliedLimitOffset: false, // source returned all rows, engine will limit/skip
122
+ }
123
+ },
124
+ }
125
+ ```
126
+
88
127
  ## Supported SQL Syntax
89
128
 
90
129
  Squirreling mostly follows the SQL standard. The following features are supported:
@@ -95,6 +134,12 @@ Squirreling mostly follows the SQL standard. The following features are supporte
95
134
  - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `POSITIONAL JOIN`
96
135
  - `GROUP BY` and `HAVING` clauses
97
136
 
137
+ ### Quoting
138
+
139
+ - Single quotes for string literals: `'hello world'`
140
+ - Double quotes for identifiers with spaces or special characters: `"column name"`
141
+ - Escape quotes by doubling: `'can''t'` or `"col""name"`
142
+
98
143
  ### Functions
99
144
 
100
145
  - Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `JSON_ARRAYAGG`
@@ -104,4 +149,5 @@ Squirreling mostly follows the SQL standard. The following features are supporte
104
149
  - Date: `CURRENT_DATE`, `CURRENT_TIME`, `CURRENT_TIMESTAMP`, `INTERVAL`
105
150
  - JSON: `JSON_VALUE`, `JSON_QUERY`, `JSON_OBJECT`
106
151
  - Regex: `REGEXP_SUBSTR`, `REGEXP_REPLACE`
152
+ - Conditional: `COALESCE`, `NULLIF`
107
153
  - User-defined functions (UDFs)
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.7.9",
4
- "description": "Squirreling SQL Engine",
3
+ "version": "0.8.0",
4
+ "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
7
7
  "keywords": [
@@ -37,11 +37,11 @@
37
37
  "test": "vitest run"
38
38
  },
39
39
  "devDependencies": {
40
- "@types/node": "25.0.9",
41
- "@vitest/coverage-v8": "4.0.17",
40
+ "@types/node": "25.2.2",
41
+ "@vitest/coverage-v8": "4.0.18",
42
42
  "eslint": "9.39.2",
43
- "eslint-plugin-jsdoc": "62.0.0",
43
+ "eslint-plugin-jsdoc": "62.5.4",
44
44
  "typescript": "5.9.3",
45
- "vitest": "4.0.17"
45
+ "vitest": "4.0.18"
46
46
  }
47
47
  }
@@ -1,24 +1,7 @@
1
1
  /**
2
- * @import { AsyncCell, AsyncCells, AsyncDataSource, AsyncRow, ScanOptions, SqlPrimitive } from '../types.js'
2
+ * @import { AsyncCells, AsyncDataSource, AsyncRow, ScanOptions, ScanResults, SqlPrimitive } from '../types.js'
3
3
  */
4
4
 
5
- /**
6
- * Wraps an async generator of plain objects into an AsyncDataSource
7
- *
8
- * @param {AsyncGenerator<AsyncRow>} gen
9
- * @returns {AsyncDataSource}
10
- */
11
- export function generatorSource(gen) {
12
- return {
13
- async *scan({ signal }) {
14
- for await (const row of gen) {
15
- if (signal?.aborted) break
16
- yield row
17
- }
18
- },
19
- }
20
- }
21
-
22
5
  /**
23
6
  * Creates an async row accessor that wraps a plain JavaScript object
24
7
  *
@@ -42,10 +25,19 @@ function asyncRow(obj) {
42
25
  */
43
26
  export function memorySource(data) {
44
27
  return {
45
- async *scan({ signal }) {
46
- for (const item of data) {
47
- if (signal?.aborted) break
48
- yield asyncRow(item)
28
+ scan({ where, limit, offset, signal }) {
29
+ // Only apply offset and limit if no where clause
30
+ const start = !where ? offset ?? 0 : 0
31
+ const end = !where && limit !== undefined ? start + limit : data.length
32
+ return {
33
+ rows: (async function* () {
34
+ for (let i = start; i < end && i < data.length; i++) {
35
+ if (signal?.aborted) break
36
+ yield asyncRow(data[i])
37
+ }
38
+ })(),
39
+ appliedWhere: false,
40
+ appliedLimitOffset: !where,
49
41
  }
50
42
  },
51
43
  }
@@ -60,33 +52,46 @@ export function cachedDataSource(source) {
60
52
  /** @type {Map<string, Promise<SqlPrimitive>>} */
61
53
  const cache = new Map()
62
54
  return {
63
- /**
64
- * @param {ScanOptions} options
65
- * @yields {AsyncRow}
66
- */
67
- async *scan(options) {
68
- const { signal } = options
69
- let index = 0
70
- for await (const row of source.scan(options)) {
71
- if (signal?.aborted) break
72
- const rowIndex = index
73
- /** @type {AsyncCells} */
74
- const cells = {}
75
- for (const key of row.columns) {
76
- const cell = row.cells[key]
77
- // Wrap the cell to cache accesses
78
- cells[key] = () => {
79
- const cacheKey = `${rowIndex}:${key}`
80
- let value = cache.get(cacheKey)
81
- if (!value) {
82
- value = cell()
83
- cache.set(cacheKey, value)
55
+ scan(options) {
56
+ // Does re-run the scan, but cache avoids re-computing expensive async cells
57
+ // TODO: check cache first to avoid re-scanning when possible
58
+ const { rows, appliedWhere, appliedLimitOffset } = source.scan(options)
59
+
60
+ // Applied where clause changes which rows are returned so can't be cached
61
+ if (appliedWhere && options.where) {
62
+ return { rows, appliedWhere, appliedLimitOffset }
63
+ }
64
+
65
+ // Adjust index when source applied offset so cache keys match original rows
66
+ const indexOffset = appliedLimitOffset && options.offset ? options.offset : 0
67
+
68
+ return {
69
+ rows: (async function* () {
70
+ let index = 0
71
+ for await (const row of rows) {
72
+ if (options.signal?.aborted) break
73
+ const rowIndex = index + indexOffset
74
+ /** @type {AsyncCells} */
75
+ const cells = {}
76
+ for (const key of row.columns) {
77
+ const cell = row.cells[key]
78
+ // Wrap the cell to cache accesses
79
+ cells[key] = () => {
80
+ const cacheKey = `${rowIndex}:${key}`
81
+ let value = cache.get(cacheKey)
82
+ if (!value) {
83
+ value = cell()
84
+ cache.set(cacheKey, value)
85
+ }
86
+ return value
87
+ }
84
88
  }
85
- return value
89
+ yield { columns: row.columns, cells }
90
+ index++
86
91
  }
87
- }
88
- yield { columns: row.columns, cells }
89
- index++
92
+ })(),
93
+ appliedWhere,
94
+ appliedLimitOffset,
90
95
  }
91
96
  },
92
97
  }
@@ -0,0 +1,150 @@
1
+ import { evaluateExpr } from '../expression/evaluate.js'
2
+ import { defaultDerivedAlias, stringify } from './utils.js'
3
+ import { executePlan } from './execute.js'
4
+
5
+ /**
6
+ * @import { AsyncCells, AsyncDataSource, AsyncRow, SelectColumn, UserDefinedFunction } from '../types.js'
7
+ * @import { ExecuteContext, HashAggregateNode, ScalarAggregateNode } from '../plan/types.js'
8
+ */
9
+
10
+ /**
11
+ * Projects aggregate columns from a group of rows
12
+ *
13
+ * @param {SelectColumn[]} selectColumns
14
+ * @param {AsyncRow[]} group
15
+ * @param {Record<string, AsyncDataSource>} tables
16
+ * @param {Record<string, UserDefinedFunction>} [functions]
17
+ * @param {AbortSignal} [signal]
18
+ * @returns {AsyncRow}
19
+ */
20
+ function projectAggregateColumns(selectColumns, group, tables, functions, signal) {
21
+ /** @type {string[]} */
22
+ const columns = []
23
+ /** @type {AsyncCells} */
24
+ const cells = {}
25
+
26
+ for (const col of selectColumns) {
27
+ if (col.kind === 'star') {
28
+ const firstRow = group[0]
29
+ if (firstRow) {
30
+ for (const key of firstRow.columns) {
31
+ columns.push(key)
32
+ cells[key] = firstRow.cells[key]
33
+ }
34
+ }
35
+ } else if (col.kind === 'derived') {
36
+ const alias = col.alias ?? defaultDerivedAlias(col.expr)
37
+ columns.push(alias)
38
+ cells[alias] = () => evaluateExpr({
39
+ node: col.expr,
40
+ row: group[0] ?? { columns: [], cells: {} },
41
+ tables,
42
+ functions,
43
+ rows: group,
44
+ signal,
45
+ })
46
+ }
47
+ }
48
+
49
+ return { columns, cells }
50
+ }
51
+
52
+ /**
53
+ * Executes a hash aggregate operation (GROUP BY)
54
+ *
55
+ * @param {HashAggregateNode} plan
56
+ * @param {ExecuteContext} context
57
+ * @yields {AsyncRow}
58
+ */
59
+ export async function* executeHashAggregate(plan, context) {
60
+ const { tables, functions, signal } = context
61
+
62
+ // Collect all rows
63
+ /** @type {AsyncRow[]} */
64
+ const allRows = []
65
+ for await (const row of executePlan(plan.child, context)) {
66
+ if (signal?.aborted) return
67
+ allRows.push(row)
68
+ }
69
+
70
+ // Group rows by GROUP BY keys
71
+ /** @type {Map<string, AsyncRow[]>} */
72
+ const groupMap = new Map()
73
+ /** @type {AsyncRow[][]} */
74
+ const groups = []
75
+
76
+ for (const row of allRows) {
77
+ /** @type {string[]} */
78
+ const keyParts = []
79
+ for (const expr of plan.groupBy) {
80
+ const v = await evaluateExpr({ node: expr, row, tables, functions, signal })
81
+ keyParts.push(stringify(v))
82
+ }
83
+ const key = keyParts.join('|')
84
+ let group = groupMap.get(key)
85
+ if (!group) {
86
+ group = []
87
+ groupMap.set(key, group)
88
+ groups.push(group)
89
+ }
90
+ group.push(row)
91
+ }
92
+
93
+ // Yield one row per group
94
+ for (const group of groups) {
95
+ const asyncRow = projectAggregateColumns(plan.columns, group, tables, functions, signal)
96
+
97
+ // Apply HAVING filter
98
+ if (plan.having) {
99
+ const context = { ...group[0], ...asyncRow }
100
+ const passes = await evaluateExpr({
101
+ node: plan.having,
102
+ row: context,
103
+ rows: group,
104
+ tables,
105
+ functions,
106
+ signal,
107
+ })
108
+ if (!passes) continue
109
+ }
110
+
111
+ yield asyncRow
112
+ }
113
+ }
114
+
115
+ /**
116
+ * Executes a scalar aggregate operation (no GROUP BY, whole table aggregate)
117
+ *
118
+ * @param {ScalarAggregateNode} plan
119
+ * @param {ExecuteContext} context
120
+ * @yields {AsyncRow}
121
+ */
122
+ export async function* executeScalarAggregate(plan, context) {
123
+ const { tables, functions, signal } = context
124
+
125
+ // Collect all rows into single group
126
+ /** @type {AsyncRow[]} */
127
+ const group = []
128
+ for await (const row of executePlan(plan.child, context)) {
129
+ if (signal?.aborted) return
130
+ group.push(row)
131
+ }
132
+
133
+ const asyncRow = projectAggregateColumns(plan.columns, group, tables, functions, signal)
134
+
135
+ // Apply HAVING filter
136
+ if (plan.having) {
137
+ const context = { ...group[0], ...asyncRow }
138
+ const passes = await evaluateExpr({
139
+ node: plan.having,
140
+ row: context,
141
+ rows: group,
142
+ tables,
143
+ functions,
144
+ signal,
145
+ })
146
+ if (!passes) return
147
+ }
148
+
149
+ yield asyncRow
150
+ }
@@ -1,46 +1,7 @@
1
- import { isAggregateFunc } from '../validation.js'
2
-
3
1
  /**
4
2
  * @import { ExprNode, SelectStatement, SelectColumn } from '../types.js'
5
3
  */
6
4
 
7
- /**
8
- * Checks if an expression contains any aggregate function calls
9
- *
10
- * @param {ExprNode | undefined} expr
11
- * @returns {boolean}
12
- */
13
- export function containsAggregate(expr) {
14
- if (!expr) return false
15
- if (expr.type === 'function' && isAggregateFunc(expr.name.toUpperCase())) {
16
- return true
17
- }
18
- if (expr.type === 'binary') {
19
- return containsAggregate(expr.left) || containsAggregate(expr.right)
20
- }
21
- if (expr.type === 'unary') {
22
- return containsAggregate(expr.argument)
23
- }
24
- if (expr.type === 'cast') {
25
- return containsAggregate(expr.expr)
26
- }
27
- if (expr.type === 'case') {
28
- if (expr.caseExpr && containsAggregate(expr.caseExpr)) return true
29
- for (const when of expr.whenClauses) {
30
- if (containsAggregate(when.condition) || containsAggregate(when.result)) return true
31
- }
32
- if (containsAggregate(expr.elseResult)) return true
33
- }
34
- if (expr.type === 'in valuelist') {
35
- if (containsAggregate(expr.expr)) return true
36
- for (const val of expr.values) {
37
- if (containsAggregate(val)) return true
38
- }
39
- }
40
- // Note: Don't recurse into subqueries - they have their own aggregate scope
41
- return false
42
- }
43
-
44
5
  /**
45
6
  * Extracts column names needed from a SELECT statement.
46
7
  *