squirreling 0.7.10 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -85,6 +85,45 @@ const rows = await collect(executeSql({
85
85
 
86
86
  Because Squirreling uses lazy cell evaluation, the `AI_SCORE` function only executes for cells that are actually materialized. Combined with `LIMIT` or `WHERE`, this lets you query expensive operations efficiently.
87
87
 
88
+ ### Custom Data Sources
89
+
90
+ Squirreling can work with any data source that implements the `AsyncDataSource` interface.
91
+
92
+ ```typescript
93
+ interface AsyncDataSource {
94
+ scan(options: ScanOptions): ScanResults
95
+ }
96
+
97
+ interface ScanOptions {
98
+ columns?: string[]
99
+ where?: ExprNode
100
+ limit?: number
101
+ offset?: number
102
+ signal?: AbortSignal
103
+ }
104
+
105
+ interface ScanResults {
106
+ rows: AsyncIterable<AsyncRow> // async iterable of rows
107
+ appliedWhere: boolean // WHERE filter applied at scan time?
108
+ appliedLimitOffset: boolean // LIMIT and OFFSET applied at scan time?
109
+ }
110
+ ```
111
+
112
+ The `scan()` method returns a `ScanResults` object containing a row stream and flags indicating which query hints the data source applied. This allows optional pushdown optimizations such as filtering, limiting, and offsetting at the data source level when possible. Set `appliedWhere` or `appliedLimitOffset` to `true` if the data source handled the corresponding hints, or to `false` if the engine should apply them.
113
+
114
+ ```typescript
115
+ const customSource: AsyncDataSource = {
116
+ scan({ columns, where, limit, offset, signal }) {
117
+ // Use hints to optimize your scan, or ignore them
118
+ return {
119
+ rows: fetchAllRows({ columns, signal }),
120
+ appliedWhere: false, // source returned all rows, engine will filter
121
+ appliedLimitOffset: false, // source returned all rows, engine will limit/skip
122
+ }
123
+ },
124
+ }
125
+ ```
126
+
88
127
  ## Supported SQL Syntax
89
128
 
90
129
  Squirreling mostly follows the SQL standard. The following features are supported:
@@ -95,6 +134,12 @@ Squirreling mostly follows the SQL standard. The following features are supporte
95
134
  - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `POSITIONAL JOIN`
96
135
  - `GROUP BY` and `HAVING` clauses
97
136
 
137
+ ### Quoting
138
+
139
+ - Single quotes for string literals: `'hello world'`
140
+ - Double quotes for identifiers with spaces or special characters: `"column name"`
141
+ - Escape quotes by doubling: `'can''t'` or `"col""name"`
142
+
98
143
  ### Functions
99
144
 
100
145
  - Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `JSON_ARRAYAGG`
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.7.10",
4
- "description": "Squirreling SQL Engine",
3
+ "version": "0.9.0",
4
+ "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
7
7
  "keywords": [
@@ -37,10 +37,10 @@
37
37
  "test": "vitest run"
38
38
  },
39
39
  "devDependencies": {
40
- "@types/node": "25.2.0",
40
+ "@types/node": "25.2.3",
41
41
  "@vitest/coverage-v8": "4.0.18",
42
42
  "eslint": "9.39.2",
43
- "eslint-plugin-jsdoc": "62.5.0",
43
+ "eslint-plugin-jsdoc": "62.5.5",
44
44
  "typescript": "5.9.3",
45
45
  "vitest": "4.0.18"
46
46
  }
@@ -1,24 +1,7 @@
1
1
  /**
2
- * @import { AsyncCells, AsyncDataSource, AsyncRow, ScanOptions, SqlPrimitive } from '../types.js'
2
+ * @import { AsyncCells, AsyncDataSource, AsyncRow, ScanOptions, ScanResults, SqlPrimitive } from '../types.js'
3
3
  */
4
4
 
5
- /**
6
- * Wraps an async generator of plain objects into an AsyncDataSource
7
- *
8
- * @param {AsyncGenerator<AsyncRow>} gen
9
- * @returns {AsyncDataSource}
10
- */
11
- export function generatorSource(gen) {
12
- return {
13
- async *scan({ signal }) {
14
- for await (const row of gen) {
15
- if (signal?.aborted) break
16
- yield row
17
- }
18
- },
19
- }
20
- }
21
-
22
5
  /**
23
6
  * Creates an async row accessor that wraps a plain JavaScript object
24
7
  *
@@ -42,10 +25,19 @@ function asyncRow(obj) {
42
25
  */
43
26
  export function memorySource(data) {
44
27
  return {
45
- async *scan({ signal }) {
46
- for (const item of data) {
47
- if (signal?.aborted) break
48
- yield asyncRow(item)
28
+ scan({ where, limit, offset, signal }) {
29
+ // Only apply offset and limit if no where clause
30
+ const start = !where ? offset ?? 0 : 0
31
+ const end = !where && limit !== undefined ? start + limit : data.length
32
+ return {
33
+ rows: (async function* () {
34
+ for (let i = start; i < end && i < data.length; i++) {
35
+ if (signal?.aborted) break
36
+ yield asyncRow(data[i])
37
+ }
38
+ })(),
39
+ appliedWhere: false,
40
+ appliedLimitOffset: !where,
49
41
  }
50
42
  },
51
43
  }
@@ -60,33 +52,46 @@ export function cachedDataSource(source) {
60
52
  /** @type {Map<string, Promise<SqlPrimitive>>} */
61
53
  const cache = new Map()
62
54
  return {
63
- /**
64
- * @param {ScanOptions} options
65
- * @yields {AsyncRow}
66
- */
67
- async *scan(options) {
68
- const { signal } = options
69
- let index = 0
70
- for await (const row of source.scan(options)) {
71
- if (signal?.aborted) break
72
- const rowIndex = index
73
- /** @type {AsyncCells} */
74
- const cells = {}
75
- for (const key of row.columns) {
76
- const cell = row.cells[key]
77
- // Wrap the cell to cache accesses
78
- cells[key] = () => {
79
- const cacheKey = `${rowIndex}:${key}`
80
- let value = cache.get(cacheKey)
81
- if (!value) {
82
- value = cell()
83
- cache.set(cacheKey, value)
55
+ scan(options) {
56
+ // Does re-run the scan, but cache avoids re-computing expensive async cells
57
+ // TODO: check cache first to avoid re-scanning when possible
58
+ const { rows, appliedWhere, appliedLimitOffset } = source.scan(options)
59
+
60
+ // Applied where clause changes which rows are returned so can't be cached
61
+ if (appliedWhere && options.where) {
62
+ return { rows, appliedWhere, appliedLimitOffset }
63
+ }
64
+
65
+ // Adjust index when source applied offset so cache keys match original rows
66
+ const indexOffset = appliedLimitOffset && options.offset ? options.offset : 0
67
+
68
+ return {
69
+ rows: (async function* () {
70
+ let index = 0
71
+ for await (const row of rows) {
72
+ if (options.signal?.aborted) break
73
+ const rowIndex = index + indexOffset
74
+ /** @type {AsyncCells} */
75
+ const cells = {}
76
+ for (const key of row.columns) {
77
+ const cell = row.cells[key]
78
+ // Wrap the cell to cache accesses
79
+ cells[key] = () => {
80
+ const cacheKey = `${rowIndex}:${key}`
81
+ let value = cache.get(cacheKey)
82
+ if (!value) {
83
+ value = cell()
84
+ cache.set(cacheKey, value)
85
+ }
86
+ return value
87
+ }
84
88
  }
85
- return value
89
+ yield { columns: row.columns, cells }
90
+ index++
86
91
  }
87
- }
88
- yield { columns: row.columns, cells }
89
- index++
92
+ })(),
93
+ appliedWhere,
94
+ appliedLimitOffset,
90
95
  }
91
96
  },
92
97
  }
@@ -0,0 +1,138 @@
1
+ import { evaluateExpr } from '../expression/evaluate.js'
2
+ import { defaultDerivedAlias, stringify } from './utils.js'
3
+ import { executePlan } from './execute.js'
4
+
5
+ /**
6
+ * @import { AsyncCells, AsyncRow, ExecuteContext, SelectColumn } from '../types.js'
7
+ * @import { HashAggregateNode, ScalarAggregateNode } from '../plan/types.js'
8
+ */
9
+
10
+ /**
11
+ * Projects aggregate columns from a group of rows
12
+ *
13
+ * @param {SelectColumn[]} selectColumns
14
+ * @param {AsyncRow[]} group
15
+ * @param {ExecuteContext} context
16
+ * @returns {AsyncRow}
17
+ */
18
+ function projectAggregateColumns(selectColumns, group, context) {
19
+ /** @type {string[]} */
20
+ const columns = []
21
+ /** @type {AsyncCells} */
22
+ const cells = {}
23
+
24
+ for (const col of selectColumns) {
25
+ if (col.kind === 'star') {
26
+ const firstRow = group[0]
27
+ if (firstRow) {
28
+ for (const key of firstRow.columns) {
29
+ columns.push(key)
30
+ cells[key] = firstRow.cells[key]
31
+ }
32
+ }
33
+ } else if (col.kind === 'derived') {
34
+ const alias = col.alias ?? defaultDerivedAlias(col.expr)
35
+ columns.push(alias)
36
+ cells[alias] = () => evaluateExpr({
37
+ node: col.expr,
38
+ row: group[0] ?? { columns: [], cells: {} },
39
+ rows: group,
40
+ context,
41
+ })
42
+ }
43
+ }
44
+
45
+ return { columns, cells }
46
+ }
47
+
48
+ /**
49
+ * Executes a hash aggregate operation (GROUP BY)
50
+ *
51
+ * @param {HashAggregateNode} plan
52
+ * @param {ExecuteContext} context
53
+ * @yields {AsyncRow}
54
+ */
55
+ export async function* executeHashAggregate(plan, context) {
56
+ // Collect all rows
57
+ /** @type {AsyncRow[]} */
58
+ const allRows = []
59
+ for await (const row of executePlan({ plan: plan.child, context })) {
60
+ if (context.signal?.aborted) return
61
+ allRows.push(row)
62
+ }
63
+
64
+ // Group rows by GROUP BY keys
65
+ /** @type {Map<string, AsyncRow[]>} */
66
+ const groupMap = new Map()
67
+ /** @type {AsyncRow[][]} */
68
+ const groups = []
69
+
70
+ for (const row of allRows) {
71
+ /** @type {string[]} */
72
+ const keyParts = []
73
+ for (const expr of plan.groupBy) {
74
+ const v = await evaluateExpr({ node: expr, row, context })
75
+ keyParts.push(stringify(v))
76
+ }
77
+ const key = keyParts.join('|')
78
+ let group = groupMap.get(key)
79
+ if (!group) {
80
+ group = []
81
+ groupMap.set(key, group)
82
+ groups.push(group)
83
+ }
84
+ group.push(row)
85
+ }
86
+
87
+ // Yield one row per group
88
+ for (const group of groups) {
89
+ const asyncRow = projectAggregateColumns(plan.columns, group, context)
90
+
91
+ // Apply HAVING filter
92
+ if (plan.having) {
93
+ const havingRow = { ...group[0], ...asyncRow }
94
+ const passes = await evaluateExpr({
95
+ node: plan.having,
96
+ row: havingRow,
97
+ rows: group,
98
+ context,
99
+ })
100
+ if (!passes) continue
101
+ }
102
+
103
+ yield asyncRow
104
+ }
105
+ }
106
+
107
+ /**
108
+ * Executes a scalar aggregate operation (no GROUP BY, whole table aggregate)
109
+ *
110
+ * @param {ScalarAggregateNode} plan
111
+ * @param {ExecuteContext} context
112
+ * @yields {AsyncRow}
113
+ */
114
+ export async function* executeScalarAggregate(plan, context) {
115
+ // Collect all rows into single group
116
+ /** @type {AsyncRow[]} */
117
+ const group = []
118
+ for await (const row of executePlan({ plan: plan.child, context })) {
119
+ if (context.signal?.aborted) return
120
+ group.push(row)
121
+ }
122
+
123
+ const asyncRow = projectAggregateColumns(plan.columns, group, context)
124
+
125
+ // Apply HAVING filter
126
+ if (plan.having) {
127
+ const havingRow = { ...group[0], ...asyncRow }
128
+ const passes = await evaluateExpr({
129
+ node: plan.having,
130
+ row: havingRow,
131
+ rows: group,
132
+ context,
133
+ })
134
+ if (!passes) return
135
+ }
136
+
137
+ yield asyncRow
138
+ }