squirreling 0.7.10 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -0
- package/package.json +4 -4
- package/src/backend/dataSource.js +52 -47
- package/src/execute/aggregates.js +150 -0
- package/src/execute/columns.js +0 -39
- package/src/execute/execute.js +158 -415
- package/src/execute/join.js +179 -333
- package/src/execute/sort.js +99 -0
- package/src/execute/utils.js +18 -49
- package/src/executionErrors.js +10 -10
- package/src/expression/binary.js +51 -0
- package/src/{execute → expression}/date.js +18 -18
- package/src/{execute/expression.js → expression/evaluate.js} +56 -64
- package/src/{execute → expression}/math.js +46 -81
- package/src/{execute → expression}/regexp.js +7 -7
- package/src/{execute → expression}/strings.js +33 -45
- package/src/index.d.ts +2 -1
- package/src/parse/expression.js +42 -50
- package/src/parse/joins.js +7 -2
- package/src/parse/parse.js +14 -3
- package/src/parse/state.js +2 -1
- package/src/parse/types.d.ts +30 -0
- package/src/plan/plan.js +234 -0
- package/src/plan/types.d.ts +101 -0
- package/src/types.d.ts +19 -39
- package/src/validation.js +64 -1
- package/src/validationErrors.js +7 -7
- package/src/execute/having.js +0 -202
- package/src/execute/tableSource.js +0 -63
package/README.md
CHANGED
|
@@ -85,6 +85,45 @@ const rows = await collect(executeSql({
|
|
|
85
85
|
|
|
86
86
|
Because Squirreling uses lazy cell evaluation, the `AI_SCORE` function only executes for cells that are actually materialized. Combined with `LIMIT` or `WHERE`, you can efficiently query expensive operations.
|
|
87
87
|
|
|
88
|
+
### Custom Data Sources
|
|
89
|
+
|
|
90
|
+
Squirreling can work with any data source that implements the `AsyncDataSource` interface.
|
|
91
|
+
|
|
92
|
+
```typescript
|
|
93
|
+
interface AsyncDataSource {
|
|
94
|
+
scan(options: ScanOptions): ScanResults
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
interface ScanOptions {
|
|
98
|
+
columns?: string[]
|
|
99
|
+
where?: ExprNode
|
|
100
|
+
limit?: number
|
|
101
|
+
offset?: number
|
|
102
|
+
signal?: AbortSignal
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
interface ScanResults {
|
|
106
|
+
rows: AsyncIterable<AsyncRow> // async iterable of rows
|
|
107
|
+
appliedWhere: boolean // WHERE filter applied at scan time?
|
|
108
|
+
appliedLimitOffset: boolean // LIMIT and OFFSET applied at scan time?
|
|
109
|
+
}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
The `scan()` method returns a `ScanResults` object containing a row stream and flags indicating which query hints the data source applied. This allows optional pushdown optimizations such as filtering, limiting, and offsetting at the data source level when possible. Set `appliedWhere` or `appliedLimitOffset` to `true` if the data source handled them, or to `false` if the engine should apply them.
|
|
113
|
+
|
|
114
|
+
```typescript
|
|
115
|
+
const customSource: AsyncDataSource = {
|
|
116
|
+
scan({ columns, where, limit, offset, signal }) {
|
|
117
|
+
// Use hints to optimize your scan, or ignore them
|
|
118
|
+
return {
|
|
119
|
+
rows: fetchAllRows({ columns, signal }),
|
|
120
|
+
appliedWhere: false, // source returned all rows, engine will filter
|
|
121
|
+
appliedLimitOffset: false, // source returned all rows, engine will limit/skip
|
|
122
|
+
}
|
|
123
|
+
},
|
|
124
|
+
}
|
|
125
|
+
```
|
|
126
|
+
|
|
88
127
|
## Supported SQL Syntax
|
|
89
128
|
|
|
90
129
|
Squirreling mostly follows the SQL standard. The following features are supported:
|
|
@@ -95,6 +134,12 @@ Squirreling mostly follows the SQL standard. The following features are supporte
|
|
|
95
134
|
- `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `POSITIONAL JOIN`
|
|
96
135
|
- `GROUP BY` and `HAVING` clauses
|
|
97
136
|
|
|
137
|
+
### Quoting
|
|
138
|
+
|
|
139
|
+
- Single quotes for string literals: `'hello world'`
|
|
140
|
+
- Double quotes for identifiers with spaces or special characters: `"column name"`
|
|
141
|
+
- Escape quotes by doubling: `'can''t'` or `"col""name"`
|
|
142
|
+
|
|
98
143
|
### Functions
|
|
99
144
|
|
|
100
145
|
- Aggregate: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `JSON_ARRAYAGG`
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "squirreling",
|
|
3
|
-
"version": "0.7.10",
|
|
4
|
-
"description": "Squirreling SQL Engine",
|
|
3
|
+
"version": "0.8.0",
|
|
4
|
+
"description": "Squirreling Async SQL Engine",
|
|
5
5
|
"author": "Hyperparam",
|
|
6
6
|
"homepage": "https://hyperparam.app",
|
|
7
7
|
"keywords": [
|
|
@@ -37,10 +37,10 @@
|
|
|
37
37
|
"test": "vitest run"
|
|
38
38
|
},
|
|
39
39
|
"devDependencies": {
|
|
40
|
-
"@types/node": "25.2.
|
|
40
|
+
"@types/node": "25.2.2",
|
|
41
41
|
"@vitest/coverage-v8": "4.0.18",
|
|
42
42
|
"eslint": "9.39.2",
|
|
43
|
-
"eslint-plugin-jsdoc": "62.5.
|
|
43
|
+
"eslint-plugin-jsdoc": "62.5.4",
|
|
44
44
|
"typescript": "5.9.3",
|
|
45
45
|
"vitest": "4.0.18"
|
|
46
46
|
}
|
|
@@ -1,24 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @import { AsyncCells, AsyncDataSource, AsyncRow, ScanOptions, SqlPrimitive } from '../types.js'
|
|
2
|
+
* @import { AsyncCells, AsyncDataSource, AsyncRow, ScanOptions, ScanResults, SqlPrimitive } from '../types.js'
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
/**
|
|
6
|
-
* Wraps an async generator of plain objects into an AsyncDataSource
|
|
7
|
-
*
|
|
8
|
-
* @param {AsyncGenerator<AsyncRow>} gen
|
|
9
|
-
* @returns {AsyncDataSource}
|
|
10
|
-
*/
|
|
11
|
-
export function generatorSource(gen) {
|
|
12
|
-
return {
|
|
13
|
-
async *scan({ signal }) {
|
|
14
|
-
for await (const row of gen) {
|
|
15
|
-
if (signal?.aborted) break
|
|
16
|
-
yield row
|
|
17
|
-
}
|
|
18
|
-
},
|
|
19
|
-
}
|
|
20
|
-
}
|
|
21
|
-
|
|
22
5
|
/**
|
|
23
6
|
* Creates an async row accessor that wraps a plain JavaScript object
|
|
24
7
|
*
|
|
@@ -42,10 +25,19 @@ function asyncRow(obj) {
|
|
|
42
25
|
*/
|
|
43
26
|
export function memorySource(data) {
|
|
44
27
|
return {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
28
|
+
scan({ where, limit, offset, signal }) {
|
|
29
|
+
// Only apply offset and limit if no where clause
|
|
30
|
+
const start = !where ? offset ?? 0 : 0
|
|
31
|
+
const end = !where && limit !== undefined ? start + limit : data.length
|
|
32
|
+
return {
|
|
33
|
+
rows: (async function* () {
|
|
34
|
+
for (let i = start; i < end && i < data.length; i++) {
|
|
35
|
+
if (signal?.aborted) break
|
|
36
|
+
yield asyncRow(data[i])
|
|
37
|
+
}
|
|
38
|
+
})(),
|
|
39
|
+
appliedWhere: false,
|
|
40
|
+
appliedLimitOffset: !where,
|
|
49
41
|
}
|
|
50
42
|
},
|
|
51
43
|
}
|
|
@@ -60,33 +52,46 @@ export function cachedDataSource(source) {
|
|
|
60
52
|
/** @type {Map<string, Promise<SqlPrimitive>>} */
|
|
61
53
|
const cache = new Map()
|
|
62
54
|
return {
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
55
|
+
scan(options) {
|
|
56
|
+
// Does re-run the scan, but cache avoids re-computing expensive async cells
|
|
57
|
+
// TODO: check cache first to avoid re-scanning when possible
|
|
58
|
+
const { rows, appliedWhere, appliedLimitOffset } = source.scan(options)
|
|
59
|
+
|
|
60
|
+
// Applied where clause changes which rows are returned so can't be cached
|
|
61
|
+
if (appliedWhere && options.where) {
|
|
62
|
+
return { rows, appliedWhere, appliedLimitOffset }
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Adjust index when source applied offset so cache keys match original rows
|
|
66
|
+
const indexOffset = appliedLimitOffset && options.offset ? options.offset : 0
|
|
67
|
+
|
|
68
|
+
return {
|
|
69
|
+
rows: (async function* () {
|
|
70
|
+
let index = 0
|
|
71
|
+
for await (const row of rows) {
|
|
72
|
+
if (options.signal?.aborted) break
|
|
73
|
+
const rowIndex = index + indexOffset
|
|
74
|
+
/** @type {AsyncCells} */
|
|
75
|
+
const cells = {}
|
|
76
|
+
for (const key of row.columns) {
|
|
77
|
+
const cell = row.cells[key]
|
|
78
|
+
// Wrap the cell to cache accesses
|
|
79
|
+
cells[key] = () => {
|
|
80
|
+
const cacheKey = `${rowIndex}:${key}`
|
|
81
|
+
let value = cache.get(cacheKey)
|
|
82
|
+
if (!value) {
|
|
83
|
+
value = cell()
|
|
84
|
+
cache.set(cacheKey, value)
|
|
85
|
+
}
|
|
86
|
+
return value
|
|
87
|
+
}
|
|
84
88
|
}
|
|
85
|
-
|
|
89
|
+
yield { columns: row.columns, cells }
|
|
90
|
+
index++
|
|
86
91
|
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
|
|
92
|
+
})(),
|
|
93
|
+
appliedWhere,
|
|
94
|
+
appliedLimitOffset,
|
|
90
95
|
}
|
|
91
96
|
},
|
|
92
97
|
}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { evaluateExpr } from '../expression/evaluate.js'
|
|
2
|
+
import { defaultDerivedAlias, stringify } from './utils.js'
|
|
3
|
+
import { executePlan } from './execute.js'
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* @import { AsyncCells, AsyncDataSource, AsyncRow, SelectColumn, UserDefinedFunction } from '../types.js'
|
|
7
|
+
* @import { ExecuteContext, HashAggregateNode, ScalarAggregateNode } from '../plan/types.js'
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Projects aggregate columns from a group of rows
|
|
12
|
+
*
|
|
13
|
+
* @param {SelectColumn[]} selectColumns
|
|
14
|
+
* @param {AsyncRow[]} group
|
|
15
|
+
* @param {Record<string, AsyncDataSource>} tables
|
|
16
|
+
* @param {Record<string, UserDefinedFunction>} [functions]
|
|
17
|
+
* @param {AbortSignal} [signal]
|
|
18
|
+
* @returns {AsyncRow}
|
|
19
|
+
*/
|
|
20
|
+
function projectAggregateColumns(selectColumns, group, tables, functions, signal) {
|
|
21
|
+
/** @type {string[]} */
|
|
22
|
+
const columns = []
|
|
23
|
+
/** @type {AsyncCells} */
|
|
24
|
+
const cells = {}
|
|
25
|
+
|
|
26
|
+
for (const col of selectColumns) {
|
|
27
|
+
if (col.kind === 'star') {
|
|
28
|
+
const firstRow = group[0]
|
|
29
|
+
if (firstRow) {
|
|
30
|
+
for (const key of firstRow.columns) {
|
|
31
|
+
columns.push(key)
|
|
32
|
+
cells[key] = firstRow.cells[key]
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
} else if (col.kind === 'derived') {
|
|
36
|
+
const alias = col.alias ?? defaultDerivedAlias(col.expr)
|
|
37
|
+
columns.push(alias)
|
|
38
|
+
cells[alias] = () => evaluateExpr({
|
|
39
|
+
node: col.expr,
|
|
40
|
+
row: group[0] ?? { columns: [], cells: {} },
|
|
41
|
+
tables,
|
|
42
|
+
functions,
|
|
43
|
+
rows: group,
|
|
44
|
+
signal,
|
|
45
|
+
})
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
return { columns, cells }
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Executes a hash aggregate operation (GROUP BY)
|
|
54
|
+
*
|
|
55
|
+
* @param {HashAggregateNode} plan
|
|
56
|
+
* @param {ExecuteContext} context
|
|
57
|
+
* @yields {AsyncRow}
|
|
58
|
+
*/
|
|
59
|
+
export async function* executeHashAggregate(plan, context) {
|
|
60
|
+
const { tables, functions, signal } = context
|
|
61
|
+
|
|
62
|
+
// Collect all rows
|
|
63
|
+
/** @type {AsyncRow[]} */
|
|
64
|
+
const allRows = []
|
|
65
|
+
for await (const row of executePlan(plan.child, context)) {
|
|
66
|
+
if (signal?.aborted) return
|
|
67
|
+
allRows.push(row)
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// Group rows by GROUP BY keys
|
|
71
|
+
/** @type {Map<string, AsyncRow[]>} */
|
|
72
|
+
const groupMap = new Map()
|
|
73
|
+
/** @type {AsyncRow[][]} */
|
|
74
|
+
const groups = []
|
|
75
|
+
|
|
76
|
+
for (const row of allRows) {
|
|
77
|
+
/** @type {string[]} */
|
|
78
|
+
const keyParts = []
|
|
79
|
+
for (const expr of plan.groupBy) {
|
|
80
|
+
const v = await evaluateExpr({ node: expr, row, tables, functions, signal })
|
|
81
|
+
keyParts.push(stringify(v))
|
|
82
|
+
}
|
|
83
|
+
const key = keyParts.join('|')
|
|
84
|
+
let group = groupMap.get(key)
|
|
85
|
+
if (!group) {
|
|
86
|
+
group = []
|
|
87
|
+
groupMap.set(key, group)
|
|
88
|
+
groups.push(group)
|
|
89
|
+
}
|
|
90
|
+
group.push(row)
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// Yield one row per group
|
|
94
|
+
for (const group of groups) {
|
|
95
|
+
const asyncRow = projectAggregateColumns(plan.columns, group, tables, functions, signal)
|
|
96
|
+
|
|
97
|
+
// Apply HAVING filter
|
|
98
|
+
if (plan.having) {
|
|
99
|
+
const context = { ...group[0], ...asyncRow }
|
|
100
|
+
const passes = await evaluateExpr({
|
|
101
|
+
node: plan.having,
|
|
102
|
+
row: context,
|
|
103
|
+
rows: group,
|
|
104
|
+
tables,
|
|
105
|
+
functions,
|
|
106
|
+
signal,
|
|
107
|
+
})
|
|
108
|
+
if (!passes) continue
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
yield asyncRow
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Executes a scalar aggregate operation (no GROUP BY, whole table aggregate)
|
|
117
|
+
*
|
|
118
|
+
* @param {ScalarAggregateNode} plan
|
|
119
|
+
* @param {ExecuteContext} context
|
|
120
|
+
* @yields {AsyncRow}
|
|
121
|
+
*/
|
|
122
|
+
export async function* executeScalarAggregate(plan, context) {
|
|
123
|
+
const { tables, functions, signal } = context
|
|
124
|
+
|
|
125
|
+
// Collect all rows into single group
|
|
126
|
+
/** @type {AsyncRow[]} */
|
|
127
|
+
const group = []
|
|
128
|
+
for await (const row of executePlan(plan.child, context)) {
|
|
129
|
+
if (signal?.aborted) return
|
|
130
|
+
group.push(row)
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
const asyncRow = projectAggregateColumns(plan.columns, group, tables, functions, signal)
|
|
134
|
+
|
|
135
|
+
// Apply HAVING filter
|
|
136
|
+
if (plan.having) {
|
|
137
|
+
const context = { ...group[0], ...asyncRow }
|
|
138
|
+
const passes = await evaluateExpr({
|
|
139
|
+
node: plan.having,
|
|
140
|
+
row: context,
|
|
141
|
+
rows: group,
|
|
142
|
+
tables,
|
|
143
|
+
functions,
|
|
144
|
+
signal,
|
|
145
|
+
})
|
|
146
|
+
if (!passes) return
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
yield asyncRow
|
|
150
|
+
}
|
package/src/execute/columns.js
CHANGED
|
@@ -1,46 +1,7 @@
|
|
|
1
|
-
import { isAggregateFunc } from '../validation.js'
|
|
2
|
-
|
|
3
1
|
/**
|
|
4
2
|
* @import { ExprNode, SelectStatement, SelectColumn } from '../types.js'
|
|
5
3
|
*/
|
|
6
4
|
|
|
7
|
-
/**
|
|
8
|
-
* Checks if an expression contains any aggregate function calls
|
|
9
|
-
*
|
|
10
|
-
* @param {ExprNode | undefined} expr
|
|
11
|
-
* @returns {boolean}
|
|
12
|
-
*/
|
|
13
|
-
export function containsAggregate(expr) {
|
|
14
|
-
if (!expr) return false
|
|
15
|
-
if (expr.type === 'function' && isAggregateFunc(expr.name.toUpperCase())) {
|
|
16
|
-
return true
|
|
17
|
-
}
|
|
18
|
-
if (expr.type === 'binary') {
|
|
19
|
-
return containsAggregate(expr.left) || containsAggregate(expr.right)
|
|
20
|
-
}
|
|
21
|
-
if (expr.type === 'unary') {
|
|
22
|
-
return containsAggregate(expr.argument)
|
|
23
|
-
}
|
|
24
|
-
if (expr.type === 'cast') {
|
|
25
|
-
return containsAggregate(expr.expr)
|
|
26
|
-
}
|
|
27
|
-
if (expr.type === 'case') {
|
|
28
|
-
if (expr.caseExpr && containsAggregate(expr.caseExpr)) return true
|
|
29
|
-
for (const when of expr.whenClauses) {
|
|
30
|
-
if (containsAggregate(when.condition) || containsAggregate(when.result)) return true
|
|
31
|
-
}
|
|
32
|
-
if (containsAggregate(expr.elseResult)) return true
|
|
33
|
-
}
|
|
34
|
-
if (expr.type === 'in valuelist') {
|
|
35
|
-
if (containsAggregate(expr.expr)) return true
|
|
36
|
-
for (const val of expr.values) {
|
|
37
|
-
if (containsAggregate(val)) return true
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
// Note: Don't recurse into subqueries - they have their own aggregate scope
|
|
41
|
-
return false
|
|
42
|
-
}
|
|
43
|
-
|
|
44
5
|
/**
|
|
45
6
|
* Extracts column names needed from a SELECT statement.
|
|
46
7
|
*
|