squirreling 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/backend/dataSource.js +1 -1
- package/src/execute/aggregates.js +2 -23
- package/src/execute/execute.js +28 -49
- package/src/execute/expression.js +15 -4
- package/src/execute/having.js +3 -19
- package/src/execute/join.js +357 -0
- package/src/execute/utils.js +33 -1
- package/src/parse/expression.js +4 -4
- package/src/parse/parse.js +59 -16
- package/src/types.d.ts +9 -2
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
[](https://www.npmjs.com/package/squirreling)
|
|
8
8
|
[](https://github.com/hyparam/squirreling/actions)
|
|
9
9
|
[](https://opensource.org/licenses/MIT)
|
|
10
|
-

|
|
11
11
|
[](https://www.npmjs.com/package/squirreling?activeTab=dependencies)
|
|
12
12
|
|
|
13
13
|
Squirreling is a streaming async SQL engine for JavaScript. It is designed to provide efficient streaming of results from pluggable backends for highly efficient retrieval of data for browser applications.
|
|
@@ -22,8 +22,8 @@ Squirreling is a streaming async SQL engine for JavaScript. It is designed to pr
|
|
|
22
22
|
- Constant memory usage for simple queries with LIMIT
|
|
23
23
|
- Robust error handling and validation designed for LLM tool use
|
|
24
24
|
- In-memory data option for simple use cases
|
|
25
|
+
- Late materialization for efficiency
|
|
25
26
|
- Select only
|
|
26
|
-
- No joins (yet)
|
|
27
27
|
|
|
28
28
|
## Usage
|
|
29
29
|
|
package/package.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { evaluateExpr } from './expression.js'
|
|
2
|
+
import { defaultDerivedAlias } from './utils.js'
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Evaluates an aggregate function over a set of rows
|
|
@@ -72,27 +73,5 @@ export async function evaluateAggregate({ col, rows, tables }) {
|
|
|
72
73
|
export function defaultAggregateAlias(col) {
|
|
73
74
|
const base = col.func.toLowerCase()
|
|
74
75
|
if (col.arg.kind === 'star') return base + '_all'
|
|
75
|
-
return base + '_' +
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
/**
|
|
79
|
-
* @param {ExprNode} expr
|
|
80
|
-
* @returns {string}
|
|
81
|
-
*/
|
|
82
|
-
export function defaultAggregateAliasExpr(expr) {
|
|
83
|
-
if (expr.type === 'identifier') {
|
|
84
|
-
return expr.name
|
|
85
|
-
} else if (expr.type === 'literal') {
|
|
86
|
-
return String(expr.value)
|
|
87
|
-
} else if (expr.type === 'cast') {
|
|
88
|
-
return defaultAggregateAliasExpr(expr.expr) + '_as_' + expr.toType
|
|
89
|
-
} else if (expr.type === 'unary') {
|
|
90
|
-
return expr.op + '_' + defaultAggregateAliasExpr(expr.argument)
|
|
91
|
-
} else if (expr.type === 'binary') {
|
|
92
|
-
return defaultAggregateAliasExpr(expr.left) + '_' + expr.op + '_' + defaultAggregateAliasExpr(expr.right)
|
|
93
|
-
} else if (expr.type === 'function') {
|
|
94
|
-
return expr.name.toLowerCase() + '_' + expr.args.map(defaultAggregateAliasExpr).join('_')
|
|
95
|
-
} else {
|
|
96
|
-
return 'expr'
|
|
97
|
-
}
|
|
76
|
+
return base + '_' + defaultDerivedAlias(col.arg.expr)
|
|
98
77
|
}
|
package/src/execute/execute.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
import { evaluateExpr } from './expression.js'
|
|
2
|
-
import { parseSql } from '../parse/parse.js'
|
|
3
1
|
import { generatorSource, memorySource } from '../backend/dataSource.js'
|
|
2
|
+
import { parseSql } from '../parse/parse.js'
|
|
4
3
|
import { defaultAggregateAlias, evaluateAggregate } from './aggregates.js'
|
|
4
|
+
import { evaluateExpr } from './expression.js'
|
|
5
5
|
import { evaluateHavingExpr } from './having.js'
|
|
6
|
+
import { executeJoins } from './join.js'
|
|
7
|
+
import { defaultDerivedAlias } from './utils.js'
|
|
6
8
|
|
|
7
9
|
/**
|
|
8
10
|
* @import { AsyncDataSource, ExecuteSqlOptions, ExprNode, OrderByItem, AsyncRow, SelectStatement, SqlPrimitive } from '../types.js'
|
|
@@ -12,15 +14,12 @@ import { evaluateHavingExpr } from './having.js'
|
|
|
12
14
|
* Executes a SQL SELECT query against named data sources
|
|
13
15
|
*
|
|
14
16
|
* @param {ExecuteSqlOptions} options - the execution options
|
|
15
|
-
* @
|
|
17
|
+
* @yields {AsyncRow} async generator yielding result rows
|
|
16
18
|
*/
|
|
17
19
|
export async function* executeSql({ tables, query }) {
|
|
18
20
|
const select = parseSql(query)
|
|
19
21
|
|
|
20
22
|
// Check for unsupported operations
|
|
21
|
-
if (select.joins.length) {
|
|
22
|
-
throw new Error('JOIN is not supported')
|
|
23
|
-
}
|
|
24
23
|
if (!select.from) {
|
|
25
24
|
throw new Error('FROM clause is required')
|
|
26
25
|
}
|
|
@@ -44,53 +43,33 @@ export async function* executeSql({ tables, query }) {
|
|
|
44
43
|
*
|
|
45
44
|
* @param {SelectStatement} select
|
|
46
45
|
* @param {Record<string, AsyncDataSource>} tables
|
|
47
|
-
* @
|
|
46
|
+
* @yields {AsyncRow}
|
|
48
47
|
*/
|
|
49
48
|
export async function* executeSelect(select, tables) {
|
|
50
49
|
/** @type {AsyncDataSource} */
|
|
51
50
|
let dataSource
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
51
|
+
/** @type {string} */
|
|
52
|
+
let fromTableName
|
|
53
|
+
|
|
54
|
+
if (select.from.kind === 'table') {
|
|
55
|
+
// Use alias for column prefixing, but look up the actual table name
|
|
56
|
+
fromTableName = select.from.alias ?? select.from.table
|
|
57
|
+
dataSource = tables[select.from.table]
|
|
58
|
+
if (dataSource === undefined) {
|
|
59
|
+
throw new Error(`Table "${select.from.table}" not found`)
|
|
57
60
|
}
|
|
58
|
-
|
|
59
|
-
dataSource = table
|
|
60
61
|
} else {
|
|
61
62
|
// Nested subquery - recursively resolve
|
|
63
|
+
fromTableName = select.from.alias
|
|
62
64
|
dataSource = generatorSource(executeSelect(select.from.query, tables))
|
|
63
65
|
}
|
|
64
66
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
/**
|
|
69
|
-
* Generates a default alias for a derived column expression
|
|
70
|
-
*
|
|
71
|
-
* @param {ExprNode} expr - the expression node
|
|
72
|
-
* @returns {string} the generated alias
|
|
73
|
-
*/
|
|
74
|
-
function defaultDerivedAlias(expr) {
|
|
75
|
-
if (expr.type === 'identifier') {
|
|
76
|
-
return expr.name
|
|
77
|
-
}
|
|
78
|
-
if (expr.type === 'function') {
|
|
79
|
-
const base = expr.name.toLowerCase()
|
|
80
|
-
// Try to extract column names from identifier arguments
|
|
81
|
-
const columnNames = expr.args
|
|
82
|
-
.filter(arg => arg.type === 'identifier')
|
|
83
|
-
.map(arg => arg.name)
|
|
84
|
-
if (columnNames.length > 0) {
|
|
85
|
-
return base + '_' + columnNames.join('_')
|
|
86
|
-
}
|
|
87
|
-
return base
|
|
88
|
-
}
|
|
89
|
-
if (expr.type === 'cast') return 'cast_expr'
|
|
90
|
-
if (expr.type === 'unary' && expr.argument.type === 'identifier') {
|
|
91
|
-
return expr.op === '-' ? 'neg_' + expr.argument.name : 'expr'
|
|
67
|
+
// Execute JOINs if present
|
|
68
|
+
if (select.joins.length) {
|
|
69
|
+
dataSource = await executeJoins(dataSource, select.joins, fromTableName, tables)
|
|
92
70
|
}
|
|
93
|
-
|
|
71
|
+
|
|
72
|
+
yield* evaluateSelectAst(select, dataSource, tables)
|
|
94
73
|
}
|
|
95
74
|
|
|
96
75
|
/**
|
|
@@ -128,10 +107,10 @@ function compareValues(a, b) {
|
|
|
128
107
|
return 0
|
|
129
108
|
}
|
|
130
109
|
|
|
131
|
-
const
|
|
132
|
-
const
|
|
133
|
-
if (
|
|
134
|
-
if (
|
|
110
|
+
const aa = String(a)
|
|
111
|
+
const bb = String(b)
|
|
112
|
+
if (aa < bb) return -1
|
|
113
|
+
if (aa > bb) return 1
|
|
135
114
|
return 0
|
|
136
115
|
}
|
|
137
116
|
|
|
@@ -285,7 +264,7 @@ async function applyOrderBy(rows, orderBy, tables) {
|
|
|
285
264
|
* @param {SelectStatement} select
|
|
286
265
|
* @param {AsyncDataSource} dataSource
|
|
287
266
|
* @param {Record<string, AsyncDataSource>} tables
|
|
288
|
-
* @
|
|
267
|
+
* @yields {AsyncRow}
|
|
289
268
|
*/
|
|
290
269
|
async function* evaluateSelectAst(select, dataSource, tables) {
|
|
291
270
|
// SQL priority: from, where, group by, having, select, order by, offset, limit
|
|
@@ -310,7 +289,7 @@ async function* evaluateSelectAst(select, dataSource, tables) {
|
|
|
310
289
|
* @param {SelectStatement} select
|
|
311
290
|
* @param {AsyncDataSource} dataSource
|
|
312
291
|
* @param {Record<string, AsyncDataSource>} tables
|
|
313
|
-
* @
|
|
292
|
+
* @yields {AsyncRow}
|
|
314
293
|
*/
|
|
315
294
|
async function* evaluateStreaming(select, dataSource, tables) {
|
|
316
295
|
let rowsYielded = 0
|
|
@@ -382,7 +361,7 @@ async function* evaluateStreaming(select, dataSource, tables) {
|
|
|
382
361
|
* @param {Record<string, AsyncDataSource>} tables
|
|
383
362
|
* @param {boolean} hasAggregate
|
|
384
363
|
* @param {boolean} useGrouping
|
|
385
|
-
* @
|
|
364
|
+
* @yields {AsyncRow}
|
|
386
365
|
*/
|
|
387
366
|
async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGrouping) {
|
|
388
367
|
// Step 1: Collect all rows from data source
|
|
@@ -19,15 +19,26 @@ export async function evaluateExpr({ node, row, tables }) {
|
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
if (node.type === 'identifier') {
|
|
22
|
-
|
|
22
|
+
// Try exact match first (handles both qualified and unqualified names)
|
|
23
|
+
if (row[node.name]) {
|
|
24
|
+
return row[node.name]()
|
|
25
|
+
}
|
|
26
|
+
// For qualified names like 'users.id', also try just the column part
|
|
27
|
+
if (node.name.includes('.')) {
|
|
28
|
+
const colName = node.name.split('.').pop()
|
|
29
|
+
if (colName && row[colName]) {
|
|
30
|
+
return row[colName]()
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
return undefined
|
|
23
34
|
}
|
|
24
35
|
|
|
25
36
|
// Scalar subquery - returns a single value
|
|
26
37
|
if (node.type === 'subquery') {
|
|
27
38
|
const gen = executeSelect(node.subquery, tables)
|
|
28
39
|
const first = await gen.next() // Start the generator
|
|
29
|
-
gen.return() // Stop further execution
|
|
30
|
-
if (first.
|
|
40
|
+
gen.return(undefined) // Stop further execution
|
|
41
|
+
if (!first.value) return null
|
|
31
42
|
/** @type {AsyncRow} */
|
|
32
43
|
const firstRow = first.value
|
|
33
44
|
const firstKey = Object.keys(firstRow)[0]
|
|
@@ -275,7 +286,7 @@ export async function evaluateExpr({ node, row, tables }) {
|
|
|
275
286
|
// CASE expressions
|
|
276
287
|
if (node.type === 'case') {
|
|
277
288
|
// For simple CASE: evaluate the case expression once
|
|
278
|
-
const caseValue = node.caseExpr
|
|
289
|
+
const caseValue = node.caseExpr && await evaluateExpr({ node: node.caseExpr, row, tables })
|
|
279
290
|
|
|
280
291
|
// Iterate through WHEN clauses
|
|
281
292
|
for (const whenClause of node.whenClauses) {
|
package/src/execute/having.js
CHANGED
|
@@ -1,26 +1,9 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @import { AggregateFunc, AsyncDataSource, ExprNode, AsyncRow, SqlPrimitive } from '../types.js'
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
1
|
import { isAggregateFunc } from '../validation.js'
|
|
6
2
|
import { evaluateExpr } from './expression.js'
|
|
7
3
|
|
|
8
4
|
/**
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
* @param {AsyncRow} resultRow - the aggregated result row
|
|
12
|
-
* @param {AsyncRow[]} group - the group of rows
|
|
13
|
-
* @returns {AsyncRow} a context row for HAVING evaluation
|
|
5
|
+
* @import { AggregateFunc, AsyncDataSource, ExprNode, AsyncRow, SqlPrimitive } from '../types.js'
|
|
14
6
|
*/
|
|
15
|
-
function createHavingContext(resultRow, group) {
|
|
16
|
-
// Include the first row of the group (for GROUP BY columns)
|
|
17
|
-
const firstRow = group[0]
|
|
18
|
-
if (firstRow) {
|
|
19
|
-
return { ...firstRow, ...resultRow }
|
|
20
|
-
} else {
|
|
21
|
-
return resultRow
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
7
|
|
|
25
8
|
/**
|
|
26
9
|
* Evaluates a HAVING expression with support for aggregate functions
|
|
@@ -32,7 +15,8 @@ function createHavingContext(resultRow, group) {
|
|
|
32
15
|
* @returns {Promise<boolean>} whether the HAVING condition is satisfied
|
|
33
16
|
*/
|
|
34
17
|
export async function evaluateHavingExpr(expr, row, group, tables) {
|
|
35
|
-
|
|
18
|
+
// Having context
|
|
19
|
+
const context = { ...group[0] ?? {}, ...row }
|
|
36
20
|
|
|
37
21
|
// For HAVING, we need special handling of aggregate functions
|
|
38
22
|
// They need to be re-evaluated against the group
|
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
import { evaluateExpr } from './expression.js'
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* @import { AsyncRow, AsyncDataSource, JoinClause, ExprNode } from '../types.js'
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Executes JOIN operations against a base data source
|
|
9
|
+
*
|
|
10
|
+
* @param {AsyncDataSource} leftSource - the left side of the join (FROM table)
|
|
11
|
+
* @param {JoinClause[]} joins - array of join clauses to execute
|
|
12
|
+
* @param {string} leftTableName - name of the left table (for column prefixing)
|
|
13
|
+
* @param {Record<string, AsyncDataSource>} tables - all available tables
|
|
14
|
+
* @returns {Promise<AsyncDataSource>} data source yielding joined rows
|
|
15
|
+
*/
|
|
16
|
+
export async function executeJoins(leftSource, joins, leftTableName, tables) {
|
|
17
|
+
let currentLeftTable = leftTableName
|
|
18
|
+
|
|
19
|
+
// Single join optimization: stream left rows without buffering
|
|
20
|
+
if (joins.length === 1) {
|
|
21
|
+
const join = joins[0]
|
|
22
|
+
const rightSource = tables[join.table]
|
|
23
|
+
if (rightSource === undefined) {
|
|
24
|
+
throw new Error(`Table "${join.table}" not found`)
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// Buffer right rows for hash index (required for hash join)
|
|
28
|
+
/** @type {AsyncRow[]} */
|
|
29
|
+
const rightRows = []
|
|
30
|
+
for await (const row of rightSource.getRows()) {
|
|
31
|
+
rightRows.push(row)
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// Use alias for column prefixing if present
|
|
35
|
+
const rightTableName = join.alias ?? join.table
|
|
36
|
+
|
|
37
|
+
// Return streaming data source - left rows stream through without buffering
|
|
38
|
+
return {
|
|
39
|
+
async *getRows() {
|
|
40
|
+
yield* hashJoin({
|
|
41
|
+
leftRows: leftSource.getRows(), // Stream directly, not buffered
|
|
42
|
+
rightRows,
|
|
43
|
+
join,
|
|
44
|
+
leftTable: currentLeftTable,
|
|
45
|
+
rightTable: rightTableName,
|
|
46
|
+
tables,
|
|
47
|
+
})
|
|
48
|
+
},
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Multiple joins: buffer intermediate results, stream final join
|
|
53
|
+
/** @type {AsyncRow[]} */
|
|
54
|
+
let leftRows = []
|
|
55
|
+
for await (const row of leftSource.getRows()) {
|
|
56
|
+
leftRows.push(row)
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// Process all but the last join, buffering intermediate results
|
|
60
|
+
for (let i = 0; i < joins.length - 1; i++) {
|
|
61
|
+
const join = joins[i]
|
|
62
|
+
const rightSource = tables[join.table]
|
|
63
|
+
if (rightSource === undefined) {
|
|
64
|
+
throw new Error(`Table "${join.table}" not found`)
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/** @type {AsyncRow[]} */
|
|
68
|
+
const rightRows = []
|
|
69
|
+
for await (const row of rightSource.getRows()) {
|
|
70
|
+
rightRows.push(row)
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Use alias for column prefixing if present
|
|
74
|
+
const rightTableName = join.alias ?? join.table
|
|
75
|
+
|
|
76
|
+
// Collect intermediate results into array for next join
|
|
77
|
+
/** @type {AsyncRow[]} */
|
|
78
|
+
const newLeftRows = []
|
|
79
|
+
const joined = hashJoin({
|
|
80
|
+
leftRows,
|
|
81
|
+
rightRows,
|
|
82
|
+
join,
|
|
83
|
+
leftTable: currentLeftTable,
|
|
84
|
+
rightTable: rightTableName,
|
|
85
|
+
tables,
|
|
86
|
+
})
|
|
87
|
+
for await (const row of joined) {
|
|
88
|
+
newLeftRows.push(row)
|
|
89
|
+
}
|
|
90
|
+
leftRows = newLeftRows
|
|
91
|
+
|
|
92
|
+
// After join, the "left" table for the next join includes all joined tables
|
|
93
|
+
currentLeftTable = `${currentLeftTable}_${rightTableName}`
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Final join: stream the results
|
|
97
|
+
const lastJoin = joins[joins.length - 1]
|
|
98
|
+
const rightSource = tables[lastJoin.table]
|
|
99
|
+
if (rightSource === undefined) {
|
|
100
|
+
throw new Error(`Table "${lastJoin.table}" not found`)
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/** @type {AsyncRow[]} */
|
|
104
|
+
const rightRows = []
|
|
105
|
+
for await (const row of rightSource.getRows()) {
|
|
106
|
+
rightRows.push(row)
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// Use alias for column prefixing if present
|
|
110
|
+
const lastRightTableName = lastJoin.alias ?? lastJoin.table
|
|
111
|
+
|
|
112
|
+
return {
|
|
113
|
+
async *getRows() {
|
|
114
|
+
yield* hashJoin({
|
|
115
|
+
leftRows,
|
|
116
|
+
rightRows,
|
|
117
|
+
join: lastJoin,
|
|
118
|
+
leftTable: currentLeftTable,
|
|
119
|
+
rightTable: lastRightTableName,
|
|
120
|
+
tables,
|
|
121
|
+
})
|
|
122
|
+
},
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
/**
|
|
127
|
+
* Checks if an expression references a specific table.
|
|
128
|
+
* Returns true if the expression is an identifier prefixed with the table name.
|
|
129
|
+
*
|
|
130
|
+
* @param {ExprNode} expr
|
|
131
|
+
* @param {string} tableName
|
|
132
|
+
* @returns {boolean}
|
|
133
|
+
*/
|
|
134
|
+
function exprReferencesTable(expr, tableName) {
|
|
135
|
+
return expr.type === 'identifier' && expr.name.startsWith(`${tableName}.`)
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* Extracts the join key expressions from an ON condition.
|
|
140
|
+
* Handles both `left.col = right.col` and `right.col = left.col` orderings.
|
|
141
|
+
*
|
|
142
|
+
* @param {ExprNode} onCondition
|
|
143
|
+
* @param {string} leftTable
|
|
144
|
+
* @param {string} rightTable
|
|
145
|
+
* @returns {{ leftKey: ExprNode, rightKey: ExprNode } | undefined}
|
|
146
|
+
*/
|
|
147
|
+
function extractJoinKeys(onCondition, leftTable, rightTable) {
|
|
148
|
+
if (onCondition.type === 'binary' && onCondition.op === '=') {
|
|
149
|
+
const { left, right } = onCondition
|
|
150
|
+
|
|
151
|
+
// Check if keys are swapped (right table referenced in left position)
|
|
152
|
+
const leftRefsRight = exprReferencesTable(left, rightTable)
|
|
153
|
+
const rightRefsLeft = exprReferencesTable(right, leftTable)
|
|
154
|
+
|
|
155
|
+
if (leftRefsRight && rightRefsLeft) {
|
|
156
|
+
// Keys are swapped, return them in correct order
|
|
157
|
+
return { leftKey: right, rightKey: left }
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
// Default: assume left operand is for left table
|
|
161
|
+
return { leftKey: left, rightKey: right }
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Creates a NULL-filled row with the given column names
|
|
167
|
+
*
|
|
168
|
+
* @param {string[]} columnNames
|
|
169
|
+
* @returns {AsyncRow}
|
|
170
|
+
*/
|
|
171
|
+
function createNullRow(columnNames) {
|
|
172
|
+
/** @type {AsyncRow} */
|
|
173
|
+
const row = {}
|
|
174
|
+
for (const col of columnNames) {
|
|
175
|
+
row[col] = () => Promise.resolve(null)
|
|
176
|
+
}
|
|
177
|
+
return row
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Merges two rows into one, prefixing columns with table names
|
|
182
|
+
*
|
|
183
|
+
* @param {AsyncRow} leftRow
|
|
184
|
+
* @param {AsyncRow} rightRow
|
|
185
|
+
* @param {string} leftTable
|
|
186
|
+
* @param {string} rightTable
|
|
187
|
+
* @returns {AsyncRow}
|
|
188
|
+
*/
|
|
189
|
+
function mergeRows(leftRow, rightRow, leftTable, rightTable) {
|
|
190
|
+
/** @type {AsyncRow} */
|
|
191
|
+
const merged = {}
|
|
192
|
+
|
|
193
|
+
// Add left table columns with prefix
|
|
194
|
+
for (const [key, cell] of Object.entries(leftRow)) {
|
|
195
|
+
// Skip already-prefixed keys (from previous joins)
|
|
196
|
+
if (!key.includes('.')) {
|
|
197
|
+
merged[`${leftTable}.${key}`] = cell
|
|
198
|
+
} else {
|
|
199
|
+
merged[key] = cell
|
|
200
|
+
}
|
|
201
|
+
// Also keep unqualified name for convenience (may be overwritten if ambiguous)
|
|
202
|
+
merged[key] = cell
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
// Add right table columns with prefix
|
|
206
|
+
for (const [key, cell] of Object.entries(rightRow)) {
|
|
207
|
+
if (!key.includes('.')) {
|
|
208
|
+
merged[`${rightTable}.${key}`] = cell
|
|
209
|
+
} else {
|
|
210
|
+
merged[key] = cell
|
|
211
|
+
}
|
|
212
|
+
// Unqualified name (overwrites if same name exists in left table)
|
|
213
|
+
merged[key] = cell
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
return merged
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
/**
|
|
220
|
+
* Performs a hash join between left and right row sets (streaming).
|
|
221
|
+
* Yields rows as they are found instead of buffering all results.
|
|
222
|
+
*
|
|
223
|
+
* @param {Object} params
|
|
224
|
+
* @param {AsyncIterable<AsyncRow>|AsyncRow[]} params.leftRows - rows from left table (can stream)
|
|
225
|
+
* @param {AsyncRow[]} params.rightRows - rows from right table (must be buffered for hash index)
|
|
226
|
+
* @param {JoinClause} params.join - join specification
|
|
227
|
+
* @param {string} params.leftTable - name of left table (for column prefixing)
|
|
228
|
+
* @param {string} params.rightTable - name of right table (for column prefixing, may be alias)
|
|
229
|
+
* @param {Record<string, AsyncDataSource>} params.tables - all tables for expression evaluation
|
|
230
|
+
* @yields {AsyncRow} joined rows
|
|
231
|
+
*/
|
|
232
|
+
async function* hashJoin({ leftRows, rightRows, join, leftTable, rightTable, tables }) {
|
|
233
|
+
const { joinType, on: onCondition } = join
|
|
234
|
+
|
|
235
|
+
if (!onCondition) {
|
|
236
|
+
throw new Error('JOIN requires ON condition')
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
const keys = extractJoinKeys(onCondition, leftTable, rightTable)
|
|
240
|
+
|
|
241
|
+
// Get column names for NULL row generation (right side is always buffered)
|
|
242
|
+
const rightCols = rightRows.length ? Object.keys(rightRows[0]) : []
|
|
243
|
+
const rightPrefixedCols = rightCols.flatMap(col =>
|
|
244
|
+
col.includes('.') ? [col] : [`${rightTable}.${col}`, col]
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
// Track left column info - captured from first row during iteration
|
|
248
|
+
/** @type {string[]|null} */
|
|
249
|
+
let leftPrefixedCols = null
|
|
250
|
+
|
|
251
|
+
if (keys) {
|
|
252
|
+
// Hash join: build hash map on right table
|
|
253
|
+
/** @type {Map<string, AsyncRow[]>} */
|
|
254
|
+
const hashMap = new Map()
|
|
255
|
+
|
|
256
|
+
// BUILD PHASE: Index right rows by join key
|
|
257
|
+
// Skip null keys - SQL semantics: NULL != NULL
|
|
258
|
+
for (const rightRow of rightRows) {
|
|
259
|
+
const keyValue = await evaluateExpr({ node: keys.rightKey, row: rightRow, tables })
|
|
260
|
+
if (keyValue == null) continue // NULL keys never match
|
|
261
|
+
const keyStr = JSON.stringify(keyValue)
|
|
262
|
+
|
|
263
|
+
let bucket = hashMap.get(keyStr)
|
|
264
|
+
if (!bucket) {
|
|
265
|
+
bucket = []
|
|
266
|
+
hashMap.set(keyStr, bucket)
|
|
267
|
+
}
|
|
268
|
+
bucket.push(rightRow)
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// Track which right rows matched (only needed for RIGHT/FULL joins)
|
|
272
|
+
/** @type {Set<AsyncRow>|null} */
|
|
273
|
+
const matchedRightRows = joinType === 'RIGHT' || joinType === 'FULL' ? new Set() : null
|
|
274
|
+
|
|
275
|
+
// PROBE PHASE: Stream through left rows, yield matches immediately
|
|
276
|
+
for await (const leftRow of leftRows) {
|
|
277
|
+
// Capture left column info from first row (for NULL row generation)
|
|
278
|
+
if (!leftPrefixedCols) {
|
|
279
|
+
const leftCols = Object.keys(leftRow)
|
|
280
|
+
leftPrefixedCols = leftCols.flatMap(col =>
|
|
281
|
+
col.includes('.') ? [col] : [`${leftTable}.${col}`, col]
|
|
282
|
+
)
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
const keyValue = await evaluateExpr({ node: keys.leftKey, row: leftRow, tables })
|
|
286
|
+
const keyStr = JSON.stringify(keyValue)
|
|
287
|
+
|
|
288
|
+
const matchingRightRows = hashMap.get(keyStr)
|
|
289
|
+
|
|
290
|
+
if (matchingRightRows && matchingRightRows.length > 0) {
|
|
291
|
+
for (const rightRow of matchingRightRows) {
|
|
292
|
+
if (matchedRightRows) matchedRightRows.add(rightRow)
|
|
293
|
+
yield mergeRows(leftRow, rightRow, leftTable, rightTable)
|
|
294
|
+
}
|
|
295
|
+
} else if (joinType === 'LEFT' || joinType === 'FULL') {
|
|
296
|
+
const nullRight = createNullRow(rightPrefixedCols)
|
|
297
|
+
yield mergeRows(leftRow, nullRight, leftTable, rightTable)
|
|
298
|
+
}
|
|
299
|
+
// INNER join with no match: don't yield anything
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
// UNMATCHED PHASE: Handle unmatched right rows for RIGHT/FULL joins
|
|
303
|
+
if (matchedRightRows) {
|
|
304
|
+
for (const rightRow of rightRows) {
|
|
305
|
+
if (!matchedRightRows.has(rightRow)) {
|
|
306
|
+
// Use empty array if left table was empty (no rows to derive columns from)
|
|
307
|
+
const nullLeft = createNullRow(leftPrefixedCols || [])
|
|
308
|
+
yield mergeRows(nullLeft, rightRow, leftTable, rightTable)
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
} else {
|
|
313
|
+
// Fallback to nested loop for complex ON conditions
|
|
314
|
+
// Left rows stream through, right rows are iterated for each left row
|
|
315
|
+
/** @type {Set<AsyncRow>|null} */
|
|
316
|
+
const matchedRightRows = joinType === 'RIGHT' || joinType === 'FULL' ? new Set() : null
|
|
317
|
+
|
|
318
|
+
for await (const leftRow of leftRows) {
|
|
319
|
+
// Capture left column info from first row (for NULL row generation)
|
|
320
|
+
if (!leftPrefixedCols) {
|
|
321
|
+
const leftCols = Object.keys(leftRow)
|
|
322
|
+
leftPrefixedCols = leftCols.flatMap(col =>
|
|
323
|
+
col.includes('.') ? [col] : [`${leftTable}.${col}`, col]
|
|
324
|
+
)
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
let hasMatch = false
|
|
328
|
+
|
|
329
|
+
for (const rightRow of rightRows) {
|
|
330
|
+
const tempMerged = mergeRows(leftRow, rightRow, leftTable, rightTable)
|
|
331
|
+
const matches = await evaluateExpr({ node: onCondition, row: tempMerged, tables })
|
|
332
|
+
|
|
333
|
+
if (matches) {
|
|
334
|
+
hasMatch = true
|
|
335
|
+
if (matchedRightRows) matchedRightRows.add(rightRow)
|
|
336
|
+
yield tempMerged
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
if (!hasMatch && (joinType === 'LEFT' || joinType === 'FULL')) {
|
|
341
|
+
const nullRight = createNullRow(rightPrefixedCols)
|
|
342
|
+
yield mergeRows(leftRow, nullRight, leftTable, rightTable)
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
// Handle unmatched right rows for RIGHT/FULL joins
|
|
347
|
+
if (matchedRightRows) {
|
|
348
|
+
for (const rightRow of rightRows) {
|
|
349
|
+
if (!matchedRightRows.has(rightRow)) {
|
|
350
|
+
// Use empty array if left table was empty (no rows to derive columns from)
|
|
351
|
+
const nullLeft = createNullRow(leftPrefixedCols || [])
|
|
352
|
+
yield mergeRows(nullLeft, rightRow, leftTable, rightTable)
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
}
|
package/src/execute/utils.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Collects and materialize all results from an async row generator into an array
|
|
3
3
|
*
|
|
4
|
-
* @import {AsyncRow, SqlPrimitive} from '../types.js'
|
|
4
|
+
* @import {AsyncRow, ExprNode, SqlPrimitive} from '../types.js'
|
|
5
5
|
* @param {AsyncGenerator<AsyncRow>} asyncRows
|
|
6
6
|
* @returns {Promise<Record<string, SqlPrimitive>[]>} array of all yielded values
|
|
7
7
|
*/
|
|
@@ -18,3 +18,35 @@ export async function collect(asyncRows) {
|
|
|
18
18
|
}
|
|
19
19
|
return results
|
|
20
20
|
}
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Generates a default alias for a derived column expression
|
|
24
|
+
*
|
|
25
|
+
* @param {ExprNode} expr - the expression node
|
|
26
|
+
* @returns {string} the generated alias
|
|
27
|
+
*/
|
|
28
|
+
export function defaultDerivedAlias(expr) {
|
|
29
|
+
if (expr.type === 'identifier') {
|
|
30
|
+
// For qualified names like 'users.name', use just the column part as alias
|
|
31
|
+
if (expr.name.includes('.')) {
|
|
32
|
+
return expr.name.split('.').pop()
|
|
33
|
+
}
|
|
34
|
+
return expr.name
|
|
35
|
+
}
|
|
36
|
+
if (expr.type === 'literal') {
|
|
37
|
+
return String(expr.value)
|
|
38
|
+
}
|
|
39
|
+
if (expr.type === 'cast') {
|
|
40
|
+
return defaultDerivedAlias(expr.expr) + '_as_' + expr.toType
|
|
41
|
+
}
|
|
42
|
+
if (expr.type === 'unary') {
|
|
43
|
+
return expr.op + '_' + defaultDerivedAlias(expr.argument)
|
|
44
|
+
}
|
|
45
|
+
if (expr.type === 'binary') {
|
|
46
|
+
return defaultDerivedAlias(expr.left) + '_' + expr.op + '_' + defaultDerivedAlias(expr.right)
|
|
47
|
+
}
|
|
48
|
+
if (expr.type === 'function') {
|
|
49
|
+
return expr.name.toLowerCase() + '_' + expr.args.map(defaultDerivedAlias).join('_')
|
|
50
|
+
}
|
|
51
|
+
return 'expr'
|
|
52
|
+
}
|
package/src/parse/expression.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { isAggregateFunc, isStringFunc } from '../validation.js'
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
* @import { ExprCursor, ExprNode,
|
|
4
|
+
* @import { BinaryOp, ExprCursor, ExprNode, WhenClause } from '../types.js'
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
/**
|
|
@@ -153,7 +153,7 @@ function parsePrimary(c) {
|
|
|
153
153
|
c.consume() // CASE
|
|
154
154
|
|
|
155
155
|
// Check if it's simple CASE (CASE expr WHEN ...) or searched CASE (CASE WHEN ...)
|
|
156
|
-
/** @type {
|
|
156
|
+
/** @type {ExprNode | undefined} */
|
|
157
157
|
let caseExpr
|
|
158
158
|
const nextTok = c.current()
|
|
159
159
|
if (nextTok.type !== 'keyword' || nextTok.value !== 'WHEN') {
|
|
@@ -162,7 +162,7 @@ function parsePrimary(c) {
|
|
|
162
162
|
}
|
|
163
163
|
|
|
164
164
|
// Parse WHEN clauses
|
|
165
|
-
/** @type {
|
|
165
|
+
/** @type {WhenClause[]} */
|
|
166
166
|
const whenClauses = []
|
|
167
167
|
while (c.match('keyword', 'WHEN')) {
|
|
168
168
|
const condition = parseExpression(c)
|
|
@@ -176,7 +176,7 @@ function parsePrimary(c) {
|
|
|
176
176
|
}
|
|
177
177
|
|
|
178
178
|
// Parse optional ELSE clause
|
|
179
|
-
/** @type {
|
|
179
|
+
/** @type {ExprNode | undefined} */
|
|
180
180
|
let elseResult
|
|
181
181
|
if (c.match('keyword', 'ELSE')) {
|
|
182
182
|
elseResult = parseExpression(c)
|
package/src/parse/parse.js
CHANGED
|
@@ -3,7 +3,7 @@ import { parseExpression } from './expression.js'
|
|
|
3
3
|
import { isAggregateFunc } from '../validation.js'
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
* @import { AggregateColumn, AggregateArg, AggregateFunc, ExprCursor, ExprNode, FromSubquery, JoinClause, JoinType, OrderByItem, ParserState, SelectStatement, SelectColumn, Token, TokenType } from '../types.js'
|
|
6
|
+
* @import { AggregateColumn, AggregateArg, AggregateFunc, ExprCursor, ExprNode, FromSubquery, FromTable, JoinClause, JoinType, OrderByItem, ParserState, SelectStatement, SelectColumn, Token, TokenType } from '../types.js'
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
// Keywords that cannot be used as implicit aliases after a column
|
|
@@ -17,6 +17,23 @@ const RESERVED_AFTER_COLUMN = new Set([
|
|
|
17
17
|
'OFFSET',
|
|
18
18
|
])
|
|
19
19
|
|
|
20
|
+
// Keywords that cannot be used as table aliases
|
|
21
|
+
const RESERVED_AFTER_TABLE = new Set([
|
|
22
|
+
'WHERE',
|
|
23
|
+
'GROUP',
|
|
24
|
+
'HAVING',
|
|
25
|
+
'ORDER',
|
|
26
|
+
'LIMIT',
|
|
27
|
+
'OFFSET',
|
|
28
|
+
'JOIN',
|
|
29
|
+
'INNER',
|
|
30
|
+
'LEFT',
|
|
31
|
+
'RIGHT',
|
|
32
|
+
'FULL',
|
|
33
|
+
'CROSS',
|
|
34
|
+
'ON',
|
|
35
|
+
])
|
|
36
|
+
|
|
20
37
|
/**
|
|
21
38
|
* @param {string} query
|
|
22
39
|
* @returns {SelectStatement}
|
|
@@ -219,17 +236,24 @@ function parseAggregateItem(state, func) {
|
|
|
219
236
|
const cursor = createExprCursor(state)
|
|
220
237
|
const expr = parseExpression(cursor)
|
|
221
238
|
expect(state, 'keyword', 'AS')
|
|
222
|
-
const
|
|
239
|
+
const toType = expectIdentifier(state).value
|
|
223
240
|
expect(state, 'paren', ')')
|
|
224
241
|
arg = {
|
|
225
242
|
kind: 'expression',
|
|
226
|
-
expr: { type: 'cast', expr, toType
|
|
243
|
+
expr: { type: 'cast', expr, toType },
|
|
227
244
|
}
|
|
228
245
|
} else {
|
|
229
|
-
|
|
246
|
+
// column name
|
|
247
|
+
let name = expectIdentifier(state).value
|
|
248
|
+
// Handle qualified column names like orders.amount
|
|
249
|
+
if (current(state).type === 'dot') {
|
|
250
|
+
consume(state) // consume dot
|
|
251
|
+
const qualifiedPart = expectIdentifier(state)
|
|
252
|
+
name = `${name}.${qualifiedPart.value}`
|
|
253
|
+
}
|
|
230
254
|
arg = {
|
|
231
255
|
kind: 'expression',
|
|
232
|
-
expr: { type: 'identifier', name
|
|
256
|
+
expr: { type: 'identifier', name },
|
|
233
257
|
}
|
|
234
258
|
}
|
|
235
259
|
|
|
@@ -240,6 +264,25 @@ function parseAggregateItem(state, func) {
|
|
|
240
264
|
return { kind: 'aggregate', func, arg, alias }
|
|
241
265
|
}
|
|
242
266
|
|
|
267
|
+
/**
|
|
268
|
+
* Parses an optional table alias (e.g., "FROM users u" or "FROM users AS u")
|
|
269
|
+
* @param {ParserState} state
|
|
270
|
+
* @returns {string | undefined}
|
|
271
|
+
*/
|
|
272
|
+
function parseTableAlias(state) {
|
|
273
|
+
// Check for explicit AS keyword
|
|
274
|
+
if (match(state, 'keyword', 'AS')) {
|
|
275
|
+
const aliasTok = expectIdentifier(state)
|
|
276
|
+
return aliasTok.value
|
|
277
|
+
}
|
|
278
|
+
// Check for implicit alias (identifier not in reserved list)
|
|
279
|
+
const maybeAlias = current(state)
|
|
280
|
+
if (maybeAlias.type === 'identifier' && !RESERVED_AFTER_TABLE.has(maybeAlias.value.toUpperCase())) {
|
|
281
|
+
consume(state)
|
|
282
|
+
return maybeAlias.value
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
|
|
243
286
|
/**
|
|
244
287
|
* @param {ParserState} state
|
|
245
288
|
* @returns {string | undefined}
|
|
@@ -322,9 +365,9 @@ function parseJoins(state) {
|
|
|
322
365
|
break
|
|
323
366
|
}
|
|
324
367
|
|
|
325
|
-
// Parse table name
|
|
326
|
-
const
|
|
327
|
-
const
|
|
368
|
+
// Parse table name and optional alias
|
|
369
|
+
const tableName = expectIdentifier(state).value
|
|
370
|
+
const tableAlias = parseTableAlias(state)
|
|
328
371
|
|
|
329
372
|
// Parse ON condition
|
|
330
373
|
expect(state, 'keyword', 'ON')
|
|
@@ -332,8 +375,9 @@ function parseJoins(state) {
|
|
|
332
375
|
const condition = parseExpression(cursor)
|
|
333
376
|
|
|
334
377
|
joins.push({
|
|
335
|
-
|
|
378
|
+
joinType,
|
|
336
379
|
table: tableName,
|
|
380
|
+
alias: tableAlias,
|
|
337
381
|
on: condition,
|
|
338
382
|
})
|
|
339
383
|
}
|
|
@@ -351,12 +395,8 @@ function parseSubquery(state) {
|
|
|
351
395
|
const query = parseSelectInternal(state)
|
|
352
396
|
expect(state, 'paren', ')')
|
|
353
397
|
expect(state, 'keyword', 'AS')
|
|
354
|
-
const
|
|
355
|
-
return {
|
|
356
|
-
kind: 'subquery',
|
|
357
|
-
query,
|
|
358
|
-
alias: aliasTok.value,
|
|
359
|
-
}
|
|
398
|
+
const alias = expectIdentifier(state).value
|
|
399
|
+
return { kind: 'subquery', query, alias }
|
|
360
400
|
}
|
|
361
401
|
|
|
362
402
|
/**
|
|
@@ -376,6 +416,7 @@ function parseSelectInternal(state) {
|
|
|
376
416
|
expect(state, 'keyword', 'FROM')
|
|
377
417
|
|
|
378
418
|
// Check if it's a subquery or table name
|
|
419
|
+
/** @type {FromTable | FromSubquery} */
|
|
379
420
|
let from
|
|
380
421
|
const tok = current(state)
|
|
381
422
|
if (tok.type === 'paren' && tok.value === '(') {
|
|
@@ -383,7 +424,9 @@ function parseSelectInternal(state) {
|
|
|
383
424
|
from = parseSubquery(state)
|
|
384
425
|
} else {
|
|
385
426
|
// Simple table name: SELECT * FROM users
|
|
386
|
-
|
|
427
|
+
const table = expectIdentifier(state).value
|
|
428
|
+
const alias = parseTableAlias(state)
|
|
429
|
+
from = { kind: 'table', table, alias }
|
|
387
430
|
}
|
|
388
431
|
|
|
389
432
|
// Parse JOIN clauses
|
package/src/types.d.ts
CHANGED
|
@@ -21,7 +21,7 @@ export type SqlPrimitive = string | number | bigint | boolean | null
|
|
|
21
21
|
export interface SelectStatement {
|
|
22
22
|
distinct: boolean
|
|
23
23
|
columns: SelectColumn[]
|
|
24
|
-
from:
|
|
24
|
+
from: FromTable | FromSubquery
|
|
25
25
|
joins: JoinClause[]
|
|
26
26
|
where?: ExprNode
|
|
27
27
|
groupBy: ExprNode[]
|
|
@@ -31,6 +31,12 @@ export interface SelectStatement {
|
|
|
31
31
|
offset?: number
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
+
export interface FromTable {
|
|
35
|
+
kind: 'table'
|
|
36
|
+
table: string
|
|
37
|
+
alias?: string
|
|
38
|
+
}
|
|
39
|
+
|
|
34
40
|
export interface FromSubquery {
|
|
35
41
|
kind: 'subquery'
|
|
36
42
|
query: SelectStatement
|
|
@@ -184,8 +190,9 @@ export interface OrderByItem {
|
|
|
184
190
|
export type JoinType = 'INNER' | 'LEFT' | 'RIGHT' | 'FULL' | 'CROSS'
|
|
185
191
|
|
|
186
192
|
export interface JoinClause {
|
|
187
|
-
|
|
193
|
+
joinType: JoinType
|
|
188
194
|
table: string
|
|
195
|
+
alias?: string
|
|
189
196
|
on?: ExprNode
|
|
190
197
|
}
|
|
191
198
|
|