squirreling 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -78,6 +78,7 @@ console.log(`Collected rows:`, rows)
78
78
  ## Supported SQL Features
79
79
 
80
80
  - `SELECT` statements with `WHERE`, `ORDER BY`, `LIMIT`, `OFFSET`
81
+ - `WITH` clause for Common Table Expressions (CTEs)
81
82
  - Subqueries in `SELECT`, `FROM`, and `WHERE` clauses
82
83
  - `JOIN` operations: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `FULL JOIN`, `POSITIONAL JOIN`
83
84
  - `GROUP BY` and `HAVING` clauses
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.7.2",
3
+ "version": "0.7.3",
4
4
  "description": "Squirreling SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -1,15 +1,16 @@
1
1
  import { missingClauseError } from '../parseErrors.js'
2
- import { tableNotFoundError, unsupportedOperationError } from '../executionErrors.js'
2
+ import { unsupportedOperationError } from '../executionErrors.js'
3
3
  import { generatorSource, memorySource } from '../backend/dataSource.js'
4
4
  import { parseSql } from '../parse/parse.js'
5
5
  import { containsAggregate, extractColumns } from './columns.js'
6
6
  import { evaluateExpr } from './expression.js'
7
7
  import { evaluateHavingExpr } from './having.js'
8
8
  import { executeJoins } from './join.js'
9
+ import { resolveTableSource } from './tableSource.js'
9
10
  import { compareForTerm, defaultDerivedAlias, stringify } from './utils.js'
10
11
 
11
12
  /**
12
- * @import { AsyncCells, AsyncDataSource, AsyncRow, ExecuteSqlOptions, OrderByItem, QueryHints, SelectStatement, SqlPrimitive, UserDefinedFunction } from '../types.js'
13
+ * @import { AsyncCells, AsyncDataSource, AsyncRow, CTEDefinition, ExecuteSqlOptions, OrderByItem, QueryHints, SelectStatement, SqlPrimitive, UserDefinedFunction, WithClause } from '../types.js'
13
14
  */
14
15
 
15
16
  /**
@@ -40,7 +41,7 @@ export async function* executeSql({ tables, query, functions, signal }) {
40
41
  }
41
42
  }
42
43
 
43
- yield* executeSelect({ select, tables: normalizedTables, functions, signal })
44
+ yield* executeSelect({ select, tables: normalizedTables, withClause: select.with, functions, signal })
44
45
  }
45
46
 
46
47
  /**
@@ -49,32 +50,45 @@ export async function* executeSql({ tables, query, functions, signal }) {
49
50
  * @param {Object} options
50
51
  * @param {SelectStatement} options.select
51
52
  * @param {Record<string, AsyncDataSource>} options.tables
53
+ * @param {WithClause} [options.withClause] - WITH clause containing CTE definitions
52
54
  * @param {Record<string, UserDefinedFunction>} [options.functions]
53
55
  * @param {AbortSignal} [options.signal]
54
56
  * @yields {AsyncRow}
55
57
  */
56
- export async function* executeSelect({ select, tables, functions, signal }) {
58
+ export async function* executeSelect({ select, tables, withClause, functions, signal }) {
57
59
  /** @type {AsyncDataSource} */
58
60
  let dataSource
59
61
  /** @type {string} */
60
62
  let leftTable
61
63
 
62
64
  if (select.from.kind === 'table') {
63
- // Use alias for column prefixing, but look up the actual table name
64
- leftTable = select.from.alias ?? select.from.table
65
- dataSource = tables[select.from.table]
66
- if (dataSource === undefined) {
67
- throw tableNotFoundError({ tableName: select.from.table })
68
- }
65
+ const tableName = select.from.table
66
+ leftTable = select.from.alias ?? tableName
67
+ dataSource = resolveTableSource(tableName, tables, withClause, executeSelect, functions, signal)
69
68
  } else {
70
69
  // Nested subquery - recursively resolve
71
70
  leftTable = select.from.alias
72
- dataSource = generatorSource(executeSelect({ select: select.from.query, tables, functions, signal }))
71
+ dataSource = generatorSource(executeSelect({
72
+ select: select.from.query,
73
+ tables,
74
+ withClause,
75
+ functions,
76
+ signal,
77
+ }))
73
78
  }
74
79
 
75
80
  // Execute JOINs if present
76
81
  if (select.joins.length) {
77
- dataSource = await executeJoins({ leftSource: dataSource, joins: select.joins, leftTable, tables, functions })
82
+ dataSource = await executeJoins({
83
+ leftSource: dataSource,
84
+ joins: select.joins,
85
+ leftTable,
86
+ tables,
87
+ withClause,
88
+ functions,
89
+ executeSelectFn: executeSelect,
90
+ signal,
91
+ })
78
92
  }
79
93
 
80
94
  yield* evaluateSelectAst({ select, dataSource, tables, functions, signal })
@@ -1,10 +1,10 @@
1
1
  import { missingClauseError } from '../parseErrors.js'
2
- import { tableNotFoundError } from '../executionErrors.js'
3
2
  import { evaluateExpr } from './expression.js'
3
+ import { resolveTableSource } from './tableSource.js'
4
4
  import { stringify } from './utils.js'
5
5
 
6
6
  /**
7
- * @import { AsyncRow, AsyncDataSource, JoinClause, ExprNode, AsyncCells, UserDefinedFunction } from '../types.js'
7
+ * @import { AsyncRow, AsyncDataSource, JoinClause, ExprNode, AsyncCells, UserDefinedFunction, WithClause } from '../types.js'
8
8
  */
9
9
 
10
10
  /**
@@ -15,19 +15,19 @@ import { stringify } from './utils.js'
15
15
  * @param {JoinClause[]} options.joins - array of join clauses to execute
16
16
  * @param {string} options.leftTable - name of the left table (for column prefixing)
17
17
  * @param {Record<string, AsyncDataSource>} options.tables - all available tables
18
+ * @param {WithClause} [options.withClause] - WITH clause containing CTE definitions
18
19
  * @param {Record<string, UserDefinedFunction>} [options.functions]
20
+ * @param {Function} [options.executeSelectFn] - function to execute SELECT for CTEs (passed to avoid circular dep)
21
+ * @param {AbortSignal} [options.signal]
19
22
  * @returns {Promise<AsyncDataSource>} data source yielding joined rows
20
23
  */
21
- export async function executeJoins({ leftSource, joins, leftTable, tables, functions }) {
24
+ export async function executeJoins({ leftSource, joins, leftTable, tables, withClause, functions, executeSelectFn, signal }) {
22
25
  let currentLeftTable = leftTable
23
26
 
24
27
  // Single join optimization: stream left rows without buffering
25
28
  if (joins.length === 1) {
26
29
  const join = joins[0]
27
- const rightSource = tables[join.table]
28
- if (rightSource === undefined) {
29
- throw tableNotFoundError({ tableName: join.table })
30
- }
30
+ const rightSource = resolveTableSource(join.table, tables, withClause, executeSelectFn, functions, signal)
31
31
 
32
32
  // Buffer right rows for hash index (required for hash join)
33
33
  /** @type {AsyncRow[]} */
@@ -77,10 +77,7 @@ export async function executeJoins({ leftSource, joins, leftTable, tables, funct
77
77
  // Process all but the last join, buffering intermediate results
78
78
  for (let i = 0; i < joins.length - 1; i++) {
79
79
  const join = joins[i]
80
- const rightSource = tables[join.table]
81
- if (rightSource === undefined) {
82
- throw tableNotFoundError({ tableName: join.table })
83
- }
80
+ const rightSource = resolveTableSource(join.table, tables, withClause, executeSelectFn, functions, signal)
84
81
 
85
82
  /** @type {AsyncRow[]} */
86
83
  const rightRows = []
@@ -121,10 +118,7 @@ export async function executeJoins({ leftSource, joins, leftTable, tables, funct
121
118
 
122
119
  // Final join: stream the results
123
120
  const join = joins[joins.length - 1]
124
- const rightSource = tables[join.table]
125
- if (rightSource === undefined) {
126
- throw tableNotFoundError({ tableName: join.table })
127
- }
121
+ const rightSource = resolveTableSource(join.table, tables, withClause, executeSelectFn, functions, signal)
128
122
 
129
123
  /** @type {AsyncRow[]} */
130
124
  const rightRows = []
@@ -0,0 +1,63 @@
1
+ import { tableNotFoundError } from '../executionErrors.js'
2
+ import { generatorSource } from '../backend/dataSource.js'
3
+
4
+ /**
5
+ * @import { AsyncDataSource, CTEDefinition, UserDefinedFunction, WithClause } from '../types.js'
6
+ */
7
+
8
/**
 * Gets CTEs defined before the target CTE (excluding the target itself).
 * Enforces SQL scoping rules: each CTE can only reference CTEs defined before it.
 *
 * Names are compared case-insensitively on both sides, so the caller does not
 * need to lower-case `targetCteName` first. If no CTE matches the target name,
 * every CTE is returned.
 *
 * @param {CTEDefinition[]} allCtes - all CTE definitions in order
 * @param {string} targetCteName - the CTE name (case-insensitive)
 * @returns {WithClause} CTEs available to the target (always a new array)
 */
export function getCtesDefinedBefore(allCtes, targetCteName) {
  // Normalize here rather than trusting the caller; comparing against a
  // mixed-case target would never match and would leak the target itself.
  const targetLower = targetCteName.toLowerCase()
  const cutoff = allCtes.findIndex(cte => cte.name.toLowerCase() === targetLower)
  return { ctes: cutoff === -1 ? [...allCtes] : allCtes.slice(0, cutoff) }
}
24
+
25
/**
 * Resolves a table name to an AsyncDataSource, checking CTEs first.
 *
 * CTEs are only considered when both `withClause` and `executeSelectFn` are
 * supplied, and are matched by name case-insensitively. On a match, a new
 * execution of the CTE's query is started via `executeSelectFn` and wrapped in
 * `generatorSource`; that execution only sees CTEs defined before the matched one.
 * Otherwise the name is looked up as an exact own key of `tables`.
 *
 * @param {string} tableName - the table name to resolve
 * @param {Record<string, AsyncDataSource>} tables - regular tables
 * @param {import('../types.js').WithClause} [withClause] - WITH clause containing CTE definitions
 * @param {Function} [executeSelectFn] - function to execute SELECT for CTEs
 * @param {Record<string, UserDefinedFunction>} [functions]
 * @param {AbortSignal} [signal]
 * @returns {AsyncDataSource}
 * @throws the error built by `tableNotFoundError` when the name matches neither a CTE nor a table
 */
export function resolveTableSource(tableName, tables, withClause, executeSelectFn, functions, signal) {
  // Check CTEs first (case-insensitive)
  if (withClause && executeSelectFn) {
    const lowerName = tableName.toLowerCase()
    const cte = withClause.ctes.find(c => c.name.toLowerCase() === lowerName)

    if (cte) {
      // CTE reference: each resolution starts a new execution of the CTE query.
      // Pass only CTEs defined before this one to prevent self-reference.
      const availableCtes = getCtesDefinedBefore(withClause.ctes, lowerName)

      return generatorSource(executeSelectFn({
        select: cte.query,
        tables,
        withClause: availableCtes,
        functions,
        signal,
      }))
    }
  }

  // Regular table lookup. Only own keys count: a plain `tables[tableName]`
  // would also return inherited members such as `toString` or `constructor`
  // for a query like `FROM toString`, instead of reporting a missing table.
  const tableSource = Object.prototype.hasOwnProperty.call(tables, tableName)
    ? tables[tableName]
    : undefined
  if (tableSource === undefined) {
    throw tableNotFoundError({ tableName })
  }
  return tableSource
}
@@ -1,13 +1,64 @@
1
- import { tokenizeSql } from './tokenize.js'
2
1
  import { parseExpression } from './expression.js'
3
- import { RESERVED_AFTER_COLUMN, RESERVED_AFTER_TABLE, isKnownFunction } from '../validation.js'
2
+ import { tokenizeSql } from './tokenize.js'
4
3
  import { consume, current, expect, expectIdentifier, match, parseError, peekToken } from './state.js'
5
4
  import { parseJoins } from './joins.js'
5
+ import { duplicateCTEError } from '../parseErrors.js'
6
+ import { RESERVED_AFTER_COLUMN, RESERVED_AFTER_TABLE, isKnownFunction } from '../validation.js'
6
7
 
7
8
  /**
8
- * @import { ExprNode, FromSubquery, FromTable, OrderByItem, ParseSqlOptions, ParserState, SelectStatement, SelectColumn } from '../types.js'
9
+ * @import { CTEDefinition, ExprNode, FromSubquery, FromTable, OrderByItem, ParseSqlOptions, ParserState, SelectStatement, SelectColumn, WithClause } from '../types.js'
9
10
  */
10
11
 
12
/**
 * Parses the comma-separated CTE list of a WITH clause.
 *
 * Starts by reading an identifier, so the parser is expected to be positioned
 * on the first CTE name. Each entry has the form `name AS ( <select> )`.
 * CTE names must be unique ignoring case.
 *
 * @param {ParserState} state
 * @returns {WithClause}
 * @throws the error built by `duplicateCTEError` when a name repeats
 */
function parseWithClause(state) {
  /** @type {CTEDefinition[]} */
  const definitions = []
  /** @type {Set<string>} */
  const usedNames = new Set()

  do {
    // <name>
    const nameToken = expectIdentifier(state)
    const name = nameToken.value
    const key = name.toLowerCase()

    // Reject a name that was already declared (case-insensitive)
    if (usedNames.has(key)) {
      throw duplicateCTEError({
        cteName: name,
        positionStart: nameToken.positionStart,
        positionEnd: nameToken.positionEnd,
      })
    }
    usedNames.add(key)

    // AS ( <select> )
    expect(state, 'keyword', 'AS')
    expect(state, 'paren', '(')
    const query = parseSelectInternal(state)
    expect(state, 'paren', ')')

    definitions.push({ name, query })

    // A comma means another CTE follows; anything else ends the clause
  } while (match(state, 'comma'))

  return { ctes: definitions }
}
61
+
11
62
  /**
12
63
  * @param {ParseSqlOptions} options
13
64
  * @returns {SelectStatement}
@@ -16,8 +67,21 @@ export function parseSql({ query, functions }) {
16
67
  const tokens = tokenizeSql(query)
17
68
  /** @type {ParserState} */
18
69
  const state = { tokens, pos: 0, functions }
70
+
71
+ // Check for WITH clause
72
+ /** @type {WithClause | undefined} */
73
+ let withClause
74
+ if (match(state, 'keyword', 'WITH')) {
75
+ withClause = parseWithClause(state)
76
+ }
77
+
19
78
  const select = parseSelectInternal(state)
20
79
 
80
+ // Attach WITH clause to the select statement
81
+ if (withClause) {
82
+ select.with = withClause
83
+ }
84
+
21
85
  const tok = current(state)
22
86
  if (tok.type !== 'eof') {
23
87
  throw parseError(state, 'end of query')
@@ -9,6 +9,7 @@ import {
9
9
  */
10
10
 
11
11
  const KEYWORDS = new Set([
12
+ 'WITH',
12
13
  'SELECT',
13
14
  'FROM',
14
15
  'WHERE',
@@ -79,7 +79,7 @@ export function invalidLiteralError({ type, value, positionStart, positionEnd, v
79
79
  export function unexpectedCharError({ char, positionStart, expectsSelect = false }) {
80
80
  const positionEnd = positionStart + 1
81
81
  if (expectsSelect) {
82
- return new ParseError({ message: `Expected SELECT but found "${char}" at position ${positionStart}. Queries must start with SELECT.`, positionStart, positionEnd })
82
+ return new ParseError({ message: `Expected SELECT but found "${char}" at position ${positionStart}. Queries must start with SELECT or WITH.`, positionStart, positionEnd })
83
83
  }
84
84
  return new ParseError({ message: `Unexpected character "${char}" at position ${positionStart}`, positionStart, positionEnd })
85
85
  }
@@ -145,3 +145,20 @@ export function argCountParseError({ funcName, expected, received, positionStart
145
145
  export function missingClauseError({ missing, context, positionStart, positionEnd }) {
146
146
  return new ParseError({ message: `${context} requires ${missing}`, positionStart: positionStart ?? 0, positionEnd: positionEnd ?? 0 })
147
147
  }
148
+
149
/**
 * Builds the parse error reported when a WITH clause declares the same CTE name twice.
 *
 * @param {Object} options
 * @param {string} options.cteName - The duplicate CTE name
 * @param {number} options.positionStart - Start position in query
 * @param {number} options.positionEnd - End position in query
 * @returns {ParseError}
 */
export function duplicateCTEError({ cteName, positionStart, positionEnd }) {
  const message = `CTE "${cteName}" is defined more than once at position ${positionStart}`
  return new ParseError({ message, positionStart, positionEnd })
}
package/src/types.d.ts CHANGED
@@ -67,7 +67,17 @@ export interface UserDefinedFunction {
67
67
  }
68
68
  }
69
69
 
70
+ export interface CTEDefinition {
71
+ name: string
72
+ query: SelectStatement
73
+ }
74
+
75
+ export interface WithClause {
76
+ ctes: CTEDefinition[]
77
+ }
78
+
70
79
  export interface SelectStatement {
80
+ with?: WithClause
71
81
  distinct: boolean
72
82
  columns: SelectColumn[]
73
83
  from: FromTable | FromSubquery