squirreling 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,215 +1,177 @@
1
1
  import { expectNoAggregate, findAggregate } from '../validation/aggregates.js'
2
- import { RESERVED_AFTER_COLUMN, RESERVED_AFTER_TABLE } from '../validation/functions.js'
3
- import { duplicateCTEError } from '../validation/parseErrors.js'
2
+ import { RESERVED_AFTER_COLUMN, RESERVED_AFTER_TABLE } from '../validation/keywords.js'
3
+ import { ParseError } from '../validation/parseErrors.js'
4
4
  import { parseExpression } from './expression.js'
5
5
  import { parseJoins } from './joins.js'
6
- import { consume, current, expect, expectIdentifier, match, parseError, peekToken } from './state.js'
6
+ import { consume, current, expect, match, parseError, peekToken } from './state.js'
7
7
  import { tokenizeSql } from './tokenize.js'
8
8
 
9
9
  /**
10
- * @import { CTEDefinition, ExprNode, FromSubquery, FromTable, OrderByItem, ParseSqlOptions, ParserState, SelectStatement, SelectColumn, WithClause } from '../types.js'
10
+ * @import { CTEDefinition, ExprNode, FromSubquery, FromTable, OrderByItem, ParseSqlOptions, ParserState, SelectColumn, SelectStatement, SetOperationStatement, SetOperator, Statement } from '../types.js'
11
11
  */
12
12
 
13
13
  /**
14
14
  * @param {ParseSqlOptions} options
15
- * @returns {SelectStatement}
15
+ * @returns {Statement}
16
16
  */
17
17
  export function parseSql({ query, functions }) {
18
18
  const tokens = tokenizeSql(query)
19
19
  /** @type {ParserState} */
20
20
  const state = { tokens, pos: 0, lastPos: 0, functions }
21
21
 
22
- // Check for WITH clause
23
- /** @type {WithClause | undefined} */
24
- let withClause
25
- if (match(state, 'keyword', 'WITH')) {
26
- withClause = parseWithClause(state)
27
- }
28
-
29
- const select = parseSelectInternal(state)
30
-
31
- // Attach WITH clause to the select statement
32
- if (withClause) {
33
- select.with = withClause
34
- }
22
+ // Parse optional WITH clause
23
+ const stmt = parseStatement(state)
35
24
 
36
25
  const tok = current(state)
37
26
  if (tok.type !== 'eof') {
38
27
  throw parseError(state, 'end of query')
39
28
  }
40
29
 
41
- return select
30
+ return stmt
42
31
  }
43
32
 
44
33
  /**
34
+ * Parses a WITH clause containing one or more CTEs, or a SELECT with optional set operations.
35
+ *
45
36
  * @param {ParserState} state
46
- * @returns {SelectColumn[]}
37
+ * @returns {Statement}
47
38
  */
48
- function parseSelectList(state) {
49
- /** @type {SelectColumn[]} */
50
- const cols = []
51
-
52
- while (true) {
53
- const tok = current(state)
39
+ export function parseStatement(state) {
40
+ const positionStart = state.lastPos
41
+ if (match(state, 'keyword', 'WITH')) {
42
+ /** @type {CTEDefinition[]} */
43
+ const ctes = []
44
+ /** @type {Set<string>} */
45
+ const seenNames = new Set()
54
46
 
55
- // Check for qualified asterisk (table.*)
56
- if (tok.type === 'identifier') {
57
- const next = peekToken(state, 1)
58
- const nextNext = peekToken(state, 2)
59
- if (next.type === 'dot' && nextNext.type === 'operator' && nextNext.value === '*') {
60
- const tableTok = consume(state) // consume table name
61
- consume(state) // consume dot
62
- consume(state) // consume asterisk
63
- cols.push({ kind: 'star', table: tableTok.value })
64
- if (!match(state, 'comma')) break
65
- continue
47
+ while (true) {
48
+ // Parse CTE name
49
+ const nameTok = expect(state, 'identifier')
50
+ const name = nameTok.value
51
+ const nameLower = name.toLowerCase()
52
+
53
+ // Check for duplicate CTE names
54
+ if (seenNames.has(nameLower)) {
55
+ throw new ParseError({
56
+ message: `CTE "${name}" is defined more than once at position ${positionStart}`,
57
+ ...nameTok,
58
+ })
66
59
  }
67
- }
60
+ seenNames.add(nameLower)
68
61
 
69
- // Check for unqualified asterisk (*)
70
- if (tok.type === 'operator' && tok.value === '*') {
71
- consume(state)
72
- cols.push({ kind: 'star' })
62
+ // Expect AS statement
63
+ expect(state, 'keyword', 'AS')
64
+ expect(state, 'paren', '(')
65
+
66
+ // Parse the CTE's SELECT statement
67
+ const query = parseStatement(state)
68
+
69
+ expect(state, 'paren', ')')
70
+
71
+ ctes.push({ name, query, positionStart: nameTok.positionStart, positionEnd: state.lastPos })
72
+
73
+ // Check for comma (more CTEs) or end of WITH clause
73
74
  if (!match(state, 'comma')) break
74
- continue
75
75
  }
76
76
 
77
- cols.push(parseSelectItem(state))
78
- if (!match(state, 'comma')) break
79
- }
77
+ const query = parseSetOperations(state)
80
78
 
81
- return cols
79
+ return { type: 'with', ctes, query, positionStart, positionEnd: state.lastPos }
80
+ } else {
81
+ return parseSetOperations(state)
82
+ }
82
83
  }
83
84
 
84
85
  /**
85
- * Parses a WITH clause containing one or more CTEs
86
+ * Checks for and parses UNION/INTERSECT/EXCEPT set operations after a SELECT.
87
+ * Handles chaining (e.g., SELECT ... UNION SELECT ... EXCEPT SELECT ...).
88
+ * ORDER BY and LIMIT/OFFSET on the last segment apply to the entire compound result.
86
89
  *
87
90
  * @param {ParserState} state
88
- * @returns {WithClause}
91
+ * @returns {SelectStatement | SetOperationStatement}
89
92
  */
90
- function parseWithClause(state) {
91
- /** @type {CTEDefinition[]} */
92
- const ctes = []
93
- /** @type {Set<string>} */
94
- const seenNames = new Set()
93
+ function parseSetOperations(state) {
94
+ let left = parseIntersectOperations(state)
95
95
 
96
96
  while (true) {
97
- // Parse CTE name
98
- const nameTok = expectIdentifier(state)
99
- const name = nameTok.value
100
- const nameLower = name.toLowerCase()
101
-
102
- // Check for duplicate CTE names
103
- if (seenNames.has(nameLower)) {
104
- throw duplicateCTEError({
105
- cteName: name,
106
- positionStart: nameTok.positionStart,
107
- positionEnd: nameTok.positionEnd,
108
- })
97
+ /** @type {SetOperator | undefined} */
98
+ let operator
99
+ if (match(state, 'keyword', 'UNION')) {
100
+ operator = 'UNION'
101
+ } else if (match(state, 'keyword', 'EXCEPT')) {
102
+ operator = 'EXCEPT'
109
103
  }
110
- seenNames.add(nameLower)
111
-
112
- // Expect AS statement
113
- expect(state, 'keyword', 'AS')
114
- expect(state, 'paren', '(')
115
-
116
- // Parse the CTE's SELECT statement
117
- const query = parseSelectInternal(state)
118
-
119
- expect(state, 'paren', ')')
120
-
121
- ctes.push({ name, query })
122
-
123
- // Check for comma (more CTEs) or end of WITH clause
124
- if (!match(state, 'comma')) {
125
- break
104
+ if (!operator) return left
105
+
106
+ const all = !!match(state, 'keyword', 'ALL')
107
+ const right = parseIntersectOperations(state)
108
+
109
+ // ORDER BY / LIMIT / OFFSET after a set operation apply to the compound result.
110
+ // If the right SELECT parsed them, lift them to the compound statement.
111
+ left = {
112
+ type: 'compound',
113
+ operator,
114
+ all,
115
+ left,
116
+ right,
117
+ orderBy: right.orderBy,
118
+ limit: right.limit,
119
+ offset: right.offset,
120
+ positionStart: left.positionStart,
121
+ positionEnd: right.positionEnd,
126
122
  }
127
- }
128
-
129
- return { ctes }
130
- }
131
123
 
132
- /**
133
- * @param {ParserState} state
134
- * @returns {SelectColumn}
135
- */
136
- function parseSelectItem(state) {
137
- const expr = parseExpression(state)
138
- const alias = parseAs(state)
139
- return { kind: 'derived', expr, alias }
140
- }
141
-
142
- /**
143
- * Parses an optional table alias (e.g., "FROM users u" or "FROM users AS u")
144
- * @param {ParserState} state
145
- * @returns {string | undefined}
146
- */
147
- export function parseTableAlias(state) {
148
- // Check for explicit AS keyword
149
- if (match(state, 'keyword', 'AS')) {
150
- const aliasTok = expectIdentifier(state)
151
- return aliasTok.value
152
- }
153
- // Check for implicit alias (identifier not in reserved list)
154
- const maybeAlias = current(state)
155
- if (maybeAlias.type === 'identifier' && !RESERVED_AFTER_TABLE.has(maybeAlias.value.toUpperCase())) {
156
- consume(state)
157
- return maybeAlias.value
124
+ // Clear lifted clauses from the right SELECT
125
+ right.orderBy = []
126
+ right.limit = undefined
127
+ right.offset = undefined
158
128
  }
159
129
  }
160
130
 
161
131
  /**
132
+ * Parses a left-associative INTERSECT chain, which binds tighter than UNION/EXCEPT.
133
+ *
162
134
  * @param {ParserState} state
163
- * @returns {string | undefined}
135
+ * @returns {SelectStatement | SetOperationStatement}
164
136
  */
165
- function parseAs(state) {
166
- if (match(state, 'keyword', 'AS')) {
167
- // After AS, allow keywords as aliases (except reserved ones)
168
- const aliasTok = current(state)
169
- if (aliasTok.type === 'identifier') {
170
- consume(state)
171
- return aliasTok.value
172
- } else if (aliasTok.type === 'keyword' && !RESERVED_AFTER_COLUMN.has(aliasTok.value.toUpperCase())) {
173
- consume(state)
174
- // Use original case for keywords used as aliases
175
- return aliasTok.originalValue ?? aliasTok.value
176
- } else {
177
- throw parseError(state, 'alias')
178
- }
179
- } else {
180
- // Implicit alias SELECT UPPER(name) name_upper
181
- const maybeAlias = current(state)
182
- if (maybeAlias.type === 'identifier' && !RESERVED_AFTER_COLUMN.has(maybeAlias.value.toUpperCase())) {
183
- consume(state)
184
- return maybeAlias.value
137
+ function parseIntersectOperations(state) {
138
+ /** @type {SelectStatement | SetOperationStatement} */
139
+ let left = parseSelect(state)
140
+
141
+ while (match(state, 'keyword', 'INTERSECT')) {
142
+ const all = !!match(state, 'keyword', 'ALL')
143
+ const right = parseSelect(state)
144
+
145
+ left = {
146
+ type: 'compound',
147
+ operator: 'INTERSECT',
148
+ all,
149
+ left,
150
+ right,
151
+ orderBy: right.orderBy,
152
+ limit: right.limit,
153
+ offset: right.offset,
154
+ positionStart: left.positionStart,
155
+ positionEnd: right.positionEnd,
185
156
  }
157
+
158
+ right.orderBy = []
159
+ right.limit = undefined
160
+ right.offset = undefined
186
161
  }
187
- }
188
162
 
189
- /**
190
- * Parses a subquery in parentheses with an alias
191
- * @param {ParserState} state
192
- * @returns {FromSubquery}
193
- */
194
- function parseFromSubquery(state) {
195
- expect(state, 'paren', '(')
196
- const query = parseSelectInternal(state)
197
- expect(state, 'paren', ')')
198
- const alias = parseTableAlias(state)
199
- return { kind: 'subquery', query, alias }
163
+ return left
200
164
  }
201
165
 
202
166
  /**
203
167
  * @param {ParserState} state
204
168
  * @returns {SelectStatement}
205
169
  */
206
- export function parseSelectInternal(state) {
170
+ function parseSelect(state) {
171
+ const { positionStart } = current(state)
207
172
  expect(state, 'keyword', 'SELECT')
208
173
 
209
- let distinct = false
210
- if (match(state, 'keyword', 'DISTINCT')) {
211
- distinct = true
212
- }
174
+ const distinct = match(state, 'keyword', 'DISTINCT')
213
175
 
214
176
  const columns = parseSelectList(state)
215
177
 
@@ -221,12 +183,28 @@ export function parseSelectInternal(state) {
221
183
  const tok = current(state)
222
184
  if (tok.type === 'paren' && tok.value === '(') {
223
185
  // Subquery: SELECT * FROM (SELECT ...) AS alias
224
- from = parseFromSubquery(state)
186
+ expect(state, 'paren', '(')
187
+ const query = parseStatement(state)
188
+ expect(state, 'paren', ')')
189
+ const alias = parseTableAlias(state)
190
+ from = {
191
+ type: 'subquery',
192
+ query,
193
+ alias,
194
+ positionStart: tok.positionStart,
195
+ positionEnd: state.lastPos,
196
+ }
225
197
  } else {
226
198
  // Simple table name: SELECT * FROM users
227
- const tableTok = expectIdentifier(state)
199
+ expect(state, 'identifier')
228
200
  const alias = parseTableAlias(state)
229
- from = { kind: 'table', table: tableTok.value, alias, positionStart: tableTok.positionStart, positionEnd: tableTok.positionEnd }
201
+ from = {
202
+ type: 'table',
203
+ table: tok.value,
204
+ alias,
205
+ positionStart: tok.positionStart,
206
+ positionEnd: state.lastPos,
207
+ }
230
208
  }
231
209
 
232
210
  // Parse JOIN clauses
@@ -265,7 +243,7 @@ export function parseSelectInternal(state) {
265
243
  }
266
244
 
267
245
  const hasAggregate = groupBy.length > 0 || columns.some(col =>
268
- col.kind === 'derived' && findAggregate(col.expr)
246
+ col.type === 'derived' && findAggregate(col.expr)
269
247
  )
270
248
 
271
249
  if (match(state, 'keyword', 'ORDER')) {
@@ -285,12 +263,11 @@ export function parseSelectInternal(state) {
285
263
  /** @type {'FIRST' | 'LAST' | undefined} */
286
264
  let nulls
287
265
  if (match(state, 'keyword', 'NULLS')) {
288
- const tok = current(state)
289
- if (tok.type === 'identifier' && tok.value.toUpperCase() === 'FIRST') {
290
- consume(state)
266
+ const tok = consume(state)
267
+ const upper = tok.value.toUpperCase()
268
+ if (tok.type === 'identifier' && upper === 'FIRST') {
291
269
  nulls = 'FIRST'
292
- } else if (tok.type === 'identifier' && tok.value.toUpperCase() === 'LAST') {
293
- consume(state)
270
+ } else if (tok.type === 'identifier' && upper === 'LAST') {
294
271
  nulls = 'LAST'
295
272
  } else {
296
273
  throw parseError(state, 'FIRST or LAST after NULLS')
@@ -306,57 +283,32 @@ export function parseSelectInternal(state) {
306
283
  }
307
284
 
308
285
  if (match(state, 'keyword', 'LIMIT')) {
309
- const tok = current(state)
310
- if (tok.type !== 'number') {
311
- throw parseError(state, 'numeric LIMIT')
312
- }
313
- consume(state)
314
- const n = parseInt(tok.value, 10)
315
- if (!Number.isFinite(n)) {
316
- throw parseError(state, 'valid LIMIT value')
286
+ const tok = consume(state)
287
+ if (tok.type !== 'number' || typeof tok.numericValue !== 'number') {
288
+ throw parseError(state, 'positive integer LIMIT')
317
289
  }
318
- if (n < 0) {
319
- throw parseError(state, 'non-negative LIMIT value')
290
+ if (!Number.isInteger(tok.numericValue) || tok.numericValue < 0) {
291
+ throw parseError(state, 'positive integer LIMIT value')
320
292
  }
321
- limit = n
293
+ limit = tok.numericValue
294
+ }
322
295
 
323
- if (match(state, 'keyword', 'OFFSET')) {
324
- const oTok = current(state)
325
- if (oTok.type !== 'number') {
326
- throw parseError(state, 'numeric OFFSET')
327
- }
328
- consume(state)
329
- const off = parseInt(oTok.value, 10)
330
- if (!Number.isFinite(off)) {
331
- throw parseError(state, 'valid OFFSET value')
332
- }
333
- if (off < 0) {
334
- throw parseError(state, 'non-negative OFFSET value')
335
- }
336
- offset = off
296
+ if (match(state, 'keyword', 'OFFSET')) {
297
+ const tok = consume(state)
298
+ if (tok.type !== 'number' || typeof tok.numericValue !== 'number') {
299
+ throw parseError(state, 'positive integer OFFSET value')
337
300
  }
338
- } else if (match(state, 'keyword', 'OFFSET')) {
339
- const oTok = current(state)
340
- if (oTok.type !== 'number') {
341
- throw parseError(state, 'numeric OFFSET')
301
+ if (!Number.isInteger(tok.numericValue) || tok.numericValue < 0) {
302
+ throw parseError(state, 'positive integer OFFSET value')
342
303
  }
343
- consume(state)
344
- const off = parseInt(oTok.value, 10)
345
- if (!Number.isFinite(off)) {
346
- throw parseError(state, 'valid OFFSET value')
347
- }
348
- if (off < 0) {
349
- throw parseError(state, 'non-negative OFFSET value')
350
- }
351
- offset = off
304
+ offset = tok.numericValue
352
305
  }
353
306
 
354
307
  // optional trailing semicolon
355
- if (current(state).type === 'semicolon') {
356
- consume(state)
357
- }
308
+ match(state, 'semicolon')
358
309
 
359
310
  return {
311
+ type: 'select',
360
312
  distinct,
361
313
  columns,
362
314
  from,
@@ -367,5 +319,97 @@ export function parseSelectInternal(state) {
367
319
  orderBy,
368
320
  limit,
369
321
  offset,
322
+ positionStart,
323
+ positionEnd: state.lastPos,
324
+ }
325
+ }
326
+
327
+ /**
328
+ * @param {ParserState} state
329
+ * @returns {SelectColumn[]}
330
+ */
331
+ function parseSelectList(state) {
332
+ /** @type {SelectColumn[]} */
333
+ const cols = []
334
+
335
+ while (true) {
336
+ const tok = current(state)
337
+
338
+ // Check for qualified asterisk (table.*)
339
+ if (tok.type === 'identifier') {
340
+ const next = peekToken(state, 1)
341
+ const nextNext = peekToken(state, 2)
342
+ if (next.type === 'dot' && nextNext.type === 'operator' && nextNext.value === '*') {
343
+ const table = consume(state).value
344
+ consume(state) // consume dot
345
+ consume(state) // consume asterisk
346
+ cols.push({ type: 'star', table })
347
+ if (!match(state, 'comma')) break
348
+ continue
349
+ }
350
+ }
351
+
352
+ // Check for unqualified asterisk (*)
353
+ if (match(state, 'operator', '*')) {
354
+ cols.push({ type: 'star' })
355
+ if (!match(state, 'comma')) break
356
+ continue
357
+ }
358
+
359
+ // Parse derived column with optional alias
360
+ const expr = parseExpression(state)
361
+ const alias = parseAs(state)
362
+ cols.push({ type: 'derived', expr, alias })
363
+
364
+ if (!match(state, 'comma')) break
365
+ }
366
+
367
+ return cols
368
+ }
369
+
370
+ /**
371
+ * Parses an optional table alias (e.g., "FROM users u" or "FROM users AS u")
372
+ * @param {ParserState} state
373
+ * @returns {string | undefined}
374
+ */
375
+ export function parseTableAlias(state) {
376
+ // Check for explicit AS keyword
377
+ if (match(state, 'keyword', 'AS')) {
378
+ const aliasTok = expect(state, 'identifier')
379
+ return aliasTok.value
380
+ }
381
+ // Check for implicit alias (identifier not in reserved list)
382
+ const maybeAlias = current(state)
383
+ if (maybeAlias.type === 'identifier' && !RESERVED_AFTER_TABLE.has(maybeAlias.value.toUpperCase())) {
384
+ consume(state)
385
+ return maybeAlias.value
386
+ }
387
+ }
388
+
389
+ /**
390
+ * @param {ParserState} state
391
+ * @returns {string | undefined}
392
+ */
393
+ function parseAs(state) {
394
+ if (match(state, 'keyword', 'AS')) {
395
+ // After AS, allow keywords as aliases (except reserved ones)
396
+ const aliasTok = current(state)
397
+ if (aliasTok.type === 'identifier') {
398
+ consume(state)
399
+ return aliasTok.value
400
+ } else if (aliasTok.type === 'keyword' && !RESERVED_AFTER_COLUMN.has(aliasTok.value)) {
401
+ consume(state)
402
+ // Use original case for keywords used as aliases
403
+ return aliasTok.originalValue ?? aliasTok.value
404
+ } else {
405
+ throw parseError(state, 'alias')
406
+ }
407
+ } else {
408
+ // Implicit alias SELECT UPPER(name) name_upper
409
+ const maybeAlias = current(state)
410
+ if (maybeAlias.type === 'identifier' && !RESERVED_AFTER_COLUMN.has(maybeAlias.value.toUpperCase())) {
411
+ consume(state)
412
+ return maybeAlias.value
413
+ }
370
414
  }
371
415
  }