squirreling 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,166 +1,90 @@
1
- import { invalidLiteralError, unexpectedCharError, unterminatedError } from '../validation/parseErrors.js'
1
+ import { KEYWORDS } from '../validation/keywords.js'
2
+ import { InvalidLiteralError, ParseError, UnexpectedCharError } from '../validation/parseErrors.js'
2
3
 
3
4
  /**
4
5
  * @import { Token } from '../types.d.ts'
5
6
  */
6
7
 
7
- const KEYWORDS = new Set([
8
- 'WITH',
9
- 'SELECT',
10
- 'FROM',
11
- 'WHERE',
12
- 'AND',
13
- 'OR',
14
- 'NOT',
15
- 'IS',
16
- 'GROUP',
17
- 'BY',
18
- 'HAVING',
19
- 'ORDER',
20
- 'ASC',
21
- 'DESC',
22
- 'NULLS',
23
- 'LIMIT',
24
- 'OFFSET',
25
- 'AS',
26
- 'ALL',
27
- 'DISTINCT',
28
- 'TRUE',
29
- 'FALSE',
30
- 'NULL',
31
- 'LIKE',
32
- 'IN',
33
- 'EXISTS',
34
- 'BETWEEN',
35
- 'CASE',
36
- 'WHEN',
37
- 'THEN',
38
- 'ELSE',
39
- 'END',
40
- 'JOIN',
41
- 'INNER',
42
- 'LEFT',
43
- 'RIGHT',
44
- 'FULL',
45
- 'OUTER',
46
- 'POSITIONAL',
47
- 'ON',
48
- 'INTERVAL',
49
- 'DAY',
50
- 'MONTH',
51
- 'YEAR',
52
- 'HOUR',
53
- 'MINUTE',
54
- 'SECOND',
55
- 'FILTER',
56
- ])
8
+ const NUMBER_REGEX = /^-?(?:\d+n|\d+(?:\.\d*)?(?:[eE][+-]?\d+)?)/
57
9
 
58
10
  /**
59
- * @param {string} sql
11
+ * @param {string} query
60
12
  * @returns {Token[]}
61
13
  */
62
- export function tokenizeSql(sql) {
14
+ export function tokenizeSql(query) {
63
15
  /** @type {Token[]} */
64
16
  const tokens = []
65
- const { length } = sql
66
- let i = 0
17
+ const len = query.length
18
+ let i = 0 // current position in query string
67
19
 
68
20
  /**
69
21
  * @returns {string}
70
22
  */
71
23
  function peek() {
72
- if (i >= length) return ''
73
- return sql[i]
24
+ return query[i]
74
25
  }
75
26
 
76
27
  /**
77
28
  * @returns {string}
78
29
  */
79
30
  function nextChar() {
80
- if (i >= length) return ''
81
- const ch = sql[i]
82
- i++
83
- return ch
31
+ return query[i++]
84
32
  }
85
33
 
86
34
  /**
87
- * @param {number} startPos
88
- * @param {string} prefix
35
+ * @param {number} positionStart
89
36
  * @returns {Token}
90
37
  */
91
- function parseNumber(startPos, prefix = '') {
92
- let text = prefix
93
- while (isDigit(peek())) {
94
- text += nextChar()
38
+ function parseNumber(positionStart) {
39
+ const value = query.slice(i).match(NUMBER_REGEX)?.[0]
40
+ if (!value) {
41
+ throw new InvalidLiteralError({ expected: 'number', value: query[i] || 'eof', positionStart, positionEnd: i + 1 })
95
42
  }
96
- if (peek() === '.') {
97
- text += nextChar()
98
- while (isDigit(peek())) {
99
- text += nextChar()
100
- }
43
+ i += value.length
44
+ const next = peek()
45
+ if (isAlpha(next) || next === '.') {
46
+ throw new InvalidLiteralError({ expected: 'number', value: value + next, positionStart, positionEnd: i + 1 })
101
47
  }
102
- // exponent
103
- if (peek() === 'e' || peek() === 'E') {
104
- text += nextChar()
105
- if (peek() === '+' || peek() === '-') {
106
- text += nextChar()
107
- }
108
- while (isDigit(peek())) {
109
- text += nextChar()
110
- }
111
- }
112
- // bigint suffix
113
- if (peek() === 'n') {
114
- text += nextChar()
115
- try {
116
- return {
117
- type: 'number',
118
- value: text,
119
- positionStart: startPos,
120
- positionEnd: i,
121
- numericValue: BigInt(text.slice(0, -1)),
122
- }
123
- } catch {
124
- throw invalidLiteralError({ type: 'bigint', value: text.slice(0, -1), positionStart: startPos, positionEnd: i })
48
+ if (value.endsWith('n')) {
49
+ return {
50
+ type: 'number',
51
+ value,
52
+ positionStart,
53
+ positionEnd: i,
54
+ numericValue: BigInt(value.slice(0, -1)),
125
55
  }
126
56
  }
127
- if (isAlpha(peek())) {
128
- throw invalidLiteralError({ type: 'number', value: text + peek(), positionStart: startPos, positionEnd: i + 1 })
129
- }
130
- const num = parseFloat(text)
131
- if (isNaN(num)) {
132
- throw invalidLiteralError({ type: 'number', value: text, positionStart: startPos, positionEnd: i })
133
- }
134
57
  return {
135
58
  type: 'number',
136
- value: text,
137
- positionStart: startPos,
59
+ value,
60
+ positionStart,
138
61
  positionEnd: i,
139
- numericValue: num,
62
+ numericValue: Number(value),
140
63
  }
141
64
  }
142
65
 
143
- while (i < length) {
66
+ while (i < len) {
67
+ const positionStart = i
144
68
  const ch = peek()
145
69
 
146
70
  if (isWhitespace(ch)) {
147
- nextChar()
71
+ i++
148
72
  continue
149
73
  }
150
74
 
151
75
  // line comment --
152
- if (ch === '-' && i + 1 < length && sql[i + 1] === '-') {
153
- while (i < length && sql[i] !== '\n') {
76
+ if (ch === '-' && query[i + 1] === '-') {
77
+ while (i < len && query[i] !== '\n') {
154
78
  i++
155
79
  }
156
80
  continue
157
81
  }
158
82
 
159
83
  // block comment /* ... */
160
- if (ch === '/' && i + 1 < length && sql[i + 1] === '*') {
84
+ if (ch === '/' && query[i + 1] === '*') {
161
85
  i += 2
162
- while (i < length) {
163
- if (sql[i] === '*' && i + 1 < length && sql[i + 1] === '/') {
86
+ while (i < len) {
87
+ if (query[i] === '*' && query[i + 1] === '/') {
164
88
  i += 2
165
89
  break
166
90
  }
@@ -169,10 +93,8 @@ export function tokenizeSql(sql) {
169
93
  continue
170
94
  }
171
95
 
172
- const pos = i
173
-
174
96
  // negative numbers (when not subtraction)
175
- if (ch === '-' && i + 1 < length && isDigit(sql[i + 1])) {
97
+ if (ch === '-' && isDigit(query[i + 1])) {
176
98
  const lastToken = tokens[tokens.length - 1]
177
99
  const isValueBefore = lastToken && (
178
100
  lastToken.type === 'identifier' ||
@@ -181,104 +103,75 @@ export function tokenizeSql(sql) {
181
103
  lastToken.type === 'paren' && lastToken.value === ')'
182
104
  )
183
105
  if (!isValueBefore) {
184
- nextChar() // consume '-'
185
- tokens.push(parseNumber(pos, '-'))
106
+ tokens.push(parseNumber(positionStart))
186
107
  continue
187
108
  }
188
109
  }
189
110
 
190
111
  // numbers
191
112
  if (isDigit(ch)) {
192
- tokens.push(parseNumber(pos))
113
+ tokens.push(parseNumber(positionStart))
193
114
  continue
194
115
  }
195
116
 
196
117
  // identifiers / keywords
197
118
  if (isAlpha(ch)) {
198
- let text = ''
119
+ let value = ''
199
120
  while (isAlphaNumeric(peek())) {
200
- text += nextChar()
121
+ value += nextChar()
201
122
  }
202
- const upper = text.toUpperCase()
123
+ const upper = value.toUpperCase()
203
124
  if (KEYWORDS.has(upper)) {
204
125
  tokens.push({
205
126
  type: 'keyword',
206
- value: upper,
207
- originalValue: text,
208
- positionStart: pos,
127
+ value: upper, // uppercase for keywords
128
+ originalValue: value, // preserve user casing
129
+ positionStart,
209
130
  positionEnd: i,
210
131
  })
211
132
  } else {
212
133
  tokens.push({
213
134
  type: 'identifier',
214
- value: text,
215
- positionStart: pos,
135
+ value,
136
+ positionStart,
216
137
  positionEnd: i,
217
138
  })
218
139
  }
219
140
  continue
220
141
  }
221
142
 
222
- // string literals: single quotes
223
- if (ch === '\'') {
224
- const quote = nextChar()
225
- let text = ''
226
- while (i <= length) {
227
- if (i === length) {
228
- throw unterminatedError({ type: 'string', positionStart: pos, positionEnd: length })
229
- }
230
- const c = nextChar()
231
- if (c === quote) {
232
- // check for escaped quote
233
- if (peek() === quote) {
234
- text += quote
235
- nextChar()
236
- continue
237
- }
238
- break
239
- }
240
- text += c
241
- }
242
- tokens.push({
243
- type: 'string',
244
- value: text,
245
- positionStart: pos,
246
- positionEnd: i,
247
- })
248
- continue
249
- }
250
-
251
- // quoted identifiers: double quotes
252
- if (ch === '"') {
143
+ // string literals (single quotes) and quoted identifiers (double quotes)
144
+ if (ch === '\'' || ch === '"') {
145
+ const type = ch === '\'' ? 'string' : 'identifier'
253
146
  const quote = nextChar()
254
- let text = ''
255
- while (i <= length) {
256
- if (i === length) {
257
- throw unterminatedError({ type: 'identifier', positionStart: pos, positionEnd: length })
147
+ let value = ''
148
+ while (i <= len) {
149
+ if (i === len) {
150
+ throw new ParseError({
151
+ message: `Unterminated ${type} starting at position ${positionStart}`,
152
+ positionStart,
153
+ positionEnd: i,
154
+ })
258
155
  }
259
156
  const c = nextChar()
260
157
  if (c === quote) {
261
158
  // check for escaped quote
262
159
  if (peek() === quote) {
263
- text += quote
264
- nextChar()
160
+ value += quote
161
+ i++
265
162
  continue
266
163
  }
164
+ // end quote
267
165
  break
268
166
  }
269
- text += c
167
+ value += c
270
168
  }
271
- tokens.push({
272
- type: 'identifier',
273
- value: text,
274
- positionStart: pos,
275
- positionEnd: i,
276
- })
169
+ tokens.push({ type, value, positionStart, positionEnd: i })
277
170
  continue
278
171
  }
279
172
 
280
- // two-character operators
281
- if (ch === '<' || ch === '>' || ch === '!' || ch === '=') {
173
+ // operators
174
+ if ('<>!=+-*/%'.includes(ch)) {
282
175
  let op = nextChar()
283
176
  if ((op === '<' || op === '>' || op === '!') && peek() === '=') {
284
177
  op += nextChar()
@@ -288,76 +181,64 @@ export function tokenizeSql(sql) {
288
181
  tokens.push({
289
182
  type: 'operator',
290
183
  value: op,
291
- positionStart: pos,
292
- positionEnd: i,
293
- })
294
- continue
295
- }
296
-
297
- // single-char operators
298
- if (ch === '*' || ch === '+' || ch === '-' || ch === '/' || ch === '%') {
299
- nextChar()
300
- tokens.push({
301
- type: 'operator',
302
- value: ch,
303
- positionStart: pos,
184
+ positionStart,
304
185
  positionEnd: i,
305
186
  })
306
187
  continue
307
188
  }
308
189
 
309
190
  if (ch === ',') {
310
- nextChar()
191
+ i++
311
192
  tokens.push({
312
193
  type: 'comma',
313
194
  value: ',',
314
- positionStart: pos,
195
+ positionStart,
315
196
  positionEnd: i,
316
197
  })
317
198
  continue
318
199
  }
319
200
 
320
201
  if (ch === '.') {
321
- nextChar()
202
+ i++
322
203
  tokens.push({
323
204
  type: 'dot',
324
205
  value: '.',
325
- positionStart: pos,
206
+ positionStart,
326
207
  positionEnd: i,
327
208
  })
328
209
  continue
329
210
  }
330
211
 
331
212
  if (ch === '(' || ch === ')') {
332
- nextChar()
213
+ i++
333
214
  tokens.push({
334
215
  type: 'paren',
335
216
  value: ch,
336
- positionStart: pos,
217
+ positionStart,
337
218
  positionEnd: i,
338
219
  })
339
220
  continue
340
221
  }
341
222
 
342
223
  if (ch === ';') {
343
- nextChar()
224
+ i++
344
225
  tokens.push({
345
226
  type: 'semicolon',
346
227
  value: ';',
347
- positionStart: pos,
228
+ positionStart,
348
229
  positionEnd: i,
349
230
  })
350
231
  continue
351
232
  }
352
233
 
353
- throw unexpectedCharError({ char: ch, positionStart: pos, expectsSelect: !tokens.length })
234
+ throw new UnexpectedCharError({ char: ch, positionStart, expectsSelect: !tokens.length })
354
235
  }
355
236
 
356
237
  tokens.push({
357
238
  type: 'eof',
358
239
  value: '',
359
- positionStart: length,
360
- positionEnd: length,
240
+ positionStart: len,
241
+ positionEnd: len,
361
242
  })
362
243
 
363
244
  return tokens
@@ -1,35 +1,48 @@
1
+ import { derivedAlias } from '../expression/alias.js'
2
+
3
+ /**
4
+ * @import { AsyncDataSource, ExprNode, FromSubquery, FromTable, SelectStatement, Statement } from '../types.js'
5
+ */
6
+
1
7
  /**
2
- * @import { ExprNode, SelectStatement } from '../types.js'
8
+ * @param {FromTable | FromSubquery} from
9
+ * @returns {string}
3
10
  */
11
+ export function fromAlias(from) {
12
+ return from.alias ?? (from.type === 'table' ? from.table : 'table')
13
+ }
4
14
 
5
15
  /**
6
16
  * Extracts per-table column names needed from a SELECT statement with joins.
7
17
  * Returns a Map from table alias to the column names needed from that table; a value of undefined means all of that table's columns are needed.
8
18
  *
9
- * @param {SelectStatement} select
19
+ * @param {object} options
20
+ * @param {SelectStatement} options.select
21
+ * @param {string[]} [options.parentColumns] - columns needed by the parent query
10
22
  * @returns {Map<string, string[] | undefined>}
11
23
  */
12
- export function extractColumns(select) {
24
+ export function extractColumns({ select, parentColumns }) {
13
25
  /** @type {Map<string, string[] | undefined>} */
14
26
  const result = new Map()
15
27
 
16
28
  // Build alias list from FROM + JOINs
17
- const fromAlias = select.from.kind === 'table'
18
- ? select.from.alias ?? select.from.table
19
- : select.from.alias
20
- const aliases = [fromAlias]
29
+ const aliases = [fromAlias(select.from)]
21
30
  for (const join of select.joins) {
22
31
  aliases.push(join.alias ?? join.table)
23
32
  }
24
33
 
25
34
  // If any unqualified SELECT * exists, all tables need all columns
26
- if (select.columns.some(col => col.kind === 'star' && !col.table)) {
27
- /** @type {Map<string, string[] | undefined>} */
28
- const result = new Map()
29
- for (const alias of aliases) {
30
- result.set(alias, undefined)
35
+ if (select.columns.some(col => col.type === 'star' && !col.table)) {
36
+ if (!parentColumns) {
37
+ /** @type {Map<string, string[] | undefined>} */
38
+ const result = new Map()
39
+ for (const alias of aliases) {
40
+ result.set(alias, undefined)
41
+ }
42
+ return result
31
43
  }
32
- return result
44
+ // With parentColumns, fall through to collect internal clause columns
45
+ // and seed with what the parent needs
33
46
  }
34
47
 
35
48
  // Track per-table columns needed; undefined means all columns (table.*)
@@ -37,8 +50,11 @@ export function extractColumns(select) {
37
50
  const perTable = new Map(aliases.map(alias => [alias, new Set()]))
38
51
 
39
52
  // Collect all identifiers from all clauses
40
- /** @type {Set<string>} */
41
- const identifiers = new Set()
53
+ // For SELECT *, parent column names are real table columns, so seed them
54
+ // directly. For non-star queries, parent names may be aliases and are
55
+ // handled below by filtering derived columns and collecting from expressions.
56
+ const hasStar = select.columns.some(col => col.type === 'star' && !col.table)
57
+ const identifiers = new Set(hasStar ? parentColumns : undefined)
42
58
 
43
59
  // Collect ORDER BY identifiers, excluding SELECT aliases (their underlying
44
60
  // columns are already collected from select.columns expressions above)
@@ -46,10 +62,15 @@ export function extractColumns(select) {
46
62
  const selectAliases = new Set()
47
63
 
48
64
  for (const col of select.columns) {
49
- if (col.kind === 'star' && col.table) {
65
+ if (col.type === 'star' && col.table) {
50
66
  // SELECT table.* means all columns needed
51
67
  perTable.set(col.table, undefined)
52
- } else if (col.kind === 'derived') {
68
+ } else if (col.type === 'derived') {
69
+ // When parentColumns is set, skip columns the parent doesn't need
70
+ if (parentColumns) {
71
+ const outputName = col.alias ?? derivedAlias(col.expr)
72
+ if (!parentColumns.includes(outputName)) continue
73
+ }
53
74
  collectColumnsFromExpr(col.expr, identifiers)
54
75
  if (col.alias) {
55
76
  selectAliases.add(col.alias)
@@ -145,3 +166,80 @@ function collectColumnsFromExpr(expr, columns, aliases) {
145
166
  }
146
167
  // No columns: count(*), literal, interval, exists, not exists, subquery
147
168
  }
169
+
170
+ /**
171
+ * Infers output columns for set-operation validation.
172
+ *
173
+ * @param {object} options
174
+ * @param {Statement} options.stmt
175
+ * @param {Map<string, string[]>} [options.cteColumns]
176
+ * @param {Record<string, AsyncDataSource>} [options.tables]
177
+ * @returns {string[]}
178
+ */
179
+ export function inferStatementColumns({ stmt, cteColumns, tables }) {
180
+ if (stmt.type === 'with') {
181
+ return inferStatementColumns({ stmt: stmt.query, cteColumns, tables })
182
+ }
183
+ if (stmt.type === 'compound') {
184
+ return inferStatementColumns({ stmt: stmt.left, cteColumns, tables })
185
+ }
186
+
187
+ const sourceColumns = inferSelectSourceColumns({ select: stmt, cteColumns, tables })
188
+ /** @type {string[]} */
189
+ const result = []
190
+
191
+ for (const col of stmt.columns) {
192
+ if (col.type === 'star') {
193
+ result.push(...sourceColumns)
194
+ } else {
195
+ result.push(col.alias ?? derivedAlias(col.expr))
196
+ }
197
+ }
198
+
199
+ return result
200
+ }
201
+
202
+ /**
203
+ * Infers the source columns available before SELECT projection.
204
+ * Mirrors the column ordering used by join row materialization.
205
+ *
206
+ * @param {object} options
207
+ * @param {SelectStatement} options.select
208
+ * @param {Map<string, string[]>} [options.cteColumns]
209
+ * @param {Record<string, AsyncDataSource>} [options.tables]
210
+ * @returns {string[]}
211
+ */
212
+ function inferSelectSourceColumns({ select, cteColumns, tables }) {
213
+ if (select.from.type === 'subquery') {
214
+ return inferStatementColumns({ stmt: select.from.query, cteColumns, tables })
215
+ }
216
+
217
+ if (!select.joins.length) {
218
+ return lookupTableColumns(select.from.table, cteColumns, tables)
219
+ }
220
+
221
+ // For the FROM table and then each JOIN, emit every column twice: alias-qualified (`alias.col`) and bare (`col`)
222
+ /** @type {string[]} */
223
+ const result = []
224
+ const fromAlias = select.from.alias ?? select.from.table
225
+ for (const col of lookupTableColumns(select.from.table, cteColumns, tables)) {
226
+ result.push(`${fromAlias}.${col}`, col)
227
+ }
228
+ for (const join of select.joins) {
229
+ const alias = join.alias ?? join.table
230
+ for (const col of lookupTableColumns(join.table, cteColumns, tables)) {
231
+ result.push(`${alias}.${col}`, col)
232
+ }
233
+ }
234
+ return result
235
+ }
236
+
237
+ /**
238
+ * @param {string} table
239
+ * @param {Map<string, string[]>} [cteColumns]
240
+ * @param {Record<string, AsyncDataSource>} [tables]
241
+ * @returns {string[]}
242
+ */
243
+ function lookupTableColumns(table, cteColumns, tables) {
244
+ return cteColumns?.get(table.toLowerCase()) ?? tables?.[table]?.columns ?? []
245
+ }