squirreling 0.4.7 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,68 @@
1
- import { isAggregateFunc, isStringFunc } from '../validation.js'
1
+ import {
2
+ invalidLiteralError,
3
+ missingClauseError,
4
+ syntaxError,
5
+ unknownFunctionError,
6
+ } from '../parseErrors.js'
7
+ import { isAggregateFunc, isIntervalUnit, isMathFunc, isStringFunc } from '../validation.js'
2
8
  import { parseComparison } from './comparison.js'
3
9
  import { parseSelectInternal } from './parse.js'
4
- import { consume, current, expect, expectIdentifier, match, peekToken } from './state.js'
10
+ import { consume, current, expect, expectIdentifier, lastPosition, match, peekToken } from './state.js'
5
11
 
6
12
  /**
7
- * @import { ExprNode, ParserState, SelectStatement, WhenClause } from '../types.js'
13
+ * @import { ExprNode, IntervalNode, ParserState, SelectStatement, WhenClause } from '../types.js'
8
14
  */
9
15
 
/**
 * Parses an INTERVAL literal of the form `INTERVAL [-] <value> <unit>`.
 *
 * The current token is consumed as the INTERVAL keyword without being checked,
 * so the caller must already have matched it. `<value>` is either a number
 * token or a quoted string holding a plain decimal number; `<unit>` must be a
 * keyword accepted by isIntervalUnit.
 *
 * @param {ParserState} state
 * @returns {IntervalNode}
 * @throws syntaxError result when the value token is neither a number nor a string
 * @throws invalidLiteralError result when a quoted value is not a finite decimal
 *   number, or when the unit token is not a valid interval unit keyword
 */
function parseInterval(state) {
  const { positionStart } = current(state)
  consume(state) // INTERVAL

  // Optional sign written as a standalone `-` operator token
  let sign = 1
  const signTok = current(state)
  if (signTok.type === 'operator' && signTok.value === '-') {
    consume(state)
    sign = -1
  }

  // Magnitude: a number token or a quoted numeric string
  const valueTok = current(state)
  /** @type {number} */
  let value
  if (valueTok.type === 'number') {
    consume(state)
    // Number() also normalises a BigInt numericValue to a plain number
    value = sign * Number(valueTok.numericValue)
  } else if (valueTok.type === 'string') {
    consume(state)
    // Validate the whole string: parseFloat alone would accept trailing
    // garbage such as '1 day' or '5abc' and quietly drop the remainder.
    const text = valueTok.value.trim()
    const isDecimal = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(text)
    const parsed = isDecimal ? Number(text) : NaN
    if (!Number.isFinite(parsed)) {
      throw invalidLiteralError({
        type: 'interval value',
        value: valueTok.value,
        positionStart: valueTok.positionStart,
        positionEnd: valueTok.positionEnd,
      })
    }
    value = sign * parsed
  } else {
    // Describe the offending token the same way the other parser errors do
    const received = valueTok.type === 'eof'
      ? 'end of query'
      : `"${valueTok.originalValue ?? valueTok.value}"`
    throw syntaxError({
      expected: 'interval value (number)',
      received,
      positionStart: valueTok.positionStart,
      positionEnd: valueTok.positionEnd,
    })
  }

  // Unit keyword
  const unitTok = current(state)
  if (unitTok.type !== 'keyword' || !isIntervalUnit(unitTok.value)) {
    throw invalidLiteralError({
      type: 'interval unit',
      // Report the text as the user wrote it when the token kept it
      value: unitTok.originalValue ?? unitTok.value,
      positionStart: unitTok.positionStart,
      positionEnd: unitTok.positionEnd,
      validValues: 'DAY, MONTH, YEAR, HOUR, MINUTE, SECOND',
    })
  }
  consume(state)

  return { type: 'interval', value, unit: unitTok.value, positionStart, positionEnd: lastPosition(state) }
}
65
+
10
66
  /**
11
67
  * @param {ParserState} state
12
68
  * @returns {ExprNode}
@@ -21,6 +77,7 @@ export function parseExpression(state) {
21
77
  */
22
78
  export function parsePrimary(state) {
23
79
  const tok = current(state)
80
+ const { positionStart } = tok
24
81
 
25
82
  if (tok.type === 'paren' && tok.value === '(') {
26
83
  // Peek ahead to see if this is a scalar subquery
@@ -31,6 +88,8 @@ export function parsePrimary(state) {
31
88
  return {
32
89
  type: 'subquery',
33
90
  subquery,
91
+ positionStart,
92
+ positionEnd: lastPosition(state),
34
93
  }
35
94
  }
36
95
  // Regular grouped expression
@@ -55,6 +114,8 @@ export function parsePrimary(state) {
55
114
  type: 'cast',
56
115
  expr,
57
116
  toType: typeTok.value,
117
+ positionStart,
118
+ positionEnd: lastPosition(state),
58
119
  }
59
120
  }
60
121
 
@@ -63,8 +124,8 @@ export function parsePrimary(state) {
63
124
  const funcName = tok.value
64
125
 
65
126
  // validate function names
66
- if (!isStringFunc(funcName) && !isAggregateFunc(funcName)) {
67
- throw new Error(`Unknown function "${funcName}" at position ${tok.position}`)
127
+ if (!isStringFunc(funcName) && !isAggregateFunc(funcName) && !isMathFunc(funcName)) {
128
+ throw unknownFunctionError({ funcName, positionStart: tok.positionStart, positionEnd: tok.positionEnd })
68
129
  }
69
130
 
70
131
  consume(state) // function name
@@ -77,10 +138,13 @@ export function parsePrimary(state) {
77
138
  while (true) {
78
139
  // Handle COUNT(*) - treat * as a special identifier
79
140
  if (current(state).type === 'operator' && current(state).value === '*') {
141
+ const starTok = current(state)
80
142
  consume(state)
81
143
  args.push({
82
144
  type: 'identifier',
83
145
  name: '*',
146
+ positionStart: starTok.positionStart,
147
+ positionEnd: lastPosition(state),
84
148
  })
85
149
  } else {
86
150
  const arg = parseExpression(state)
@@ -96,6 +160,21 @@ export function parsePrimary(state) {
96
160
  type: 'function',
97
161
  name: funcName,
98
162
  args,
163
+ positionStart,
164
+ positionEnd: lastPosition(state),
165
+ }
166
+ }
167
+
168
+ // Niladic datetime functions (no parentheses required per ANSI SQL)
169
+ const niladicFuncs = ['CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP']
170
+ if (niladicFuncs.includes(tok.value)) {
171
+ consume(state)
172
+ return {
173
+ type: 'function',
174
+ name: tok.value,
175
+ args: [],
176
+ positionStart,
177
+ positionEnd: lastPosition(state),
99
178
  }
100
179
  }
101
180
 
@@ -112,6 +191,8 @@ export function parsePrimary(state) {
112
191
  return {
113
192
  type: 'identifier',
114
193
  name,
194
+ positionStart,
195
+ positionEnd: lastPosition(state),
115
196
  }
116
197
  }
117
198
 
@@ -120,6 +201,8 @@ export function parsePrimary(state) {
120
201
  return {
121
202
  type: 'literal',
122
203
  value: tok.numericValue ?? null,
204
+ positionStart,
205
+ positionEnd: lastPosition(state),
123
206
  }
124
207
  }
125
208
 
@@ -128,21 +211,23 @@ export function parsePrimary(state) {
128
211
  return {
129
212
  type: 'literal',
130
213
  value: tok.value,
214
+ positionStart,
215
+ positionEnd: lastPosition(state),
131
216
  }
132
217
  }
133
218
 
134
219
  if (tok.type === 'keyword') {
135
220
  if (tok.value === 'TRUE') {
136
221
  consume(state)
137
- return { type: 'literal', value: true }
222
+ return { type: 'literal', value: true, positionStart, positionEnd: lastPosition(state) }
138
223
  }
139
224
  if (tok.value === 'FALSE') {
140
225
  consume(state)
141
- return { type: 'literal', value: false }
226
+ return { type: 'literal', value: false, positionStart, positionEnd: lastPosition(state) }
142
227
  }
143
228
  if (tok.value === 'NULL') {
144
229
  consume(state)
145
- return { type: 'literal', value: null }
230
+ return { type: 'literal', value: null, positionStart, positionEnd: lastPosition(state) }
146
231
  }
147
232
  if (tok.value === 'EXISTS') {
148
233
  consume(state) // EXISTS
@@ -150,6 +235,8 @@ export function parsePrimary(state) {
150
235
  return {
151
236
  type: 'exists',
152
237
  subquery,
238
+ positionStart,
239
+ positionEnd: lastPosition(state),
153
240
  }
154
241
  }
155
242
  if (tok.value === 'CASE') {
@@ -175,7 +262,10 @@ export function parsePrimary(state) {
175
262
  }
176
263
 
177
264
  if (whenClauses.length === 0) {
178
- throw new Error('CASE expression must have at least one WHEN clause')
265
+ throw missingClauseError({
266
+ missing: 'at least one WHEN clause',
267
+ context: 'CASE expression',
268
+ })
179
269
  }
180
270
 
181
271
  // Parse optional ELSE clause
@@ -192,8 +282,13 @@ export function parsePrimary(state) {
192
282
  caseExpr,
193
283
  whenClauses,
194
284
  elseResult,
285
+ positionStart,
286
+ positionEnd: lastPosition(state),
195
287
  }
196
288
  }
289
+ if (tok.value === 'INTERVAL') {
290
+ return parseInterval(state)
291
+ }
197
292
  }
198
293
 
199
294
  if (tok.type === 'operator' && tok.value === '-') {
@@ -203,11 +298,13 @@ export function parsePrimary(state) {
203
298
  type: 'unary',
204
299
  op: '-',
205
300
  argument,
301
+ positionStart,
302
+ positionEnd: argument.positionEnd,
206
303
  }
207
304
  }
208
305
 
209
306
  const found = tok.type === 'eof' ? 'end of query' : `"${tok.originalValue ?? tok.value}"`
210
- throw new Error(`Expected expression but found ${found} at position ${tok.position}`)
307
+ throw syntaxError({ expected: 'expression', received: found, positionStart: tok.positionStart, positionEnd: tok.positionEnd })
211
308
  }
212
309
 
213
310
  /**
@@ -223,6 +320,8 @@ function parseOr(state) {
223
320
  op: 'OR',
224
321
  left: node,
225
322
  right,
323
+ positionStart: node.positionStart,
324
+ positionEnd: right.positionEnd,
226
325
  }
227
326
  }
228
327
  return node
@@ -241,6 +340,8 @@ function parseAnd(state) {
241
340
  op: 'AND',
242
341
  left: node,
243
342
  right,
343
+ positionStart: node.positionStart,
344
+ positionEnd: right.positionEnd,
244
345
  }
245
346
  }
246
347
  return node
@@ -251,7 +352,9 @@ function parseAnd(state) {
251
352
  * @returns {ExprNode}
252
353
  */
253
354
  function parseNot(state) {
355
+ const tok = current(state)
254
356
  if (match(state, 'keyword', 'NOT')) {
357
+ const { positionStart } = tok
255
358
  // Check for NOT EXISTS
256
359
  const nextTok = current(state)
257
360
  if (nextTok.type === 'keyword' && nextTok.value === 'EXISTS') {
@@ -260,6 +363,8 @@ function parseNot(state) {
260
363
  return {
261
364
  type: 'not exists',
262
365
  subquery,
366
+ positionStart,
367
+ positionEnd: lastPosition(state),
263
368
  }
264
369
  }
265
370
  const argument = parseNot(state)
@@ -267,6 +372,8 @@ function parseNot(state) {
267
372
  type: 'unary',
268
373
  op: 'NOT',
269
374
  argument,
375
+ positionStart,
376
+ positionEnd: argument.positionEnd,
270
377
  }
271
378
  }
272
379
  return parseComparison(state)
@@ -288,6 +395,8 @@ export function parseAdditive(state) {
288
395
  op: tok.value,
289
396
  left: node,
290
397
  right,
398
+ positionStart: node.positionStart,
399
+ positionEnd: right.positionEnd,
291
400
  }
292
401
  } else {
293
402
  break
@@ -312,6 +421,8 @@ function parseMultiplicative(state) {
312
421
  op: tok.value,
313
422
  left: node,
314
423
  right,
424
+ positionStart: node.positionStart,
425
+ positionEnd: right.positionEnd,
315
426
  }
316
427
  } else {
317
428
  break
@@ -63,6 +63,11 @@ function parseSelectList(state) {
63
63
  return cols
64
64
  }
65
65
 
// Keyword tokens that are allowed to be the first token of a SELECT-list
// expression; parseSelectItem rejects any other keyword in that position.
const EXPRESSION_START_KEYWORDS = new Set([
  'CASE',
  'TRUE',
  'FALSE',
  'NULL',
  'EXISTS',
  'NOT',
  'INTERVAL',
])
70
+
66
71
  /**
67
72
  * @param {ParserState} state
68
73
  * @returns {SelectColumn}
@@ -70,7 +75,7 @@ function parseSelectList(state) {
70
75
  function parseSelectItem(state) {
71
76
  const tok = current(state)
72
77
 
73
- if (tok.type === 'keyword' && tok.value !== 'CASE' || tok.type === 'eof') {
78
+ if (tok.type === 'keyword' && !EXPRESSION_START_KEYWORDS.has(tok.value) || tok.type === 'eof') {
74
79
  throw parseError(state, 'column name or expression')
75
80
  }
76
81
 
@@ -1,3 +1,5 @@
1
+ import { syntaxError } from '../parseErrors.js'
2
+
1
3
  /**
2
4
  * @import { ParserState, Token, TokenType } from '../types.js'
3
5
  */
@@ -29,12 +31,22 @@ export function peekToken(state, offset) {
29
31
  */
30
32
  export function consume(state) {
31
33
  const tok = current(state)
34
+ state.lastPos = tok.positionEnd
32
35
  if (state.pos < state.tokens.length - 1) {
33
36
  state.pos += 1
34
37
  }
35
38
  return tok
36
39
  }
37
40
 
/**
 * Reports the end position stored in `state.lastPos`, which `consume` sets to
 * the `positionEnd` of each token it consumes. Yields 0 while nothing has been
 * recorded.
 * @param {ParserState} state
 * @returns {number}
 */
export function lastPosition(state) {
  const { lastPos } = state
  return lastPos ?? 0
}
49
+
38
50
  /**
39
51
  * @param {ParserState} state
40
52
  * @param {TokenType} type
@@ -81,12 +93,12 @@ export function expectIdentifier(state) {
81
93
  * Helper function to create consistent parser error messages.
82
94
  * @param {ParserState} state
83
95
  * @param {string} expected - Description of what was expected
84
- * @returns {Error}
96
+ * @returns {import('../parseErrors.js').ParseError}
85
97
  */
86
98
  export function parseError(state, expected) {
87
99
  const tok = current(state)
88
100
  const prevToken = state.tokens[state.pos - 1]
89
- const after = prevToken ? ` after "${prevToken.originalValue ?? prevToken.value}"` : ''
90
- const found = tok.type === 'eof' ? 'end of query' : `"${tok.originalValue ?? tok.value}"`
91
- return new Error(`Expected ${expected}${after} but found ${found} at position ${tok.position}`)
101
+ const after = prevToken ? prevToken.originalValue ?? prevToken.value : undefined
102
+ const received = tok.type === 'eof' ? 'end of query' : `"${tok.originalValue ?? tok.value}"`
103
+ return syntaxError({ expected, received, positionStart: tok.positionStart, positionEnd: tok.positionEnd, after })
92
104
  }
@@ -1,3 +1,9 @@
1
+ import {
2
+ invalidLiteralError,
3
+ unexpectedCharError,
4
+ unterminatedError,
5
+ } from '../parseErrors.js'
6
+
1
7
  /**
2
8
  * @import { Token } from '../types.d.ts'
3
9
  */
@@ -41,6 +47,13 @@ const KEYWORDS = new Set([
41
47
  'FULL',
42
48
  'OUTER',
43
49
  'ON',
50
+ 'INTERVAL',
51
+ 'DAY',
52
+ 'MONTH',
53
+ 'YEAR',
54
+ 'HOUR',
55
+ 'MINUTE',
56
+ 'SECOND',
44
57
  ])
45
58
 
46
59
  /**
@@ -71,6 +84,63 @@ export function tokenize(sql) {
71
84
  return ch
72
85
  }
73
86
 
/**
 * Reads the rest of a numeric literal through the enclosing tokenizer's
 * `peek`/`nextChar` helpers and builds its token.
 *
 * Accepted shape: digits, an optional fractional part, an optional exponent
 * (`e`/`E`, optional sign, at least one digit) and an optional `n` suffix that
 * makes the literal a BigInt. The token's `positionEnd` is the value of the
 * tokenizer index `i` once the literal has been read.
 *
 * @param {number} startPos - value recorded as the token's positionStart
 * @param {string} prefix - text the caller already consumed (e.g. a leading '-')
 * @returns {Token}
 * @throws invalidLiteralError result for a malformed number or bigint literal
 */
function parseNumber(startPos, prefix = '') {
  let text = prefix
  while (isDigit(peek())) {
    text += nextChar()
  }
  if (peek() === '.') {
    text += nextChar()
    while (isDigit(peek())) {
      text += nextChar()
    }
  }
  // exponent
  let danglingExponent = false
  if (peek() === 'e' || peek() === 'E') {
    text += nextChar()
    if (peek() === '+' || peek() === '-') {
      text += nextChar()
    }
    // Remember an exponent marker with no digits; it is rejected below, after
    // the checks that already produced errors for such input.
    danglingExponent = !isDigit(peek())
    while (isDigit(peek())) {
      text += nextChar()
    }
  }
  // bigint suffix
  if (peek() === 'n') {
    text += nextChar()
    // The suffix must end the literal: "5name" is a malformed number, not the
    // bigint 5n followed by the identifier "ame".
    if (isAlpha(peek()) || isDigit(peek())) {
      throw invalidLiteralError({ type: 'number', value: text + peek(), positionStart: startPos, positionEnd: i + 1 })
    }
    const digits = text.slice(0, -1)
    let numericValue
    try {
      // BigInt() throws on fractional or exponent forms such as "1.5" or "1e5"
      numericValue = BigInt(digits)
    } catch {
      throw invalidLiteralError({ type: 'bigint', value: digits, positionStart: startPos, positionEnd: i })
    }
    return {
      type: 'number',
      value: text,
      positionStart: startPos,
      positionEnd: i,
      numericValue,
    }
  }
  if (isAlpha(peek())) {
    throw invalidLiteralError({ type: 'number', value: text + peek(), positionStart: startPos, positionEnd: i + 1 })
  }
  // parseFloat would quietly ignore a trailing "e" / "e+", so reject it here
  if (danglingExponent) {
    throw invalidLiteralError({ type: 'number', value: text, positionStart: startPos, positionEnd: i })
  }
  const num = parseFloat(text)
  if (Number.isNaN(num)) {
    throw invalidLiteralError({ type: 'number', value: text, positionStart: startPos, positionEnd: i })
  }
  return {
    type: 'number',
    value: text,
    positionStart: startPos,
    positionEnd: i,
    numericValue: num,
  }
}
143
+
74
144
  while (i < length) {
75
145
  const ch = peek()
76
146
 
@@ -102,41 +172,25 @@ export function tokenize(sql) {
102
172
 
103
173
  const pos = i
104
174
 
175
+ // negative numbers (when not subtraction)
176
+ if (ch === '-' && i + 1 < length && isDigit(sql[i + 1])) {
177
+ const lastToken = tokens[tokens.length - 1]
178
+ const isValueBefore = lastToken && (
179
+ lastToken.type === 'identifier' ||
180
+ lastToken.type === 'number' ||
181
+ lastToken.type === 'string' ||
182
+ lastToken.type === 'paren' && lastToken.value === ')'
183
+ )
184
+ if (!isValueBefore) {
185
+ nextChar() // consume '-'
186
+ tokens.push(parseNumber(pos, '-'))
187
+ continue
188
+ }
189
+ }
190
+
105
191
  // numbers
106
192
  if (isDigit(ch)) {
107
- let text = ''
108
- while (isDigit(peek())) {
109
- text += nextChar()
110
- }
111
- if (peek() === '.') {
112
- text += nextChar()
113
- while (isDigit(peek())) {
114
- text += nextChar()
115
- }
116
- }
117
- // exponent
118
- if (peek() === 'e' || peek() === 'E') {
119
- text += nextChar()
120
- if (peek() === '+' || peek() === '-') {
121
- text += nextChar()
122
- }
123
- while (isDigit(peek())) {
124
- text += nextChar()
125
- }
126
- }
127
- if (isAlpha(peek())) {
128
- throw new Error(`Invalid number at position ${pos}: ${text}${peek()}`)
129
- }
130
- const num = parseFloat(text)
131
- if (isNaN(num)) {
132
- throw new Error(`Invalid number at position ${pos}: ${text}`)
133
- }
134
- tokens.push({
135
- type: 'number',
136
- value: text,
137
- position: pos,
138
- numericValue: num,
139
- })
193
+ tokens.push(parseNumber(pos))
140
194
  continue
141
195
  }
142
196
 
@@ -152,13 +206,15 @@ export function tokenize(sql) {
152
206
  type: 'keyword',
153
207
  value: upper,
154
208
  originalValue: text,
155
- position: pos,
209
+ positionStart: pos,
210
+ positionEnd: i,
156
211
  })
157
212
  } else {
158
213
  tokens.push({
159
214
  type: 'identifier',
160
215
  value: text,
161
- position: pos,
216
+ positionStart: pos,
217
+ positionEnd: i,
162
218
  })
163
219
  }
164
220
  continue
@@ -170,7 +226,7 @@ export function tokenize(sql) {
170
226
  let text = ''
171
227
  while (i <= length) {
172
228
  if (i === length) {
173
- throw new Error(`Unterminated string literal starting at position ${pos}`)
229
+ throw unterminatedError('string', pos, length)
174
230
  }
175
231
  const c = nextChar()
176
232
  if (c === quote) {
@@ -187,7 +243,8 @@ export function tokenize(sql) {
187
243
  tokens.push({
188
244
  type: 'string',
189
245
  value: text,
190
- position: pos,
246
+ positionStart: pos,
247
+ positionEnd: i,
191
248
  })
192
249
  continue
193
250
  }
@@ -198,7 +255,7 @@ export function tokenize(sql) {
198
255
  let text = ''
199
256
  while (i <= length) {
200
257
  if (i === length) {
201
- throw new Error(`Unterminated identifier starting at position ${pos}`)
258
+ throw unterminatedError('identifier', pos, length)
202
259
  }
203
260
  const c = nextChar()
204
261
  if (c === quote) {
@@ -215,7 +272,8 @@ export function tokenize(sql) {
215
272
  tokens.push({
216
273
  type: 'identifier',
217
274
  value: text,
218
- position: pos,
275
+ positionStart: pos,
276
+ positionEnd: i,
219
277
  })
220
278
  continue
221
279
  }
@@ -231,7 +289,8 @@ export function tokenize(sql) {
231
289
  tokens.push({
232
290
  type: 'operator',
233
291
  value: op,
234
- position: pos,
292
+ positionStart: pos,
293
+ positionEnd: i,
235
294
  })
236
295
  continue
237
296
  }
@@ -242,7 +301,8 @@ export function tokenize(sql) {
242
301
  tokens.push({
243
302
  type: 'operator',
244
303
  value: ch,
245
- position: pos,
304
+ positionStart: pos,
305
+ positionEnd: i,
246
306
  })
247
307
  continue
248
308
  }
@@ -252,7 +312,8 @@ export function tokenize(sql) {
252
312
  tokens.push({
253
313
  type: 'comma',
254
314
  value: ',',
255
- position: pos,
315
+ positionStart: pos,
316
+ positionEnd: i,
256
317
  })
257
318
  continue
258
319
  }
@@ -262,7 +323,8 @@ export function tokenize(sql) {
262
323
  tokens.push({
263
324
  type: 'dot',
264
325
  value: '.',
265
- position: pos,
326
+ positionStart: pos,
327
+ positionEnd: i,
266
328
  })
267
329
  continue
268
330
  }
@@ -272,7 +334,8 @@ export function tokenize(sql) {
272
334
  tokens.push({
273
335
  type: 'paren',
274
336
  value: ch,
275
- position: pos,
337
+ positionStart: pos,
338
+ positionEnd: i,
276
339
  })
277
340
  continue
278
341
  }
@@ -282,21 +345,23 @@ export function tokenize(sql) {
282
345
  tokens.push({
283
346
  type: 'semicolon',
284
347
  value: ';',
285
- position: pos,
348
+ positionStart: pos,
349
+ positionEnd: i,
286
350
  })
287
351
  continue
288
352
  }
289
353
 
290
354
  if (tokens.length === 0) {
291
- throw new Error(`Expected SELECT but found "${ch}" at position ${pos}`)
355
+ throw unexpectedCharError({ char: ch, positionStart: pos, expectsSelect: true })
292
356
  }
293
- throw new Error(`Unexpected character "${ch}" at position ${pos}`)
357
+ throw unexpectedCharError({ char: ch, positionStart: pos })
294
358
  }
295
359
 
296
360
  tokens.push({
297
361
  type: 'eof',
298
362
  value: '',
299
- position: length,
363
+ positionStart: length,
364
+ positionEnd: length,
300
365
  })
301
366
 
302
367
  return tokens