squirreling 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,332 @@
1
+ /**
2
+ * @import { Token } from '../types.d.ts'
3
+ */
4
+
/**
 * Reserved words recognised by the tokenizer, stored upper-cased.
 * `tokenize` upper-cases each scanned word before looking it up here,
 * so keyword matching is case-insensitive.
 */
const KEYWORDS = new Set([
  // core clauses
  'SELECT', 'FROM', 'WHERE',
  // boolean logic and null tests
  'AND', 'OR', 'NOT', 'IS',
  // grouping
  'GROUP', 'BY', 'HAVING',
  // ordering
  'ORDER', 'ASC', 'DESC',
  // paging
  'LIMIT', 'OFFSET',
  // aliasing / de-duplication
  'AS', 'DISTINCT',
  // literal words
  'TRUE', 'FALSE', 'NULL',
  // predicates
  'LIKE', 'IN', 'BETWEEN',
  // conditional expression
  'CASE', 'WHEN', 'THEN', 'ELSE', 'END',
  // joins
  'JOIN', 'INNER', 'LEFT', 'RIGHT', 'FULL', 'OUTER', 'ON',
])
42
+
/**
 * Splits a SQL string into a flat list of tokens.
 *
 * Whitespace, `--` line comments and C-style block comments are skipped.
 * A block comment that is never closed simply runs to the end of the input.
 * Words found in KEYWORDS (case-insensitively) become `keyword` tokens whose
 * `value` is upper-cased and whose `originalValue` keeps the source spelling.
 * The list always ends with an `eof` token positioned at the input length.
 *
 * @param {string} sql SQL text to scan
 * @returns {Token[]} tokens in source order, terminated by an `eof` token
 * @throws {Error} on a malformed number, an unterminated string literal or
 *   quoted identifier, or a character that cannot start any token
 */
export function tokenize(sql) {
  /** @type {Token[]} */
  const tokens = []
  const { length } = sql
  let i = 0

  /**
   * Looks at the current character without consuming it.
   * @returns {string} the character, or '' at end of input
   */
  function peek() {
    if (i >= length) return ''
    return sql[i]
  }

  /**
   * Consumes the current character.
   * @returns {string} the character, or '' at end of input
   */
  function nextChar() {
    if (i >= length) return ''
    const ch = sql[i]
    i++
    return ch
  }

  while (i < length) {
    const ch = peek()

    if (isWhitespace(ch)) {
      nextChar()
      continue
    }

    // line comment: skip up to (not including) the newline, which is then
    // consumed as ordinary whitespace
    if (ch === '-' && i + 1 < length && sql[i + 1] === '-') {
      while (i < length && sql[i] !== '\n') {
        i++
      }
      continue
    }

    // block comment: skip through the closing marker, or to end of input
    if (ch === '/' && i + 1 < length && sql[i + 1] === '*') {
      i += 2
      while (i < length) {
        if (sql[i] === '*' && i + 1 < length && sql[i + 1] === '/') {
          i += 2
          break
        }
        i++
      }
      continue
    }

    // start offset of the token about to be read
    const pos = i

    // numbers: digits, optional fraction, optional exponent
    if (isDigit(ch)) {
      let text = ''
      while (isDigit(peek())) {
        text += nextChar()
      }
      // fraction digits are optional, so "1." is accepted
      if (peek() === '.') {
        text += nextChar()
        while (isDigit(peek())) {
          text += nextChar()
        }
      }
      // exponent: e|E, optional sign, then at least one digit
      if (peek() === 'e' || peek() === 'E') {
        text += nextChar()
        if (peek() === '+' || peek() === '-') {
          text += nextChar()
        }
        if (!isDigit(peek())) {
          // parseFloat would quietly read "1e" or "1e+" as 1; reject instead.
          // A trailing letter is echoed, matching the trailing-junk error below.
          const junk = isAlpha(peek()) ? peek() : ''
          throw new Error('Invalid number at position ' + pos + ': ' + text + junk)
        }
        while (isDigit(peek())) {
          text += nextChar()
        }
      }
      // a letter glued to a number (e.g. "12abc") is an error, not two tokens
      if (isAlpha(peek())) {
        throw new Error('Invalid number at position ' + pos + ': ' + text + peek())
      }
      const num = parseFloat(text)
      if (isNaN(num)) {
        throw new Error('Invalid number at position ' + pos + ': ' + text)
      }
      tokens.push({
        type: 'number',
        value: text,
        position: pos,
        numericValue: num,
      })
      continue
    }

    // identifiers / keywords
    if (isAlpha(ch)) {
      let text = ''
      while (isAlphaNumeric(peek())) {
        text += nextChar()
      }
      const upper = text.toUpperCase()
      if (KEYWORDS.has(upper)) {
        tokens.push({
          type: 'keyword',
          value: upper,
          originalValue: text,
          position: pos,
        })
      } else {
        tokens.push({
          type: 'identifier',
          value: text,
          position: pos,
        })
      }
      continue
    }

    // string literals: single quotes, with '' as an escaped quote
    if (ch === '\'') {
      const quote = nextChar()
      let text = ''
      while (i <= length) {
        if (i === length) {
          throw new Error('Unterminated string literal starting at position ' + pos)
        }
        const c = nextChar()
        if (c === quote) {
          // a doubled quote is one literal quote character
          if (peek() === quote) {
            text += quote
            nextChar()
            continue
          }
          break
        }
        text += c
      }
      tokens.push({
        type: 'string',
        value: text,
        position: pos,
      })
      continue
    }

    // quoted identifiers: double quotes, with "" as an escaped quote
    if (ch === '"') {
      const quote = nextChar()
      let text = ''
      while (i <= length) {
        if (i === length) {
          throw new Error('Unterminated identifier starting at position ' + pos)
        }
        const c = nextChar()
        if (c === quote) {
          // a doubled quote is one literal quote character
          if (peek() === quote) {
            text += quote
            nextChar()
            continue
          }
          break
        }
        text += c
      }
      tokens.push({
        type: 'identifier',
        value: text,
        position: pos,
      })
      continue
    }

    // comparison operators: <, >, !, = and the two-character <=, >=, !=, <>
    if (ch === '<' || ch === '>' || ch === '!' || ch === '=') {
      let op = nextChar()
      if ((op === '<' || op === '>' || op === '!') && peek() === '=') {
        op += nextChar()
      } else if (op === '<' && peek() === '>') {
        op += nextChar()
      }
      tokens.push({
        type: 'operator',
        value: op,
        position: pos,
      })
      continue
    }

    // single-char operators
    if (ch === '*' || ch === '+' || ch === '-' || ch === '/' || ch === '%') {
      nextChar()
      tokens.push({
        type: 'operator',
        value: ch,
        position: pos,
      })
      continue
    }

    if (ch === ',') {
      nextChar()
      tokens.push({
        type: 'comma',
        value: ',',
        position: pos,
      })
      continue
    }

    if (ch === '.') {
      nextChar()
      tokens.push({
        type: 'dot',
        value: '.',
        position: pos,
      })
      continue
    }

    if (ch === '(' || ch === ')') {
      nextChar()
      tokens.push({
        type: 'paren',
        value: ch,
        position: pos,
      })
      continue
    }

    if (ch === ';') {
      nextChar()
      tokens.push({
        type: 'semicolon',
        value: ';',
        position: pos,
      })
      continue
    }

    // nothing matched: the message differs when no token has been produced yet
    if (tokens.length === 0) {
      throw new Error('Expected SELECT at position ' + pos)
    }
    throw new Error('Unexpected character at position ' + pos + ': ' + ch)
  }

  tokens.push({
    type: 'eof',
    value: '',
    position: length,
  })

  return tokens
}
301
+
/**
 * Tests whether a character is a space, tab, line feed or carriage return.
 *
 * @param {string} ch single character to classify
 * @returns {boolean} true for one of those four characters
 */
function isWhitespace(ch) {
  switch (ch) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true
    default:
      return false
  }
}
309
+
/**
 * Tests whether a character lies in the range '0' through '9'.
 *
 * @param {string} ch single character to classify
 * @returns {boolean} true for an ASCII decimal digit; false for ''
 */
function isDigit(ch) {
  const atLeastZero = '0' <= ch
  return atLeastZero && ch <= '9'
}
317
+
/**
 * Tests whether a character may start an identifier: an ASCII letter,
 * an underscore, or a dollar sign.
 *
 * @param {string} ch single character to classify
 * @returns {boolean} true for a-z, A-Z, '_' or '$'
 */
function isAlpha(ch) {
  if (ch === '_' || ch === '$') return true
  const isLower = 'a' <= ch && ch <= 'z'
  const isUpper = 'A' <= ch && ch <= 'Z'
  return isLower || isUpper
}
325
+
/**
 * Tests whether a character may continue an identifier: anything accepted
 * by isAlpha or isDigit.
 *
 * @param {string} ch single character to classify
 * @returns {boolean} true when either classifier accepts the character
 */
function isAlphaNumeric(ch) {
  if (isAlpha(ch)) return true
  return isDigit(ch)
}
package/src/types.d.ts ADDED
@@ -0,0 +1,159 @@
/** A data row: an object with string keys whose values may be of any type. */
export type Row = Record<string, any>
2
+
/** Scalar value types; this is the type of `LiteralNode.value`. */
export type SqlPrimitive =
  | string
  | number
  | bigint
  | boolean
  | null
4
+
/**
 * Structured form of a SELECT statement.
 *
 * NOTE(review): the field notes below are read off the names and types only;
 * the code that builds this object is not part of this file.
 */
export interface SelectStatement {
  /** Required flag; presumably mirrors the DISTINCT keyword. */
  distinct: boolean
  /** Selected columns, in order. */
  columns: SelectColumn[]
  /** Optional source name. */
  from?: string
  /** Join clauses; always an array. */
  joins: JoinClause[]
  /** Optional filter expression. */
  where?: ExprNode
  /** Grouping expressions; always an array. */
  groupBy: ExprNode[]
  /** Ordering items; always an array. */
  orderBy: OrderByItem[]
  /** Optional numeric limit. */
  limit?: number
  /** Optional numeric offset. */
  offset?: number
}
16
+
/** Tag carried in `Token.type` naming the lexical category of a token. */
export type TokenType =
  | 'keyword' | 'identifier'
  | 'number' | 'string'
  | 'operator' | 'comma' | 'dot' | 'paren' | 'semicolon'
  | 'eof'
28
+
/** A single lexical token, as emitted by `tokenize`. */
export interface Token {
  /** Lexical category. */
  type: TokenType
  /** Token text; `tokenize` upper-cases it for keywords and leaves it '' for `eof`. */
  value: string
  /** Offset in the source string where the token starts (input length for `eof`). */
  position: number
  /** Set by `tokenize` on `number` tokens: `parseFloat` of the token text. */
  numericValue?: number
  /** Set by `tokenize` on `keyword` tokens: the spelling as written in the source. */
  originalValue?: string
}
36
+
/** Operators permitted in `BinaryNode.op`. */
export type BinaryOp =
  | 'AND' | 'OR'
  | '=' | '!=' | '<>'
  | '<' | '>' | '<=' | '>='
  | 'LIKE'
48
+
/** Expression node holding a constant value. */
export interface LiteralNode {
  /** Discriminant within `ExprNode`. */
  type: 'literal'
  /** The constant itself; may be null. */
  value: SqlPrimitive
}
53
+
/** Expression node that refers to something by name. */
export interface IdentifierNode {
  /** Discriminant within `ExprNode`. */
  type: 'identifier'
  /** The referenced name. */
  name: string
}
58
+
/** Expression node applying a single-operand operator. */
export interface UnaryNode {
  /** Discriminant within `ExprNode`. */
  type: 'unary'
  /** One of the four supported unary operators. */
  op:
    | 'NOT'
    | 'IS NULL'
    | 'IS NOT NULL'
    | '-'
  /** The operand expression. */
  argument: ExprNode
}
64
+
/** Expression node applying a two-operand operator. */
export interface BinaryNode {
  /** Discriminant within `ExprNode`. */
  type: 'binary'
  /** The operator. */
  op: BinaryOp
  /** Left-hand operand. */
  left: ExprNode
  /** Right-hand operand. */
  right: ExprNode
}
71
+
/** Expression node for a named function applied to argument expressions. */
export interface FunctionNode {
  /** Discriminant within `ExprNode`. */
  type: 'function'
  /** Function name. */
  name: string
  /** Argument expressions, in order. */
  args: ExprNode[]
}
77
+
/** Expression node pairing an expression with a target type name. */
export interface CastNode {
  /** Discriminant within `ExprNode`. */
  type: 'cast'
  /** The expression being cast. */
  expr: ExprNode
  /** Target type, as a string. */
  toType: string
}
83
+
/** Any expression node; the members are told apart by their `type` field. */
export type ExprNode =
  | LiteralNode
  | IdentifierNode
  | UnaryNode
  | BinaryNode
  | FunctionNode
  | CastNode
85
+
/** Select-list entry of kind 'star'. */
export interface StarColumn {
  /** Discriminant within `SelectColumn`. */
  kind: 'star'
  /** Optional alias. */
  alias?: string
}
90
+
/** Select-list entry that names a single column. */
export interface SimpleColumn {
  /** Discriminant within `SelectColumn`. */
  kind: 'column'
  /** Column name. */
  column: string
  /** Optional alias. */
  alias?: string
}
96
+
/** Function names permitted in `AggregateColumn.func`. */
export type AggregateFunc =
  | 'COUNT'
  | 'SUM'
  | 'AVG'
  | 'MIN'
  | 'MAX'
98
+
/** Function names permitted in `FunctionColumn.func`. */
export type StringFunc =
  | 'UPPER'
  | 'LOWER'
  | 'CONCAT'
  | 'LENGTH'
  | 'SUBSTRING'
  | 'TRIM'
100
+
/** Aggregate argument of kind 'star'; carries no further data. */
export interface AggregateArgStar {
  /** Discriminant within `AggregateArg`. */
  kind: 'star'
}
104
+
/** Aggregate argument that names a single column. */
export interface AggregateArgColumn {
  /** Discriminant within `AggregateArg`. */
  kind: 'column'
  /** Column name. */
  column: string
}
109
+
/** Argument of an aggregate column; the members are told apart by `kind`. */
export type AggregateArg =
  | AggregateArgStar
  | AggregateArgColumn
111
+
/** Select-list entry applying an aggregate function to one argument. */
export interface AggregateColumn {
  /** Discriminant within `SelectColumn`. */
  kind: 'aggregate'
  /** Which aggregate function. */
  func: AggregateFunc
  /** Its single argument: a star or a column. */
  arg: AggregateArg
  /** Optional alias. */
  alias?: string
}
118
+
/** Select-list entry applying one of the `StringFunc` functions. */
export interface FunctionColumn {
  /** Discriminant within `SelectColumn`. */
  kind: 'function'
  /** Which function. */
  func: StringFunc
  /** Argument expressions, in order. */
  args: ExprNode[]
  /** Optional alias. */
  alias?: string
}
125
+
/** Select-list entry holding an arbitrary expression. */
export interface OperationColumn {
  /** Discriminant within `SelectColumn`. */
  kind: 'operation'
  /** The expression. */
  expr: ExprNode
  /** Optional alias. */
  alias?: string
}
131
+
/** Any select-list entry; the members are told apart by their `kind` field. */
export type SelectColumn =
  | StarColumn
  | SimpleColumn
  | AggregateColumn
  | FunctionColumn
  | OperationColumn
133
+
/** One ordering entry: an expression plus a direction. */
export interface OrderByItem {
  /** Expression to order by. */
  expr: ExprNode
  /** Direction; always present. */
  direction: 'ASC' | 'DESC'
}
138
+
/** Join kinds permitted in `JoinClause.type`. */
export type JoinType =
  | 'INNER'
  | 'LEFT'
  | 'RIGHT'
  | 'FULL'
  | 'CROSS'
140
+
/** One join: its kind, the joined table, and an optional condition. */
export interface JoinClause {
  /** Join kind. */
  type: JoinType
  /** Name of the joined table. */
  table: string
  /** Optional join condition expression. */
  on?: ExprNode
}
146
+
/** A token list paired with a numeric position. */
export interface ParserState {
  /** The tokens. */
  tokens: Token[]
  /** Numeric position; presumably an index into `tokens` — confirm against the parser. */
  pos: number
}
151
+
/**
 * Token-cursor contract.
 *
 * NOTE(review): only the signatures are declared here; the behaviour notes
 * below are inferred from the method names and should be confirmed against
 * the implementation.
 */
export interface ExprCursor {
  /** Returns a token; presumably the one at the cursor. */
  current(): Token

  /** Returns a token for the given numeric offset; presumably a look-ahead. */
  peek(offset: number): Token

  /** Returns a token; presumably advances the cursor past it. */
  consume(): Token

  /** Reports a boolean for a token type and optional value. */
  match(type: TokenType, value?: string): boolean

  /** Returns a token for a required type and value; failure behaviour is not declared here. */
  expect(type: TokenType, value: string): Token

  /** Returns a token; presumably requires it to be an identifier. */
  expectIdentifier(): Token
}