sqlpp 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,499 @@
1
+ require 'sqlpp/ast'
2
+
3
+ # select := 'SELECT'
4
+ # optional_projections
5
+ # optional_froms
6
+ # optional_wheres
7
+ # optional_groups
8
+ # optional_orders
9
+ #
10
+ # optional_projections := ''
11
+ # | list
12
+ #
13
+ # optional_froms := ''
14
+ # | 'FROM' froms
15
+ #
16
+ # optional_wheres := ''
17
+ # | 'WHERE' expr1
18
+ #
19
+ # optional_groups := ''
20
+ # | 'GROUP' 'BY' list
21
+ #
22
+ # optional_orders := ''
23
+ # | 'ORDER' 'BY' sort_keys
24
+ #
25
+ # sort_keys := sort_key
26
+ # | sort_key ',' sort_keys
27
+ #
28
+ # sort_key := expr1
29
+ # | expr1 sort_options
30
+ #
31
+ # sort_options := sort_option
32
+ # | sort_option ' ' sort_options
33
+ #
34
+ # sort_option := 'ASC' | 'DESC' | 'NULLS FIRST' | 'NULLS LAST'
35
+ #
36
+ # froms := from
37
+ # | from ',' froms
38
+ #
39
+ # from := entity
40
+ # | entity optional_join_expr
41
+ #
42
+ # optional_join_expr := ''
43
+ # | 'LEFT' 'JOIN' from 'ON' expr
44
+ # | 'INNER' 'JOIN' from 'ON' expr
45
+ # | 'OUTER' 'JOIN' from 'ON' expr
46
+ # | 'FULL' 'OUTER' 'JOIN' from 'ON' expr
47
+ #
48
+ # entity := '(' from ')'
49
+ # | id
50
+ # | select_stmt
51
+ #
52
+ # expr1 := expr2
53
+ # | expr2 op expr1
54
+ #
55
+ # op := 'AND' | 'OR' | 'IS' | 'IS NOT'
56
+ #
57
+ # expr2 := expr3
58
+ # | expr3 'BETWEEN' expr3 'AND' expr3
59
+ # | expr3 'IN' '(' list ')'
60
+ # | expr3 bop expr3
61
+ #
62
+ # bop := '<' | '<=' | '<>' | '=' | '>=' | '>'
63
+ #
64
+ # expr3 := expr4
65
+ # | expr4 op2 expr3
66
+ # | unary expr3
67
+ #
68
+ # op2 := '+' | '-' | '*' | '/'
69
+ #
70
+ # unary := '+' | '-' | 'NOT'
71
+ #
72
+ # expr4 := lit
73
+ # | id
74
+ # | id '.' id
75
+ # | id '(' args ')'
76
+ # | 'CASE' case_stmt 'END'
77
+ # | '(' expr1 ')'
78
+ #
79
+ # list := expr1
80
+ # | expr1 ',' list
81
+
82
+ module SQLPP
83
+ class Parser
84
# Base class for every error raised by the parser.
class Exception < SQLPP::Exception
end

# Raised when the next token is not one the parser accepts at that point.
class UnexpectedToken < Exception
end

# Raised when tokens remain after a complete construct has been parsed.
class TrailingTokens < Exception
end
87
+
88
# Convenience entry point: builds a parser for +string+ and returns the
# result of calling #parse on it.
def self.parse(string)
  new(string).parse
end
92
+
93
# Wraps +string+ in a SQLPP::Tokenizer; every parsing method pulls its
# tokens from that tokenizer.
def initialize(string)
  @tokenizer = SQLPP::Tokenizer.new(string)
end
96
+
97
# Parses a complete statement and returns its AST.
#
# Leading whitespace is skipped, then the first keyword selects the
# statement parser. Only SELECT is recognised here.
#
# Raises UnexpectedToken when the input does not start with a supported
# keyword; the message is the inspected offending token.
def parse
  _eat :space

  token = _peek(:key)
  # _peek yields nil when the next token is not a keyword, so look at the
  # token that is actually there to build a meaningful message.
  raise UnexpectedToken, @tokenizer.peek.inspect unless token

  case token.text
  when :select then parse_select
  else raise UnexpectedToken, token.inspect
  end
end
108
+
109
+ # --- exposed for testing purposes ---
110
+
111
# Parses a standalone expression (exposed for testing) and returns its AST.
#
# Raises TrailingTokens when input remains after the expression. The
# end-of-stream check only runs after a successful parse, so an error
# raised while parsing is reported as-is rather than being replaced by
# TrailingTokens.
def parse_expression
  result = _parse_expr1
  _ensure_stream_empty!
  result
end
116
+
117
# Parses a standalone FROM item (exposed for testing) and returns its AST.
#
# Raises TrailingTokens when input remains afterwards. The end-of-stream
# check only runs after a successful parse, so an error raised while
# parsing is reported as-is rather than being replaced by TrailingTokens.
def parse_from
  result = _parse_from
  _ensure_stream_empty!
  result
end
122
+
123
# Parses a complete SELECT statement and returns its AST.
#
# Raises TrailingTokens when input remains after the statement. The
# end-of-stream check only runs after a successful parse, so an error
# raised while parsing is reported as-is rather than being replaced by
# TrailingTokens.
def parse_select
  result = _parse_select
  _ensure_stream_empty!
  result
end
128
+
129
+ # --- internal use ---
130
+
131
# Parses `SELECT [projections] [FROM ...] [WHERE ...] [GROUP BY ...]
# [ORDER BY ...]` and returns the populated AST::Select node. Clauses that
# are absent leave the corresponding attribute untouched.
#
# Raises UnexpectedToken (through _expect) on malformed input.
def _parse_select
  _expect :key, :select
  select = AST::Select.new

  _eat :space

  # Anything other than FROM, WHERE or end of input starts the projections.
  if !_peek(:key, /^(from|where)$/) && !_peek(:eof)
    select.projections = _parse_projections
  end

  select.froms = _parse_from_list if _eat(:key, :from)

  if _eat(:key, :where)
    select.wheres = _parse_expr1
    _eat :space
  end

  if _eat(:key, :group)
    _eat :space
    _expect :key, :by
    _eat :space
    select.groups = _parse_list
  end

  if _eat(:key, :order)
    _eat :space
    _expect :key, :by
    _eat :space
    select.orders = _parse_sort_keys
  end

  select
end

# Parses the comma separated projection list; `expr AS name` is wrapped
# in AST::As.
def _parse_projections
  list = []

  loop do
    expr = _parse_expr1
    _eat :space

    if _eat(:key, :as)
      _eat :space
      expr = AST::As.new(_expect(:id).text, expr)
      # allow whitespace between the alias and a following comma
      _eat :space
    end

    list << expr
    break unless _eat(:punct, ",")
  end
  _eat :space

  list
end

# Parses the comma separated items following FROM.
def _parse_from_list
  list = []

  loop do
    _eat :space
    list << _parse_from
    _eat :space
    break unless _eat(:punct, ',')
  end
  _eat :space

  list
end

# Parses the comma separated ORDER BY keys, each with optional ASC/DESC
# and NULLS FIRST/LAST options, into AST::SortKey nodes.
def _parse_sort_keys
  keys = []

  loop do
    key = AST::SortKey.new(_parse_expr1, [])
    keys << key

    _eat :space

    if (dir = _eat(:key, /^(asc|desc)$/))
      _eat :space
      key.options << dir.text
    end

    if (nulls = _eat(:key, :nulls))
      _eat :space
      placement = _eat(:key, /^(first|last)$/)
      # build a fresh string instead of appending to Symbol#to_s output
      key.options << (placement ? "#{nulls.text} #{placement.text}" : nulls.text.to_s)
    end

    _eat :space
    break unless _eat(:punct, ",")
  end

  keys
end
217
+
218
# Parses one FROM item: an entity followed by any number of joins.
#
# Each join wraps the entity parsed so far in an AST::Join whose type is
# the lower-case join keyword(s), e.g. "inner", "left" or "left outer".
# The OUTER keyword after LEFT, RIGHT or FULL is optional. An ON
# condition, when present, is stored on the join node.
#
# Raises UnexpectedToken (through _expect) when JOIN is missing.
def _parse_from
  entity = _parse_entity

  loop do
    _eat :space

    which = _eat(:key, /^(inner|cross|left|right|full|outer)$/)
    break unless which

    type = which.text.to_s

    if %w(full left right).include?(type)
      _eat :space
      # build a new string rather than mutating Symbol#to_s output
      type = "#{type} outer" if _eat(:key, :outer)
    end

    _eat :space
    _expect :key, :join

    entity = AST::Join.new(type.downcase, entity, _parse_from)

    _eat :space
    if _eat(:key, :on)
      _eat :space
      entity.on = _parse_expr1
    end
  end

  entity
end
251
+
252
# Parses a single FROM source and an optional alias.
#
# The source is a parenthesised FROM item (AST::Parens), a nested SELECT,
# or an identifier (AST::Atom of type :attr). `AS name` wraps the source
# in AST::As, a bare trailing identifier wraps it in AST::Alias, and
# otherwise the source is returned unchanged.
def _parse_entity
  _eat :space

  source =
    if _eat(:punct, '(')
      inner = _parse_from
      _eat :space
      _expect :punct, ')'
      AST::Parens.new(inner)
    elsif _peek(:key, :select)
      _parse_select
    else
      AST::Atom.new(:attr, _expect(:id).text)
    end

  _eat :space

  if _eat(:key, :as)
    _eat :space
    return AST::As.new(_expect(:id).text, source)
  end

  bare_alias = _eat(:id)
  bare_alias ? AST::Alias.new(bare_alias.text, source) : source
end
280
+
281
# Parses the loosest-binding expression level: an expr2, optionally
# followed by AND, OR, IS or IS NOT and another expr1 (so chains nest to
# the right). Returns the bare operand when no connective follows,
# otherwise an AST::Expr. The operator is the keyword's token text, or
# the string "is not" for IS NOT.
def _parse_expr1
  _eat :space

  lhs = _parse_expr2
  _eat :space

  connective = _eat(:key, /^(and|or|is)$/i)
  return lhs unless connective

  operator = connective.text

  if operator == :is
    _eat :space
    negation = _eat(:key, :not)
    operator = "#{operator} #{negation.text}" if negation
  end

  rhs = _parse_expr1
  AST::Expr.new(lhs, operator, rhs)
end
303
+
304
# Parses the comparison level: an expr3 optionally followed by one of
#
#   BETWEEN lo AND hi   -> right side is an AST::Atom of type :range
#   IN ( list )         -> right side is an AST::Atom of type :list
#   comparison / LIKE / ILIKE followed by another expr3
#
# Returns the bare operand when none of these follow, otherwise an
# AST::Expr whose operator is the matched token's text.
def _parse_expr2
  _eat :space

  subject = _parse_expr3
  _eat :space

  if (token = _eat(:key, :between))
    _eat :space
    lower = _parse_expr3

    _eat :space
    _expect :key, :and

    _eat :space
    upper = _parse_expr3

    AST::Expr.new(subject, token.text, AST::Atom.new(:range, lower, upper))

  elsif (token = _eat(:key, :in))
    _eat :space
    _expect :punct, "("

    members = AST::Atom.new(:list, _parse_list)
    _eat :space
    _expect :punct, ")"

    AST::Expr.new(subject, token.text, members)

  elsif (token = _eat(:punct, /<=|<>|>=|=|<|>/) || _eat(:key, /^i?like$/))
    operand = _parse_expr3
    # mirror the truthiness check on the right-hand side
    operand ? AST::Expr.new(subject, token.text, operand) : subject

  else
    subject
  end
end
345
+
346
# Parses the arithmetic level. A leading +, - or NOT produces an
# AST::Unary around the following expr3. Otherwise an atom is read and,
# when one of + - * / follows, combined with the following expr3 into an
# AST::Expr (operators therefore nest to the right, all at one level).
def _parse_expr3
  _eat :space

  prefix = _eat(:punct, /[-+]/) || _eat(:key, :not)
  if prefix
    _eat :space
    return AST::Unary.new(prefix.text, _parse_expr3)
  end

  operand = _parse_atom
  _eat :space

  infix = _eat(:punct, /[-+*\/]/)
  return operand unless infix

  _eat :space
  AST::Expr.new(operand, infix.text, _parse_expr3)
end
365
+
366
# Parses the smallest expression unit and returns its AST node:
#
#   literal          -> AST::Atom(:lit, text)
#   CASE ... END     -> result of _parse_case
#   ( expr )         -> AST::Parens
#   NULL             -> AST::Atom(:lit, "NULL")
#   *                -> AST::Atom(:lit, "*")
#   id ( [args] )    -> AST::Atom(:func, name, args); args may be empty
#   id . id | id . * -> AST::Atom(:attr, left, right)
#   id               -> AST::Atom(:attr, name)
#
# Raises UnexpectedToken when none of these forms is present.
def _parse_atom
  if (lit = _eat(:lit))
    AST::Atom.new(:lit, lit.text)

  elsif _eat(:key, :case)
    _parse_case

  elsif _eat(:punct, "(")
    expr = _parse_expr1
    _eat :space
    _expect(:punct, ")")
    AST::Parens.new(expr)

  elsif _eat(:key, :null)
    AST::Atom.new(:lit, "NULL")

  elsif _eat(:punct, "*")
    AST::Atom.new(:lit, "*")

  else
    id = _expect(:id)

    if _eat(:punct, "(")
      _eat :space
      # an immediately closing paren is a call without arguments
      args = _peek(:punct, ")") ? [] : _parse_list
      _expect(:punct, ")")
      AST::Atom.new(:func, id.text, args)
    elsif _eat(:punct, '.')
      id2 = _eat(:id) || _eat(:punct, '*')

      unless id2
        # _peek requires a token type, so ask the tokenizer directly for
        # the offending token
        raise UnexpectedToken, "expected id or *, got #{@tokenizer.peek.inspect}"
      end

      AST::Atom.new(:attr, id.text, id2.text)
    else
      AST::Atom.new(:attr, id.text)
    end
  end
end
405
+
406
# Parses the remainder of a CASE expression; the CASE keyword itself has
# already been consumed by the caller and must be followed by whitespace.
#
# Returns an AST::Atom of type :case. Its left side is the optional
# subject expression (present when the next keyword is not WHEN). Its
# right side is an array holding one [condition, result] pair per WHEN
# branch followed, when an ELSE branch exists, by the bare ELSE
# expression.
#
# Raises UnexpectedToken (through _expect) when THEN or END is missing.
def _parse_case
  _expect :space

  node = AST::Atom.new(:case)

  unless _peek(:key, :when)
    node.left = _parse_expr1
    _eat :space
  end

  branches = []

  loop do
    break unless _eat(:key, :when)

    test = _parse_expr1
    _eat :space
    _expect :key, :then
    outcome = _parse_expr1

    branches.push([test, outcome])
    _eat :space
  end

  if _eat(:key, :else)
    branches.push(_parse_expr1)
    _eat :space
  end

  _expect :key, :end

  node.right = branches
  node
end
435
+
436
# list := expr1
#       | expr1 ',' list
#
# Parses one or more comma separated expressions and returns them as an
# array. At least one expression is always parsed. Whitespace around the
# items and after the final item is consumed.
def _parse_list
  _eat :space

  items = [_parse_expr1]
  _eat :space

  while _eat(:punct, ",")
    _eat :space
    items << _parse_expr1
    _eat :space
  end

  items
end
456
+
457
# Consumes and returns the next token when it matches the given type(s)
# and optional pattern; otherwise leaves the stream untouched and
# returns nil.
def _eat(type_or_types, pattern=nil)
  return nil unless _peek(type_or_types, pattern)

  _next
end
460
+
461
# Looks at the next token without consuming it.
#
# Returns the token when it matches the given type(s) and optional
# pattern, nil otherwise. The token is handed back to the tokenizer
# before matching, and only once one was actually obtained: if fetching
# the token raises, nothing is pushed back.
def _peek(type_or_types, pattern=nil)
  token = _next
  @tokenizer.push(token)
  _match(token, type_or_types, pattern)
end
467
+
468
# Tests +token+ against a token type (or array of acceptable types) and an
# optional pattern. The pattern is compared with `===` against the
# token's text, so it may be a Regexp or a value that must equal the
# text. Returns the token on success and nil otherwise.
def _match(token, type_or_types, pattern=nil)
  accepted = type_or_types.is_a?(Array) ? type_or_types : [type_or_types]

  return nil unless accepted.include?(token.type)
  return nil unless pattern.nil? || pattern === token.text

  token
end
477
+
478
# Consumes the next token and returns it, insisting that it matches the
# given type(s) and optional pattern.
#
# Raises UnexpectedToken, describing what was expected and what was
# found, when it does not match. The token has been consumed either way.
def _expect(type_or_types, pattern=nil)
  token = _next
  return token if _match(token, type_or_types, pattern)

  raise UnexpectedToken, "expected #{type_or_types.inspect}(#{pattern.inspect}), got #{token.inspect}"
end
487
+
488
# Returns whatever the tokenizer's #next yields, i.e. the next token of
# the stream, consuming it.
def _next
  @tokenizer.next
end
491
+
492
# Verifies that the whole input has been consumed.
#
# Returns nil when the next token is :eof; otherwise raises
# TrailingTokens with the inspected first leftover token.
def _ensure_stream_empty!
  return if _peek(:eof)

  raise TrailingTokens, _next.inspect
end
497
+
498
+ end
499
+ end
@@ -0,0 +1,124 @@
1
+ require 'strscan'
2
+
3
+ module SQLPP
4
+ class Tokenizer
5
# Base class for every error raised by the tokenizer.
class Exception < SQLPP::Exception
end

# Raised when the input contains a character no token can start with.
class UnexpectedCharacter < Exception
end

# Raised when the input ends inside a quoted string or identifier.
class EOFError < Exception
end
8
+
9
# A single lexical token: its type (a Symbol such as :key, :id, :lit,
# :punct, :space or :eof), its text, and the scanner position at which it
# started. Assigned directly from Struct.new so that no anonymous
# intermediate class sits in the ancestry.
Token = Struct.new(:type, :text, :pos)
11
+
12
# Reserved words recognised by the tokenizer, in lower case. Frozen so the
# list cannot be altered after the regular expression below was built
# from it.
KEYWORDS = %w(
  and
  as
  asc
  between
  by
  case
  cross
  desc
  else
  end
  first
  from
  full
  group
  having
  ilike
  in
  inner
  is
  join
  last
  left
  like
  not
  null
  nulls
  on
  or
  order
  outer
  right
  select
  then
  when
  where
).freeze

# Matches any keyword as a whole word, ignoring case.
KEYWORDS_REGEX = Regexp.new('\b(' + KEYWORDS.join('|') + ')\b', Regexp::IGNORECASE).freeze
51
+
52
# Prepares to tokenize +string+: starts with an empty push-back buffer
# and a StringScanner positioned at the beginning of the input.
def initialize(string)
  @buffer = []
  @scanner = StringScanner.new(string)
end
56
+
57
# Returns the next token: the most recently pushed-back one when the
# buffer holds anything, otherwise a freshly scanned token.
#
# Emptiness is tested with empty? rather than any?, because any? ignores
# nil/false entries and would skip a buffer that only contains those.
def next
  return _scan if @buffer.empty?

  @buffer.pop
end
64
+
65
# Returns the next token without consuming it: the token is fetched and
# immediately pushed back onto the buffer.
def peek
  upcoming = self.next
  push(upcoming)
end
68
+
69
# Puts +token+ on top of the push-back buffer, so it is the next one
# handed out, and returns the token.
def push(token)
  @buffer << token
  token
end
73
+
74
# Scans one token at the scanner's current position and returns it as a
# Token carrying that position. Alternatives are tried in this order:
#
#   end of input            -> :eof (text nil)
#   keyword                 -> :key (text is the lower-cased Symbol)
#   integer / decimal       -> :lit
#   word characters         -> :id
#   operator / punctuation  -> :punct (two-character operators first)
#   "..."                   -> :id, double quotes kept in the text
#   '...'                   -> :lit, single quotes kept in the text
#   whitespace run          -> :space
#
# Raises UnexpectedCharacter with the remaining input when nothing matches.
def _scan
  pos = @scanner.pos

  return Token.new(:eof, nil, pos) if @scanner.eos?

  if (word = @scanner.scan(KEYWORDS_REGEX))
    return Token.new(:key, word.downcase.to_sym, pos)
  end

  if (number = @scanner.scan(/\d+(?:\.\d+)?/))
    return Token.new(:lit, number, pos)
  end

  if (name = @scanner.scan(/\w+/))
    return Token.new(:id, name, pos)
  end

  symbol = @scanner.scan(/<=|<>|!=|>=/) || @scanner.scan(/[<>=\(\).*,\/+\-]/)
  return Token.new(:punct, symbol, pos) if symbol

  if @scanner.scan(/"/)
    quoted = _scan_to_delim('"', pos)
    return Token.new(:id, "\"#{quoted}\"", pos)
  end

  if @scanner.scan(/'/)
    quoted = _scan_to_delim("'", pos)
    return Token.new(:lit, "'#{quoted}'", pos)
  end

  if (blank = @scanner.scan(/\s+/))
    return Token.new(:space, blank, pos)
  end

  raise UnexpectedCharacter, @scanner.rest
end
101
+
102
# Reads characters up to the closing +delim+ and returns the text between
# the delimiters; the opening delimiter has already been consumed and
# the closing one is consumed but not returned.
#
# Escapes are kept verbatim in the result: inside "..." a backslash
# together with the character after it, inside '...' a doubled single
# quote.
#
# +pos+ is the position where the quoted text started; it is only used in
# the error message.
#
# Raises EOFError when the input ends before the closing delimiter,
# including when it ends right after a backslash.
def _scan_to_delim(delim, pos)
  string = ""

  loop do
    ch = @scanner.getch

    if delim == '"' && ch == "\\"
      escaped = @scanner.getch
      # nothing after the backslash means the input ended: leave ch nil
      # so the EOF branch below reports it
      ch = escaped && (ch + escaped)
    elsif delim == "'" && ch == "'" && @scanner.peek(1) == "'"
      ch += @scanner.getch
    end

    case ch
    when nil
      raise EOFError, "end of input reached in string started at #{pos} with #{delim.inspect}"
    when delim
      return string
    else
      string << ch
    end
  end
end
123
+ end
124
+ end
@@ -0,0 +1,9 @@
1
module SQLPP
  # Version number of the library, as separate components and as the
  # dotted string built from them.
  module Version
    MAJOR = 1
    MINOR = 0
    TINY  = 0

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
data/lib/sqlpp.rb ADDED
@@ -0,0 +1,8 @@
1
module SQLPP
  # Root of the library's error hierarchy; the parser's and tokenizer's
  # own Exception classes derive from it.
  class Exception < RuntimeError
  end
end
4
+
5
+ require 'sqlpp/tokenizer'
6
+ require 'sqlpp/parser'
7
+ require 'sqlpp/formatter'
8
+ require 'sqlpp/version'
data/sqlpp.gemspec ADDED
@@ -0,0 +1,25 @@
1
+ lib = File.expand_path('../lib', __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require "sqlpp/version"
4
+
5
# Gem metadata for sqlpp. The file lists are derived from what git tracks.
Gem::Specification.new do |gem|
  gem.version     = SQLPP::Version::STRING
  gem.name        = "sqlpp"
  gem.authors     = ["Jamis Buck"]
  gem.email       = ["jamis@jamisbuck.org"]
  gem.homepage    = "http://github.com/jamis/sqlp"
  gem.summary     = "A simplistic SQL parser and pretty-printer"
  gem.description = "A simplistic SQL parser and pretty-printer"
  gem.license     = 'MIT'

  # One entry per line of `git ls-files` output. Split on newlines
  # explicitly: `$\` (the output record separator) is nil by default,
  # which makes String#split fall back to splitting on any whitespace.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^test/})
  gem.require_paths = ["lib"]

  ##
  # Development dependencies
  #
  gem.add_development_dependency "rake"
  gem.add_development_dependency "minitest"
end
@@ -0,0 +1,41 @@
1
+ require 'test_helper'
2
+
3
# Round-trip tests: parse a SQL string and compare the formatter's output
# with the expected layout.
#
# NOTE(review): the expected SQL below is reproduced exactly as it appears
# in this copy of the file, where leading whitespace inside the heredocs
# is not visible; confirm the indentation against the formatter's output.
class FormatterTest < Minitest::Test
  def test_format_select
    sql = "select a, b, c from table where x > 5 and z between 1 and 2 or (y IS NULL) group by a, b order by z ASC"
    ast = _parser(sql).parse

    expected = <<-SQL
SELECT a, b, c
FROM table
WHERE x > 5
AND z BETWEEN 1 AND 2
OR (y IS NULL)
GROUP BY a, b
ORDER BY z ASC
    SQL

    assert_equal expected, _format(ast)
  end

  def test_format_subselect
    sql = "select a, b, c from (select d,e,f from table where table.id in (1,2,3)) subselect where x > 5 group by a, b order by z ASC"
    ast = _parser(sql).parse

    expected = <<-SQL
SELECT a, b, c
FROM (
SELECT d, e, f
FROM table
WHERE table.id IN (1, 2, 3)
) subselect
WHERE x > 5
GROUP BY a, b
ORDER BY z ASC
    SQL

    assert_equal expected, _format(ast)
  end

  # Builds a parser for +string+.
  def _parser(string)
    SQLPP::Parser.new(string)
  end

  # Renders +ast+ with a fresh formatter.
  def _format(ast)
    formatter = SQLPP::Formatter.new
    formatter.format(ast)
  end
end