sql_tree 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,174 @@
1
# The <tt>SQLTree::Tokenizer</tt> class transforms a string of characters
# into an enumeration of tokens that are more appropriate for the SQL parser
# to work with.
#
# An example:
#
#   >> SQLTree::Tokenizer.new.tokenize('SELECT * FROM table')
#   => [:select, :all, :from, Variable('table')]
#
# The <tt>tokenize</tt> method will return an array of tokens, while
# the <tt>each_token</tt> method (aliased to <tt>each</tt>) will yield every
# token one by one.
class SQLTree::Tokenizer

  include Enumerable

  # A regular expression that matches all operator characters.
  OPERATOR_CHARS = /\=|<|>|!|\-|\+|\/|\*|\%/

  # The keyword queue, on which keywords are placed before they are yielded
  # to the parser, to enable keyword combining (e.g. NOT LIKE)
  attr_reader :keyword_queue

  def initialize # :nodoc:
    @keyword_queue = []
    # Start from an empty input so that enumerating a tokenizer on which
    # <tt>tokenize</tt> was never called yields nothing instead of failing
    # on a nil string.
    @string = ''
    @current_char_pos = -1
    @current_char = nil
  end

  # Returns an array of tokens for the given string.
  #
  # All scanning state is reset first, including the keyword queue, so that
  # keywords left behind by an earlier, aborted enumeration cannot leak into
  # this run.
  # <tt>string</tt>:: the string to tokenize
  def tokenize(string)
    @string = string
    @current_char_pos = -1
    @current_char = nil
    @keyword_queue.clear
    entries
  end

  # Returns the current character that is being tokenized
  def current_char
    @current_char
  end

  # Returns the next character to tokenize, but does not move
  # the pointer of the current character forward. Returns an empty
  # string when peeking exactly one position past the last character,
  # and nil beyond that.
  # <tt>lookahead</tt>:: how many positions forward to peek.
  def peek_char(lookahead = 1)
    @string[@current_char_pos + lookahead, 1]
  end

  # Returns the next character to tokenize, and moves the pointer
  # of the current character one position forward.
  def next_char
    @current_char_pos += 1
    @current_char = @string[@current_char_pos, 1]
  end

  # Yields a token, or buffers it when it is a keyword.
  #
  # Keywords are pushed on the keyword queue instead of being yielded
  # immediately, so that consecutive keywords can be combined (e.g. NOT
  # LIKE). This implementation only buffers them: as soon as a non-keyword
  # token arrives, the queued keywords are yielded in order, followed by
  # the token itself.
  # <tt>token</tt>:: the token to yield or queue
  # <tt>block</tt>:: the block to yield tokens to.
  def handle_token(token, &block) # :yields: SQLTree::Token
    if token.kind_of?(SQLTree::Token::Keyword)
      keyword_queue.push(token)
    else
      empty_keyword_queue!(&block)
      block.call(token)
    end
  end

  # This method ensures that every keyword currently in the queue is
  # yielded. This method gets called by <tt>handle_token</tt> when a
  # non-keyword token is encountered, and by <tt>each_token</tt> at the
  # end of the input.
  # <tt>block</tt>:: the block to yield the tokens on the queue to.
  def empty_keyword_queue!(&block) # :yields: SQLTree::Token
    block.call(@keyword_queue.shift) until @keyword_queue.empty?
  end

  # Iterator method that yields each token that is encountered in the
  # SQL stream. These tokens are passed to the SQL parser to construct
  # a syntax tree for the SQL query.
  #
  # Scanning continues from the current position; <tt>tokenize</tt> resets
  # that position. Characters that match none of the branches below are
  # skipped silently. Returns an Enumerator when no block is given.
  #
  # This method is aliased to <tt>:each</tt> to make the Enumerable
  # methods work on this method.
  def each_token(&block) # :yields: SQLTree::Token
    return enum_for(:each_token) unless block

    while next_char
      case current_char
      when /^\s?$/
        # Whitespace, or the empty string that is returned right after the
        # last character: go to the next character.
      when '('            then handle_token(SQLTree::Token::LPAREN, &block)
      when ')'            then handle_token(SQLTree::Token::RPAREN, &block)
      when '.'            then handle_token(SQLTree::Token::DOT, &block)
      when ','            then handle_token(SQLTree::Token::COMMA, &block)
      when /\d/           then tokenize_number(&block)
      when "'"            then tokenize_quoted_string(&block)
      when OPERATOR_CHARS then tokenize_operator(&block)
      when /\w/           then tokenize_keyword(&block)
      when '"'            then tokenize_quoted_variable(&block) # TODO: allow MySQL quoting mode
      end
    end

    # Make sure to yield any tokens that are still stashed on the queue.
    empty_keyword_queue!(&block)
  end

  alias :each :each_token

  # Tokenizes a keyword in the code. This can either be a reserved SQL keyword
  # or a variable. Variables are passed on immediately; keywords are queued by
  # <tt>handle_token</tt> and yielded with a delay, because they may need to
  # be combined with other keywords.
  def tokenize_keyword(&block) # :yields: SQLTree::Token
    # Work on a copy so the string held in @current_char is never mutated.
    literal = current_char.dup
    literal << next_char while /[\w]/ =~ peek_char

    name = literal.upcase
    if SQLTree::Token::KEYWORDS.include?(name)
      handle_token(SQLTree::Token.const_get(name), &block)
    else
      handle_token(SQLTree::Token::Variable.new(literal), &block)
    end
  end

  # Tokenizes a number (either an integer or float) in the SQL stream.
  # This method will yield the token after the last digit of the number
  # has been encountered. At most one dot is consumed; a number that
  # contains a dot is converted to a float, any other number to an integer.
  def tokenize_number(&block) # :yields: SQLTree::Token::Number
    number = current_char.dup
    dot_encountered = false
    while /\d/ =~ peek_char || (peek_char == '.' && !dot_encountered)
      dot_encountered = true if peek_char == '.'
      number << next_char
    end

    if dot_encountered
      handle_token(SQLTree::Token::Number.new(number.to_f), &block)
    else
      handle_token(SQLTree::Token::Number.new(number.to_i), &block)
    end
  end

  # Reads a quoted string token from the SQL stream. This method will
  # yield an SQLTree::Token::String when the closing quote character is
  # encountered. A backslash makes the character that follows it part of
  # the string. A string that is still open at the end of the input is
  # yielded as is, without raising an error.
  def tokenize_quoted_string(&block) # :yields: SQLTree::Token::String
    string = ''
    until next_char.nil? || current_char == "'"
      string << (current_char == "\\" ? next_char : current_char)
    end
    handle_token(SQLTree::Token::String.new(string), &block)
  end

  # Tokenize a quoted variable from the SQL stream. This method will
  # yield an SQLTree::Token::Variable when the closing quote is found,
  # or when the input ends before a closing quote.
  #
  # The actual quote character that is used depends on the DBMS. For now,
  # only the more standard double quote is accepted.
  def tokenize_quoted_variable(&block) # :yields: SQLTree::Token::Variable
    variable = ''
    until next_char.nil? || current_char == '"' # TODO: allow MySQL quoting mode
      variable << (current_char == "\\" ? next_char : current_char)
    end
    handle_token(SQLTree::Token::Variable.new(variable), &block)
  end

  # Tokenizes an operator in the SQL stream. This method will yield the
  # operator token when the last character of the token is encountered.
  #
  # A minus sign that is directly followed by a digit or a dot is treated
  # as the start of a negative number instead of as an operator.
  def tokenize_operator(&block) # :yields: SQLTree::Token
    operator = current_char.dup
    if operator == '-' && /[\d\.]/ =~ peek_char
      tokenize_number(&block)
    else
      # Consume a second character when the pair forms a known operator.
      # to_s guards against peek_char returning nil.
      operator << next_char if SQLTree::Token::OPERATORS_HASH.key?(operator + peek_char.to_s)
      handle_token(SQLTree::Token.const_get(SQLTree::Token::OPERATORS_HASH[operator].to_s.upcase), &block)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require "#{File.dirname(__FILE__)}/../spec_helper"
2
+
3
# Placeholder example group for the public SQLTree API; it does not
# contain any examples yet.
describe SQLTree, :API do
end
@@ -0,0 +1,21 @@
1
+ require "#{File.dirname(__FILE__)}/../spec_helper"
2
+
3
# Round-trip examples: every query is parsed with <tt>SQLTree[]</tt> and
# turned back into SQL with <tt>to_sql</tt>. The expected output quotes
# every identifier and wraps the WHERE comparison in parentheses.
describe SQLTree, 'parsing and generating SQL' do

  it "should parse and generate SQL fo a simple list query" do
    SQLTree["SELECT * FROM table"].to_sql.should == 'SELECT * FROM "table"'
  end

  it "should parse and generate the DISTINCT keyword" do
    SQLTree["SELECT DISTINCT * FROM table"].to_sql.should == 'SELECT DISTINCT * FROM "table"'
  end

  it 'should parse and generate table aliases' do
    SQLTree["SELECT a.* FROM table AS a"].to_sql.should == 'SELECT "a".* FROM "table" AS "a"'
  end

  it "parse and generate a complex SQL query" do
    SQLTree['SELECT a.*, MD5( a.name ) AS checksum FROM table AS a , other WHERE other.timestamp > a.timestamp'].to_sql.should ==
      'SELECT "a".*, MD5("a"."name") AS "checksum" FROM "table" AS "a", "other" WHERE ("other"."timestamp" > "a"."timestamp")'
  end
end
@@ -0,0 +1,84 @@
1
# Spec matcher that checks whether an object, once converted with
# <tt>to_tree</tt>, equals an expected tree.
class ParseAs

  # <tt>expected_tree</tt>:: the tree the actual value should convert to.
  def initialize(expected_tree)
    @expected_tree = expected_tree
  end

  # Converts the actual value with <tt>to_tree</tt>, remembers the result
  # for the failure message, and compares it with the expected tree.
  def matches?(found_tree)
    @found_tree = found_tree.to_tree
    @found_tree == @expected_tree
  end

  def description
    "expected to parse to #{@expected_tree.inspect}"
  end

  def failure_message
    " #{@expected_tree.inspect} expected, but found #{@found_tree.inspect}"
  end

  # This is a parse matcher, so the message talks about parsing rather
  # than tokenizing.
  def negative_failure_message
    " expected not to be parsed to #{@expected_tree.inspect}"
  end
end
24
+
25
# Builds a ParseAs matcher for the given expected tree.
def parse_as(tree)
  ParseAs.new(tree)
end
28
+
29
# Spec matcher that compares a list of tokens with an expected list.
class TokenizeTo

  # Normalizes the expected values into tokens:
  # * SQLTree::Token instances are used as they are,
  # * strings become SQLTree::Token::String tokens,
  # * numbers become SQLTree::Token::Number tokens,
  # * symbols are looked up as upper-cased constants on SQLTree::Token.
  #
  # Raises ArgumentError for any other kind of value, instead of silently
  # putting an error string in the list of expected tokens.
  # <tt>expected_tokens</tt>:: array of tokens or token shorthands.
  def initialize(expected_tokens)
    @expected_tokens = expected_tokens.map do |t|
      case t
      when SQLTree::Token then t
      when String         then SQLTree::Token::String.new(t)
      when Numeric        then SQLTree::Token::Number.new(t)
      when Symbol         then SQLTree::Token.const_get(t.to_s.upcase)
      else raise ArgumentError, "Cannot check for this token: #{t.inspect}!"
      end
    end
  end

  # Remembers the actual tokens for the failure message and compares them
  # with the expected tokens.
  def matches?(found_tokens)
    @found_tokens = found_tokens
    @found_tokens == @expected_tokens
  end

  def description
    "expected to tokenized to #{@expected_tokens.inspect}"
  end

  def failure_message
    " #{@expected_tokens.inspect} expected, but found #{@found_tokens.inspect}"
  end

  def negative_failure_message
    " expected not to be tokenized to #{@expected_tokens.inspect}"
  end

end
61
+
62
# Builds a TokenizeTo matcher from the given list of expected tokens.
def tokenize_to(*expected_tokens)
  TokenizeTo.new(expected_tokens)
end
65
+
66
# Builds a variable token for the given name; the name is converted to a
# string first, so symbols are accepted as well.
def sql_var(name)
  SQLTree::Token::Variable.new(name.to_s)
end
69
+
70
# Shorthand for the DOT token.
def dot
  SQLTree::Token::DOT
end
73
+
74
# Shorthand for the COMMA token.
def comma
  SQLTree::Token::COMMA
end
77
+
78
# Shorthand for the LPAREN (opening parenthesis) token.
def lparen
  SQLTree::Token::LPAREN
end
81
+
82
# Shorthand for the RPAREN (closing parenthesis) token.
def rparen
  SQLTree::Token::RPAREN
end
@@ -0,0 +1,26 @@
1
# Drop every load-path entry that mentions TextMate (presumably libraries
# injected by the editor when specs are run from it -- confirm).
$LOAD_PATH.reject! { |entry| entry.include?('TextMate') }

# Make the library under ../lib requirable. The path is made absolute so it
# does not depend on the working directory, and it is only added once.
sql_tree_lib_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH << sql_tree_lib_dir unless $LOAD_PATH.include?(sql_tree_lib_dir)
3
+
4
+ require 'rubygems'
5
+ require 'spec'
6
+ require 'sql_tree'
7
+
8
# Helper namespaces for the specs. Each loader forwards the lookup of a
# constant it does not define itself to a namespace of the library, so
# constants can be referenced through the loader.
module SQLTree::Spec

  # Resolves missing constants in SQLTree::Node.
  module NodeLoader
    def self.const_missing(const)
      SQLTree::Node.const_get(const)
    end
  end

  # Resolves missing constants in SQLTree::Token.
  module TokenLoader
    def self.const_missing(const)
      SQLTree::Token.const_get(const)
    end
  end
end
21
+
22
# Spec runner configuration hook; nothing is configured at the moment.
Spec::Runner.configure do |config|

end
25
+
26
+ require "#{File.dirname(__FILE__)}/lib/matchers"
@@ -0,0 +1,102 @@
1
+ require "#{File.dirname(__FILE__)}/../spec_helper"
2
+
3
# Examples for parsing SQL expressions through
# <tt>SQLTree::Node::Expression[]</tt>.
describe SQLTree::Node::Expression do

  describe '.parse' do
    it "shoud parse a value correctly" do
      SQLTree::Node::Expression['123'].should == SQLTree::Node::Value.new(123)
    end

    it "shoud parse a function call without arguments correctly" do
      function = SQLTree::Node::Expression['NOW()']
      function.function.should == 'NOW'
      function.arguments.should be_empty
    end

    it "shoud parse a function call with arguments correctly" do
      function = SQLTree::Node::Expression["MD5('string')"]
      function.function.should == 'MD5'
      function.arguments.should == [SQLTree::Node::Value.new('string')]
    end

    it "should parse a logical OR expression correctly" do
      # The second string literal is properly closed.
      logical = SQLTree::Node::Expression["'this' OR 'that'"]
      logical.operator.should == :or
      logical.expressions.should == [SQLTree::Node::Value.new('this'), SQLTree::Node::Value.new('that')]
    end

    it "should parse a logical AND expression correctly" do
      logical = SQLTree::Node::Expression['1 AND 2']
      logical.operator.should == :and
      # Without <tt>should</tt> this comparison would be evaluated and thrown
      # away, and the example would not check the expressions at all.
      logical.expressions.should == [SQLTree::Node::Value.new(1), SQLTree::Node::Value.new(2)]
    end

    it "should nest a logical AND expression correctly" do
      logical = SQLTree::Node::Expression['1 AND 2 AND 3']
      logical.should == SQLTree::Node::Expression['(1 AND 2) AND 3']
    end

    it "should nest expressions correctly when parentheses are used" do
      logical = SQLTree::Node::Expression['1 AND (2 AND 3)']
      logical.should_not == SQLTree::Node::Expression['(1 AND 2) AND 3']
    end

    it "should parse a NOT expression without parenteheses correctly" do
      SQLTree::Node::Expression['NOT 1'].should == SQLTree::Node::LogicalNotExpression.new(SQLTree::Node::Value.new(1))
    end

    it "should parse a NOT expression with parentheses correctly" do
      SQLTree::Node::Expression['NOT(1)'].should == SQLTree::Node::LogicalNotExpression.new(SQLTree::Node::Value.new(1))
    end

    it "should parse a comparison expression correctly" do
      comparison = SQLTree::Node::Expression['1 < 2']
      comparison.operator.should == '<'
      comparison.lhs.should == SQLTree::Node::Value.new(1)
      comparison.rhs.should == SQLTree::Node::Value.new(2)
    end

    it "should parse an IS NULL expression corectly" do
      comparison = SQLTree::Node::Expression['field IS NULL']
      comparison.operator.should == 'IS'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should == SQLTree::Node::Value.new(nil)
    end

    it "should parse an IS NOT NULL expression corectly" do
      comparison = SQLTree::Node::Expression['field IS NOT NULL']
      comparison.operator.should == 'IS NOT'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should == SQLTree::Node::Value.new(nil)
    end

    it "should parse a LIKE expression corectly" do
      # The string literal is properly closed.
      comparison = SQLTree::Node::Expression["field LIKE '%search%'"]
      comparison.operator.should == 'LIKE'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should == SQLTree::Node::Value.new('%search%')
    end

    it "should parse a NOT ILIKE expression corectly" do
      # The string literal is properly closed.
      comparison = SQLTree::Node::Expression["field NOT ILIKE '%search%'"]
      comparison.operator.should == 'NOT ILIKE'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should == SQLTree::Node::Value.new('%search%')
    end

    it "should parse an IN expression correctly" do
      comparison = SQLTree::Node::Expression["field IN (1,2,3,4)"]
      comparison.operator.should == 'IN'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should be_kind_of(SQLTree::Node::SetExpression)
    end

    it "should parse a NOT IN expression correctly" do
      comparison = SQLTree::Node::Expression["field NOT IN (1>2, 3+6, 99)"]
      comparison.operator.should == 'NOT IN'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should be_kind_of(SQLTree::Node::SetExpression)
    end

  end
end
@@ -0,0 +1,84 @@
1
+ require "#{File.dirname(__FILE__)}/../spec_helper"
2
+
3
# Examples for parsing literal values through <tt>SQLTree::Node::Value[]</tt>.
describe SQLTree::Node::Value do

  describe '.parse' do
    it "should not parse a field name" do
      lambda { SQLTree::Node::Value['field_name'] }.should raise_error(SQLTree::Parser::UnexpectedToken)
    end

    it "should parse an integer value correctly" do
      SQLTree::Node::Value['123'].value.should == 123
    end

    it "should parse a string correctly" do
      SQLTree::Node::Value["'123'"].value.should == '123'
    end

    it "should parse a NULL value correctly" do
      SQLTree::Node::Value['NULL'].value.should == nil
    end

  end
end
24
+
25
# Examples for parsing variable names.
#
# NOTE(review): the group describes SQLTree::Node::Variable, but every
# example parses through SQLTree::Node::Field -- confirm this is intended.
describe SQLTree::Node::Variable do

  describe '.parse' do
    it "should parse a variable name correctly" do
      SQLTree::Node::Field['variable'].name.should == 'variable'
    end

    it "should parse a quoted variable name correctly" do
      SQLTree::Node::Field['"variable"'].name.should == 'variable'
    end

    it "should raise an error when parsing a reserved keyword as variable" do
      lambda { SQLTree::Node::Field['select'] }.should raise_error(SQLTree::Parser::UnexpectedToken)
    end

    it "should parse a quoted reserved keyword as variable name correctly" do
      SQLTree::Node::Field['"select"'].name.should == 'select'
    end
  end
end
45
+
46
# Examples for parsing field references through
# <tt>SQLTree::Node::Field[]</tt>, covering every combination of quoted and
# unquoted table and field names.
describe SQLTree::Node::Field do
  describe '.parse' do
    it "should parse a field name with table name correclty" do
      field = SQLTree::Node::Field['table.field']
      field.table.should == 'table'
      field.name.should == 'field'
    end

    it "should parse a field name without table name correclty" do
      field = SQLTree::Node::Field['field']
      field.table.should be_nil
      field.name.should == 'field'
    end

    it "should parse a quoted field name without table name correclty" do
      field = SQLTree::Node::Field['"field"']
      field.table.should be_nil
      field.name.should == 'field'
    end

    it "should parse a quoted field name with quoted table name correclty" do
      field = SQLTree::Node::Field['"table"."field"']
      field.table.should == 'table'
      field.name.should == 'field'
    end

    it "should parse a quoted field name with non-quoted table name correclty" do
      field = SQLTree::Node::Field['table."field"']
      field.table.should == 'table'
      field.name.should == 'field'
    end

    it "should parse a non-quoted field name with quoted table name correclty" do
      field = SQLTree::Node::Field['"table".field']
      field.table.should == 'table'
      field.name.should == 'field'
    end
  end
end