sql_tree 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
# The <tt>SQLTree::Tokenizer</tt> class transforms a string or stream of
# characters into an enumeration of tokens, that are more appropriate for
# the SQL parser to work with.
#
# An example:
#
#   >> SQLTree::Tokenizer.new.tokenize('SELECT * FROM table')
#   => [:select, :all, :from, Variable('table')]
#
# The <tt>tokenize</tt> method will return an array of tokens, while
# the <tt>each_token</tt> (aliased to <tt>each</tt>) will yield every
# token one by one. <tt>each_token</tt> reads the string that was handed
# to the most recent <tt>tokenize</tt> call.
class SQLTree::Tokenizer

  include Enumerable

  # The keyword queue, on which keywords are placed before they are yielded
  # to the parser, to enable keyword combining (e.g. NOT LIKE)
  attr_reader :keyword_queue

  def initialize # :nodoc:
    @keyword_queue = []
  end

  # Returns an array of tokens for the given string.
  # <tt>string</tt>:: the string to tokenize
  def tokenize(string)
    @string = string
    @current_char_pos = -1
    # Drop keywords that an earlier, aborted run may have left on the queue,
    # so they cannot leak into the result for this string.
    @keyword_queue.clear
    entries
  end

  # Returns the current character that is being tokenized
  def current_char
    @current_char
  end

  # Returns the next character to tokenize, but does not move
  # the pointer of the current character forward.
  # <tt>lookahead</tt>:: how many positions forward to peek.
  def peek_char(lookahead = 1)
    @string[@current_char_pos + lookahead, 1]
  end

  # Returns the next character to tokenize, and moves the pointer
  # of the current character one position forward.
  #
  # <tt>String#[start, length]</tt> returns an empty string when the start
  # equals the string length and +nil+ beyond that, so this method returns
  # <tt>""</tt> once, directly after the last character, and +nil+ afterwards.
  def next_char
    @current_char_pos += 1
    @current_char = @string[@current_char_pos, 1]
  end

  # Queues keyword tokens and yields every other token.
  #
  # Keywords are not yielded right away: they are pushed on the keyword
  # queue, so that they can be combined with keywords that follow (e.g.
  # NOT LIKE). Any other token first flushes the queue, in order, and is
  # then yielded itself.
  # NOTE(review): this method only queues and flushes; queued keywords are
  # yielded one by one and no combined token is built here.
  # <tt>token</tt>:: the token to yield or queue
  # <tt>block</tt>:: the block to yield tokens to.
  def handle_token(token, &block) # :yields: SQLTree::Token
    if token.kind_of?(SQLTree::Token::Keyword)
      keyword_queue.push(token)
    else
      empty_keyword_queue!(&block)
      block.call(token)
    end
  end

  # This method ensures that every keyword currently in the queue is
  # yielded. This method gets called by <tt>handle_token</tt> when it
  # knows for sure that the keywords on the queue cannot be combined
  # into a single keyword.
  # <tt>block</tt>:: the block to yield the tokens on the queue to.
  def empty_keyword_queue!(&block) # :yields: SQLTree::Token
    block.call(@keyword_queue.shift) until @keyword_queue.empty?
  end

  # Iterator method that yields each token that is encountered in the
  # SQL stream. These tokens are passed to the SQL parser to construct
  # a syntax tree for the SQL query.
  #
  # This method is aliased to <tt>:each</tt> to make the Enumerable
  # methods work on this method. Without a block, an enumerator over the
  # tokens is returned instead.
  #
  # Characters that match none of the cases below are skipped silently.
  def each_token(&block) # :yields: SQLTree::Token
    return enum_for(:each_token) unless block

    while next_char
      case current_char
      when /^\s?$/
        # Whitespace, or the empty string that is read just past the last
        # character: nothing to yield, go to next character.
      when '('            then handle_token(SQLTree::Token::LPAREN, &block)
      when ')'            then handle_token(SQLTree::Token::RPAREN, &block)
      when '.'            then handle_token(SQLTree::Token::DOT, &block)
      when ','            then handle_token(SQLTree::Token::COMMA, &block)
      when /\d/           then tokenize_number(&block)
      when "'"            then tokenize_quoted_string(&block)
      when OPERATOR_CHARS then tokenize_operator(&block)
      when /\w/           then tokenize_keyword(&block)
      when '"'            then tokenize_quoted_variable(&block) # TODO: allow MySQL quoting mode
      end
    end

    # Make sure to yield any tokens that are still stashed on the queue.
    empty_keyword_queue!(&block)
  end

  alias :each :each_token

  # Tokenizes a keyword in the code. This can either be a reserved SQL keyword
  # or a variable. This method will yield variables directly. Keywords will be
  # yielded with a delay, because they may need to be combined with other
  # keywords in the <tt>handle_token</tt> method.
  def tokenize_keyword(&block) # :yields: SQLTree::Token
    literal = current_char
    literal << next_char while /[\w]/ =~ peek_char

    if SQLTree::Token::KEYWORDS.include?(literal.upcase)
      # Reserved words are looked up as constants on SQLTree::Token.
      handle_token(SQLTree::Token.const_get(literal.upcase), &block)
    else
      handle_token(SQLTree::Token::Variable.new(literal), &block)
    end
  end

  # Tokenizes a number (either an integer or float) in the SQL stream.
  # This method will yield the token after the last digit of the number
  # has been encountered. At most one dot is consumed as part of the number.
  def tokenize_number(&block) # :yields: SQLTree::Token::Number
    number = current_char
    dot_encountered = false
    while /\d/ =~ peek_char || (peek_char == '.' && !dot_encountered)
      dot_encountered = true if peek_char == '.'
      number << next_char
    end

    if dot_encountered
      handle_token(SQLTree::Token::Number.new(number.to_f), &block)
    else
      handle_token(SQLTree::Token::Number.new(number.to_i), &block)
    end
  end

  # Reads a quoted string token from the SQL stream. This method will
  # yield an SQLTree::Token::String when the closing quote character is
  # encountered, or when the input ends before a closing quote is found.
  # A backslash makes the character that follows it part of the string.
  def tokenize_quoted_string(&block) # :yields: SQLTree::Token::String
    string = ''
    until next_char.nil? || current_char == "'"
      string << (current_char == "\\" ? next_char : current_char)
    end
    handle_token(SQLTree::Token::String.new(string), &block)
  end

  # Tokenize a quoted variable from the SQL stream. This method will
  # yield an SQLTree::Token::Variable when the closing quote is found,
  # or when the input ends before a closing quote is found.
  #
  # The actual quote character that is used depends on the DBMS. For now,
  # only the more standard double quote is accepted.
  def tokenize_quoted_variable(&block) # :yields: SQLTree::Token::Variable
    variable = ''
    until next_char.nil? || current_char == '"' # TODO: allow MySQL quoting mode
      variable << (current_char == "\\" ? next_char : current_char)
    end
    handle_token(SQLTree::Token::Variable.new(variable), &block)
  end

  # A regular expression that matches all operator characters.
  OPERATOR_CHARS = /\=|<|>|!|\-|\+|\/|\*|\%/

  # Tokenizes an operator in the SQL stream. This method will yield the
  # operator token when the last character of the token is encountered.
  #
  # A minus sign that is directly followed by a digit or a dot is handed to
  # <tt>tokenize_number</tt> as the sign of a number. Otherwise the next
  # character is consumed as well when both characters together are a key
  # of <tt>SQLTree::Token::OPERATORS_HASH</tt>.
  def tokenize_operator(&block) # :yields: SQLTree::Token
    operator = current_char
    if operator == '-' && /[\d\.]/ =~ peek_char
      tokenize_number(&block)
    else
      operator << next_char if SQLTree::Token::OPERATORS_HASH.has_key?(operator + peek_char)
      handle_token(SQLTree::Token.const_get(SQLTree::Token::OPERATORS_HASH[operator].to_s.upcase), &block)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require "#{File.dirname(__FILE__)}/../spec_helper"
2
+
3
# Placeholder example group for the public SQLTree API; it does not contain
# any examples yet.
describe SQLTree, :API do

end
@@ -0,0 +1,21 @@
1
+ require "#{File.dirname(__FILE__)}/../spec_helper"
2
+
3
# Round-trip examples: each SQL string is parsed with <tt>SQLTree[]</tt> and
# turned back into SQL with <tt>to_sql</tt>. In the expected output, table,
# column and alias names are wrapped in double quotes.
describe SQLTree, 'parsing and generating SQL' do

  it "should parse and generate SQL fo a simple list query" do
    SQLTree["SELECT * FROM table"].to_sql.should == 'SELECT * FROM "table"'
  end

  it "should parse and generate the DISTINCT keyword" do
    SQLTree["SELECT DISTINCT * FROM table"].to_sql.should == 'SELECT DISTINCT * FROM "table"'
  end

  it 'should parse and generate table aliases' do
    SQLTree["SELECT a.* FROM table AS a"].to_sql.should == 'SELECT "a".* FROM "table" AS "a"'
  end

  it "parse and generate a complex SQL query" do
    # The input uses irregular spacing; the expected SQL has normalized
    # spacing and the WHERE comparison wrapped in parentheses.
    SQLTree['SELECT a.*, MD5( a.name ) AS checksum FROM table AS a , other WHERE other.timestamp > a.timestamp'].to_sql.should ==
      'SELECT "a".*, MD5("a"."name") AS "checksum" FROM "table" AS "a", "other" WHERE ("other"."timestamp" > "a"."timestamp")'
  end
end
@@ -0,0 +1,84 @@
1
# Spec matcher that compares the tree representation of a parsed node with
# an expected tree. It implements the matcher methods <tt>matches?</tt>,
# <tt>description</tt>, <tt>failure_message</tt> and
# <tt>negative_failure_message</tt>.
class ParseAs

  # <tt>expected_tree</tt>:: the tree the parsed result should be equal to.
  def initialize(expected_tree)
    @expected_tree = expected_tree
  end

  # Converts the found node with <tt>to_tree</tt>, remembers the result for
  # the failure message, and returns whether it equals the expected tree.
  # <tt>found_tree</tt>:: an object that responds to <tt>to_tree</tt>.
  def matches?(found_tree)
    @found_tree = found_tree.to_tree
    @found_tree == @expected_tree
  end

  # Returns the description of this expectation.
  def description
    "expected to parse to #{@expected_tree.inspect}"
  end

  # Returns the message for a failed positive expectation.
  def failure_message
    " #{@expected_tree.inspect} expected, but found #{@found_tree.inspect}"
  end

  # Returns the message for a failed negative expectation.
  def negative_failure_message
    " expected not to be parsed to #{@expected_tree.inspect}"
  end
end
24
+
25
# Returns a ParseAs matcher for the given expected tree.
# <tt>tree</tt>:: the tree that is handed to <tt>ParseAs.new</tt>.
def parse_as(tree)
  ParseAs.new(tree)
end
28
+
29
# Spec matcher that compares a list of found tokens with a list of expected
# tokens. It implements the matcher methods <tt>matches?</tt>,
# <tt>description</tt>, <tt>failure_message</tt> and
# <tt>negative_failure_message</tt>.
class TokenizeTo

  # Builds the list of expected tokens. Every element is converted by type:
  # an SQLTree::Token is used as is, a String becomes an
  # SQLTree::Token::String, a Numeric becomes an SQLTree::Token::Number and
  # a Symbol is looked up as upper-cased constant on SQLTree::Token.
  #
  # Raises ArgumentError for any other kind of element, so that a mistake in
  # a spec surfaces immediately instead of turning into a matcher that can
  # never match.
  # <tt>expected_tokens</tt>:: the list of tokens or token shorthands.
  def initialize(expected_tokens)
    @expected_tokens = expected_tokens.map do |t|
      case t
      when SQLTree::Token then t
      when String         then SQLTree::Token::String.new(t)
      when Numeric        then SQLTree::Token::Number.new(t)
      when Symbol         then SQLTree::Token.const_get(t.to_s.upcase)
      else raise ArgumentError, "Cannot check for this token: #{t.inspect}!"
      end
    end
  end

  # Remembers the found tokens for the failure message and returns whether
  # they are equal to the expected tokens.
  def matches?(found_tokens)
    @found_tokens = found_tokens
    @found_tokens == @expected_tokens
  end

  # Returns the description of this expectation.
  def description
    "expected to be tokenized to #{@expected_tokens.inspect}"
  end

  # Returns the message for a failed positive expectation.
  def failure_message
    " #{@expected_tokens.inspect} expected, but found #{@found_tokens.inspect}"
  end

  # Returns the message for a failed negative expectation.
  def negative_failure_message
    " expected not to be tokenized to #{@expected_tokens.inspect}"
  end

end
61
+
62
# Returns a TokenizeTo matcher for the given expected tokens.
# <tt>expected_tokens</tt>:: the tokens, collected into an array by the
#                            splat and handed to <tt>TokenizeTo.new</tt>.
def tokenize_to(*expected_tokens)
  TokenizeTo.new(expected_tokens)
end
65
+
66
# Returns an SQLTree::Token::Variable for the given name.
# <tt>name</tt>:: the variable name; it is converted with <tt>to_s</tt>.
def sql_var(name)
  SQLTree::Token::Variable.new(name.to_s)
end
69
+
70
# Shorthand that returns SQLTree::Token::DOT.
def dot
  SQLTree::Token::DOT
end
73
+
74
# Shorthand that returns SQLTree::Token::COMMA.
def comma
  SQLTree::Token::COMMA
end
77
+
78
# Shorthand that returns SQLTree::Token::LPAREN.
def lparen
  SQLTree::Token::LPAREN
end
81
+
82
# Shorthand that returns SQLTree::Token::RPAREN.
def rparen
  SQLTree::Token::RPAREN
end
@@ -0,0 +1,26 @@
1
# Remove load path entries that contain 'TextMate', then add the lib
# directory one level above this file to the load path.
$:.reject! { |e| e.include? 'TextMate' }
$: << File.join(File.dirname(__FILE__), '..', 'lib')

require 'rubygems'
require 'spec'
require 'sql_tree'
7
+
8
# Helper namespaces for the specs. Constants that are missing on one of the
# loader modules are looked up on the corresponding SQLTree namespace.
module SQLTree::Spec
  # Resolves missing constants against SQLTree::Node.
  module NodeLoader
    class << self
      def const_missing(name)
        SQLTree::Node.const_get(name)
      end
    end
  end

  # Resolves missing constants against SQLTree::Token.
  module TokenLoader
    class << self
      def const_missing(name)
        SQLTree::Token.const_get(name)
      end
    end
  end
end
21
+
22
# The spec runner is configured with an empty block; no options are set.
Spec::Runner.configure do |config|

end

# Loads lib/matchers, relative to the directory of this file.
require "#{File.dirname(__FILE__)}/lib/matchers"
@@ -0,0 +1,102 @@
1
+ require "#{File.dirname(__FILE__)}/../spec_helper"
2
+
3
# Examples for parsing SQL expressions with
# <tt>SQLTree::Node::Expression[]</tt>: values, function calls, logical
# operators, NOT, comparisons, and the IS, LIKE and IN operator families.
describe SQLTree::Node::Expression do

  describe '.parse' do
    it "shoud parse a value correctly" do
      SQLTree::Node::Expression['123'].should == SQLTree::Node::Value.new(123)
    end

    it "shoud parse a function call without arguments correctly" do
      function = SQLTree::Node::Expression['NOW()']
      function.function.should == 'NOW'
      function.arguments.should be_empty
    end

    it "shoud parse a function call with arguments correctly" do
      function = SQLTree::Node::Expression["MD5('string')"]
      function.function.should == 'MD5'
      function.arguments.should == [SQLTree::Node::Value.new('string')]
    end

    it "should parse a logical OR expression correctly" do
      logical = SQLTree::Node::Expression["'this' OR 'that'"]
      logical.operator.should == :or
      logical.expressions.should == [SQLTree::Node::Value.new('this'), SQLTree::Node::Value.new('that')]
    end

    it "should parse a logical AND expression correctly" do
      logical = SQLTree::Node::Expression['1 AND 2']
      logical.operator.should == :and
      # A bare == comparison asserts nothing; the result must go through
      # .should to be checked.
      logical.expressions.should == [SQLTree::Node::Value.new(1), SQLTree::Node::Value.new(2)]
    end

    it "should nest a logical AND expression correctly" do
      logical = SQLTree::Node::Expression['1 AND 2 AND 3']
      logical.should == SQLTree::Node::Expression['(1 AND 2) AND 3']
    end

    it "should nest expressions correctly when parentheses are used" do
      logical = SQLTree::Node::Expression['1 AND (2 AND 3)']
      logical.should_not == SQLTree::Node::Expression['(1 AND 2) AND 3']
    end

    it "should parse a NOT expression without parenteheses correctly" do
      SQLTree::Node::Expression['NOT 1'].should == SQLTree::Node::LogicalNotExpression.new(SQLTree::Node::Value.new(1))
    end

    it "should parse a NOT expression with parentheses correctly" do
      SQLTree::Node::Expression['NOT(1)'].should == SQLTree::Node::LogicalNotExpression.new(SQLTree::Node::Value.new(1))
    end

    it "should parse a comparison expression correctly" do
      comparison = SQLTree::Node::Expression['1 < 2']
      comparison.operator.should == '<'
      comparison.lhs.should == SQLTree::Node::Value.new(1)
      comparison.rhs.should == SQLTree::Node::Value.new(2)
    end

    it "should parse an IS NULL expression corectly" do
      comparison = SQLTree::Node::Expression['field IS NULL']
      comparison.operator.should == 'IS'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should == SQLTree::Node::Value.new(nil)
    end

    it "should parse an IS NOT NULL expression corectly" do
      comparison = SQLTree::Node::Expression['field IS NOT NULL']
      comparison.operator.should == 'IS NOT'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should == SQLTree::Node::Value.new(nil)
    end

    it "should parse a LIKE expression corectly" do
      comparison = SQLTree::Node::Expression["field LIKE '%search%'"]
      comparison.operator.should == 'LIKE'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should == SQLTree::Node::Value.new('%search%')
    end

    it "should parse a NOT ILIKE expression corectly" do
      comparison = SQLTree::Node::Expression["field NOT ILIKE '%search%'"]
      comparison.operator.should == 'NOT ILIKE'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should == SQLTree::Node::Value.new('%search%')
    end

    it "should parse an IN expression correctly" do
      comparison = SQLTree::Node::Expression["field IN (1,2,3,4)"]
      comparison.operator.should == 'IN'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should be_kind_of(SQLTree::Node::SetExpression)
    end

    it "should parse a NOT IN expression correctly" do
      comparison = SQLTree::Node::Expression["field NOT IN (1>2, 3+6, 99)"]
      comparison.operator.should == 'NOT IN'
      comparison.lhs.should == SQLTree::Node::Variable.new('field')
      comparison.rhs.should be_kind_of(SQLTree::Node::SetExpression)
    end

  end
end
@@ -0,0 +1,84 @@
1
+ require "#{File.dirname(__FILE__)}/../spec_helper"
2
+
3
# Examples for parsing literal values with <tt>SQLTree::Node::Value[]</tt>:
# integers, quoted strings and NULL are accepted, a bare field name is
# expected to raise SQLTree::Parser::UnexpectedToken.
describe SQLTree::Node::Value do

  describe '.parse' do
    it "should not parse a field name" do
      lambda { SQLTree::Node::Value['field_name'] }.should raise_error(SQLTree::Parser::UnexpectedToken)
    end

    it "should parse an integer value correctly" do
      SQLTree::Node::Value['123'].value.should == 123
    end

    it "should parse a string correctly" do
      SQLTree::Node::Value["'123'"].value.should == '123'
    end

    it "should parse a NULL value correctly" do
      SQLTree::Node::Value['NULL'].value.should == nil
    end

  end
end
24
+
25
# Examples for parsing plain and double-quoted variable names, including
# reserved keywords, which are only accepted when quoted.
# NOTE(review): this group describes SQLTree::Node::Variable, but every
# example parses with SQLTree::Node::Field[] -- confirm that this is intended.
describe SQLTree::Node::Variable do

  describe '.parse' do
    it "should parse a variable name correctly" do
      SQLTree::Node::Field['variable'].name.should == 'variable'
    end

    it "should parse a quoted variable name correctly" do
      SQLTree::Node::Field['"variable"'].name.should == 'variable'
    end

    it "should raise an error when parsing a reserved keyword as variable" do
      lambda { SQLTree::Node::Field['select'] }.should raise_error(SQLTree::Parser::UnexpectedToken)
    end

    it "should parse a quoted reserved keyword as variable name correctly" do
      SQLTree::Node::Field['"select"'].name.should == 'select'
    end
  end
end
45
+
46
# Examples for parsing field references with <tt>SQLTree::Node::Field[]</tt>.
# They cover fields with and without a table prefix, in every combination of
# quoted and non-quoted table and field names.
describe SQLTree::Node::Field do
  describe '.parse' do
    it "should parse a field name with table name correclty" do
      field = SQLTree::Node::Field['table.field']
      field.table.should == 'table'
      field.name.should == 'field'
    end

    it "should parse a field name without table name correclty" do
      field = SQLTree::Node::Field['field']
      field.table.should be_nil
      field.name.should == 'field'
    end

    it "should parse a quoted field name without table name correclty" do
      field = SQLTree::Node::Field['"field"']
      field.table.should be_nil
      field.name.should == 'field'
    end

    it "should parse a quoted field name with quoted table name correclty" do
      field = SQLTree::Node::Field['"table"."field"']
      field.table.should == 'table'
      field.name.should == 'field'
    end

    it "should parse a quoted field name with non-quoted table name correclty" do
      field = SQLTree::Node::Field['table."field"']
      field.table.should == 'table'
      field.name.should == 'field'
    end

    it "should parse a non-quoted field name with quoted table name correclty" do
      field = SQLTree::Node::Field['"table".field']
      field.table.should == 'table'
      field.name.should == 'field'
    end
  end
end