sparkql 0.1.8

@@ -0,0 +1,93 @@
+ # $Id$
+ #
+ # SparkQL grammar
+
+ class Sparkql::Parser
+ prechigh
+   nonassoc UMINUS
+ preclow
+ rule
+   target
+     : expressions
+     | /* none */ { result = 0 }
+     ;
+
+   expressions
+     : expression
+     | conjunction
+     ;
+
+   expression
+     : field OPERATOR condition { result = tokenize_expression(val[0], val[1], val[2]) }
+     | group
+     ;
+
+   conjunction
+     : expressions CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1], val[2]) }
+     ;
+
+   group
+     : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
+     ;
+
+   field
+     : STANDARD_FIELD
+     | CUSTOM_FIELD
+     ;
+
+   condition
+     : literal
+     | literal_list
+     | function
+     ;
+
+   function
+     : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
+     | function_name LPAREN function_args RPAREN { result = tokenize_function(val[0], val[2]) }
+     ;
+
+   function_name
+     : KEYWORD
+     ;
+
+   function_args
+     : function_arg
+     | function_args COMMA function_arg
+     ;
+
+   function_arg
+     : literal
+     | literals
+     ;
+
+   literal_list
+     : literals
+     | literal_list COMMA literals { result = tokenize_multiple(val[0], val[2]) }
+     ;
+
+   # Literals that support multiple values
+   literals
+     : INTEGER
+     | DECIMAL
+     | CHARACTER
+     ;
+
+   # Literals that only support a single value
+   literal
+     : DATE
+     | DATETIME
+     | BOOLEAN
+     | NULL
+     ;
+
+ end
+
+ ---- header
+ # $Id$
+ ---- inner
+   include Sparkql::ParserTools
+   include Sparkql::ParserCompatibility
+
+ ---- footer
+
+ # END PARSER
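
For orientation, a few filter strings this grammar accepts, one per production of interest. This is an illustrative sketch, not part of the gem: the unquoted field names are hypothetical, while the quoted custom field and the days() function come from the gem's own tests.

    expression:     City Eq 'Fargo'
    conjunction:    City Eq 'Fargo' And ListPrice Lt 300000.0
    group:          (City Eq 'Fargo' Or City Eq 'Moorhead') And Baths Ge 2
    literal_list:   Status Eq 'Active','Pending'
    custom field:   "General Property Description"."Taxes" Lt 500.0
    function:       OriginalEntryTimestamp Ge days(7)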
@@ -0,0 +1,231 @@
+ # Required interface for existing parser implementations
+ module Sparkql::ParserCompatibility
+
+   MAXIMUM_MULTIPLE_VALUES = 25
+   MAXIMUM_EXPRESSIONS = 50
+   MAXIMUM_LEVEL_DEPTH = 2
+
+   # TODO: I really don't think this is required anymore
+   # Ordered by precedence.
+   FILTER_VALUES = [
+     {
+       :type => :datetime,
+       :regex => /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}T[0-9]{2}\:[0-9]{2}\:[0-9]{2}\.[0-9]{6}$/,
+       :operators => Sparkql::Token::OPERATORS
+     },
+     {
+       :type => :date,
+       :regex => /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/,
+       :operators => Sparkql::Token::OPERATORS
+     },
+     {
+       :type => :character,
+       :regex => /^'([^'\\]*(\\.[^'\\]*)*)'$/, # Strings must be single quoted. Any single quotes inside must be escaped.
+       :multiple => /^'([^'\\]*(\\.[^'\\]*)*)'/,
+       :operators => Sparkql::Token::EQUALITY_OPERATORS
+     },
+     {
+       :type => :integer,
+       :regex => /^\-?[0-9]+$/,
+       :multiple => /^\-?[0-9]+/,
+       :operators => Sparkql::Token::OPERATORS
+     },
+     {
+       :type => :decimal,
+       :regex => /^\-?[0-9]+\.[0-9]+$/,
+       :multiple => /^\-?[0-9]+\.[0-9]+/,
+       :operators => Sparkql::Token::OPERATORS
+     },
+     {
+       :type => :boolean,
+       :regex => /^true|false$/,
+       :operators => Sparkql::Token::EQUALITY_OPERATORS
+     },
+     {
+       :type => :null,
+       :regex => /^NULL|Null|null$/,
+       :operators => Sparkql::Token::EQUALITY_OPERATORS
+     }
+   ]
+
+   OPERATORS_SUPPORTING_MULTIPLES = ["Eq","Ne"]
+
+   # To be implemented by child class.
+   # Shall return a valid query string for the respective database,
+   # or nil if the source could not be processed. It may be possible to return a valid
+   # SQL string AND have errors (as checked by errors?), but this will be left
+   # to the discretion of the child class.
+   def compile( source, mapper )
+     raise NotImplementedError
+   end
+
+   # Returns a list of expressions tokenized in the following format:
+   # [{ :field => IdentifierName, :operator => "Eq", :value => "'Fargo'", :type => :character, :conjunction => "And" }]
+   # This step will set errors if source is not syntactically correct.
+   def tokenize( source )
+     raise ArgumentError, "You must supply a source string to tokenize!" unless source.is_a?(String)
+
+     # Reset the parser error stack
+     @errors = []
+
+     expressions = self.parse(source)
+     expressions
+   end
+
+   # Returns an array of errors. This is an array of ParserError objects
+   def errors
+     @errors = [] unless defined?(@errors)
+     @errors
+   end
+
+   # Delegator for methods to process the error list.
+   def process_errors
+     Sparkql::ErrorsProcessor.new(@errors)
+   end
+
+   # delegate :errors?, :fatal_errors?, :dropped_errors?, :recovered_errors?, :to => :process_errors
+   # Since I don't have rails delegate...
+   def errors?
+     process_errors.errors?
+   end
+   def fatal_errors?
+     process_errors.fatal_errors?
+   end
+   def dropped_errors?
+     process_errors.dropped_errors?
+   end
+   def recovered_errors?
+     process_errors.recovered_errors?
+   end
+
+   def escape_value_list( expression )
+     final_list = []
+     expression[:value].each do |value|
+       new_exp = {
+         :value => value,
+         :type => expression[:type]
+       }
+       final_list << escape_value(new_exp)
+     end
+     expression[:value] = final_list
+   end
+
+   def escape_value( expression )
+     if expression[:value].is_a? Array
+       return escape_value_list( expression )
+     end
+     case expression[:type]
+     when :character
+       return character_escape(expression[:value])
+     when :integer
+       return integer_escape(expression[:value])
+     when :decimal
+       return decimal_escape(expression[:value])
+     when :date
+       return date_escape(expression[:value])
+     when :datetime
+       return datetime_escape(expression[:value])
+     when :boolean
+       return boolean_escape(expression[:value])
+     when :null
+       return nil
+     end
+     expression[:value]
+   end
+
+   # Processes escape characters for a given string. May be overridden by
+   # child classes.
+   def character_escape( string )
+     string.gsub(/^\'/,'').gsub(/\'$/,'').gsub(/\\'/, "'")
+   end
+
+   def integer_escape( string )
+     string.to_i
+   end
+
+   def decimal_escape( string )
+     string.to_f
+   end
+
+   def date_escape(string)
+     Date.parse(string)
+   end
+
+   def datetime_escape(string)
+     DateTime.parse(string)
+   end
+
+   def boolean_escape(string)
+     "true" == string
+   end
+
+   # Returns the rule hash for a given type
+   def rules_for_type( type )
+     FILTER_VALUES.each do |rule|
+       return rule if rule[:type] == type
+     end
+     nil
+   end
+
+   # True if a given type supports multiple values
+   def supports_multiple?( type )
+     rules_for_type(type).include?( :multiple )
+   end
+
+   # Maximum supported nesting level for the parser filters
+   def max_level_depth
+     MAXIMUM_LEVEL_DEPTH
+   end
+
+   private
+
+   def tokenizer_error( error_hash )
+     self.errors << Sparkql::ParserError.new( error_hash )
+   end
+   alias :compile_error :tokenizer_error
+
+   # Checks the type of an expression with what is expected.
+   def check_type!(expression, expected, supports_nulls = true)
+     if expected == expression[:type] || (supports_nulls && expression[:type] == :null)
+       return true
+     elsif expected == :datetime && expression[:type] == :date
+       expression[:type] = :datetime
+       expression[:cast] = :date
+       return true
+     end
+     type_error(expression, expected)
+     false
+   end
+
+   def type_error( expression, expected )
+     compile_error(:token => expression[:field], :expression => expression,
+       :message => "expected #{expected} but found #{expression[:type]}",
+       :status => :fatal )
+   end
+
+   # Builds the correct operator based on the type and the value.
+   # default should be the operator provided in the actual filter string
+   def get_operator(expression, default)
+     f = rules_for_type(expression[:type])
+     if f[:operators].include?(default)
+       if f[:multiple] && multiple_values?(expression[:value])
+         return nil unless operator_supports_multiples?(default)
+         return default == "Ne" ? "Not In" : "In"
+       elsif default == "Ne"
+         return "Not Eq"
+       end
+       return default
+     else
+       return nil
+     end
+   end
+
+   def multiple_values?(value)
+     Array(value).size > 1
+   end
+
+   def operator_supports_multiples?(operator)
+     OPERATORS_SUPPORTING_MULTIPLES.include?(operator)
+   end
+
+ end
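
A minimal usage sketch (not part of the gem source), assuming the racc-generated Sparkql::Parser mixes this module in alongside Sparkql::ParserTools as declared in the grammar's ---- inner section; the keys on each expression hash follow the format documented above tokenize:

    require 'sparkql'

    parser = Sparkql::Parser.new
    expressions = parser.tokenize("City Eq 'Fargo' Or City Eq 'Moorhead'")

    if parser.errors?
      puts parser.errors.map(&:inspect)
    else
      expressions.each do |exp|
        # e.g. :field => "City", :operator => "Eq", :type => :character
        puts "#{exp[:field]} #{exp[:operator]} #{parser.escape_value(exp)}"
      end
    end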
@@ -0,0 +1,93 @@
+ # This is the guts of the parser internals and is mixed into the parser for organization.
+ module Sparkql::ParserTools
+
+   def parse(str)
+     @lexer = Sparkql::Lexer.new(str)
+     results = do_parse
+     max = Sparkql::ParserCompatibility::MAXIMUM_EXPRESSIONS
+     return if results.nil?
+     results.size > max ? results[0,max] : results
+   end
+
+   def next_token
+     t = @lexer.shift
+     while t[0] == :SPACE or t[0] == :NEWLINE
+       t = @lexer.shift
+     end
+     t
+   end
+
+   def tokenize_expression(field, op, val)
+     operator = get_operator(val, op) unless val.nil?
+     custom_field = field.start_with?('"')
+     block_group = (@lexer.level == 0) ? 0 : @lexer.block_group_identifier
+     expression = {:field => field, :operator => operator, :conjunction => 'And',
+       :level => @lexer.level, :block_group => block_group, :custom_field => custom_field}
+     expression = val.merge(expression) unless val.nil?
+     if @lexer.level > max_level_depth
+       compile_error(:token => "(", :expression => expression,
+         :message => "You have exceeded the maximum nesting level. Please nest no more than #{max_level_depth} levels deep.",
+         :status => :fatal, :syntax => false )
+     end
+     if operator.nil?
+       tokenizer_error(:token => op, :expression => expression,
+         :message => "Operator not supported for this type and value string", :status => :fatal )
+     end
+     [expression]
+   end
+
+   def tokenize_conjunction(exp1, conj, exp2)
+     exp2.first[:conjunction] = conj
+     exp1 + exp2
+   end
+
+   def tokenize_group(expressions)
+     @lexer.leveldown
+     expressions
+   end
+
+   def tokenize_multiple(lit1, lit2)
+     if lit1[:type] != lit2[:type]
+       tokenizer_error(:token => @lexer.last_field,
+         :message => "Type mismatch in field list.",
+         :status => :fatal,
+         :syntax => true)
+     end
+     array = Array(lit1[:value])
+     unless array.size >= Sparkql::ParserCompatibility::MAXIMUM_MULTIPLE_VALUES
+       array << lit2[:value]
+     end
+     {
+       :type => lit1[:type],
+       :value => array,
+       :multiple => "true" # TODO ?
+     }
+   end
+
+   def tokenize_function(name, f_args)
+     args = f_args.instance_of?(Array) ? f_args : [f_args]
+     args.each do |arg|
+       arg[:value] = escape_value(arg)
+     end
+     resolver = Sparkql::FunctionResolver.new(name, args)
+
+     resolver.validate
+     if resolver.errors?
+       errors.concat(resolver.errors)
+       return nil
+     else
+       return resolver.call()
+     end
+   end
+
+   def on_error(error_token_id, error_value, value_stack)
+     token_name = token_to_str(error_token_id)
+     token_name.downcase!
+     token = error_value.to_s.inspect
+     tokenizer_error(:token => @lexer.last_field,
+       :message => "Error parsing token #{token_name}",
+       :status => :fatal,
+       :syntax => true)
+   end
+
+ end
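
A rough sketch (again assuming the generated Sparkql::Parser) of the grouping metadata produced by tokenize_expression above: each expression hash carries :level for parenthesis nesting depth and :block_group for the lexer-assigned group identifier.

    expressions = Sparkql::Parser.new.parse("Baths Ge 2 And (City Eq 'Fargo' Or City Eq 'Moorhead')")
    expressions.each do |e|
      puts "#{e[:field]} level=#{e[:level]} block_group=#{e[:block_group]} conjunction=#{e[:conjunction]}"
    end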
@@ -0,0 +1,21 @@
+ module Sparkql::Token
+   SPACE = /[\t ]+/
+   NEWLINE = /\r\n|\n\r|\r|\n/
+   LPAREN = /\(/
+   RPAREN = /\)/
+   KEYWORD = /[A-Za-z]+/
+   STANDARD_FIELD = /[A-Z]+[A-Za-z]*/
+   CUSTOM_FIELD = /^(\"([^$."][^."]+)\".\"([^$."][^."]+)\")/
+   INTEGER = /^\-?[0-9]+/
+   DECIMAL = /^\-?[0-9]+\.[0-9]+/
+   CHARACTER = /^'([^'\\]*(\\.[^'\\]*)*)'/
+   DATE = /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}/
+   DATETIME = /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}T[0-9]{2}\:[0-9]{2}\:[0-9]{2}\.[0-9]{6}/
+   BOOLEAN = /^true|false/
+   NULL = /NULL|null|Null/
+   # Reserved words
+   EQUALITY_OPERATORS = ['Eq','Ne']
+   OPERATORS = ['Eq','Ne','Gt','Ge','Lt','Le'] + EQUALITY_OPERATORS
+   CONJUNCTIONS = ['And','Or']
+
+ end
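
An illustrative sketch of how these patterns surface at lex time. The Sparkql::Lexer itself is not part of this changeset, but as the lexer tests below show, its shift method yields [token_type, value] pairs, and next_token in Sparkql::ParserTools skips the whitespace tokens:

    lexer = Sparkql::Lexer.new("ListPrice Ge 150000.0")
    lexer.shift.first # => :STANDARD_FIELD ("ListPrice", a hypothetical field)
    lexer.shift.first # => :SPACE
    lexer.shift.first # => :OPERATOR       ("Ge", a reserved word)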
@@ -0,0 +1,3 @@
+ module Sparkql
+   VERSION = File.read(File.dirname(__FILE__) + "/../../VERSION").chomp
+ end
data/lib/sparkql.rb ADDED
@@ -0,0 +1,13 @@
+ require "sparkql/version"
+ require "sparkql/token"
+ require "sparkql/errors"
+ require "sparkql/expression_state"
+ require "sparkql/lexer"
+ require "sparkql/function_resolver"
+ require "sparkql/parser_tools"
+ require "sparkql/parser_compatibility"
+ require "sparkql/parser"
+
+ module Sparkql
+   # I AM A SPARKQLING MODULE!!!
+ end
data/sparkql.gemspec ADDED
@@ -0,0 +1,28 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path("../lib", __FILE__)
+ require "sparkql/version"
+
+ Gem::Specification.new do |s|
+   s.name        = "sparkql"
+   s.version     = Sparkql::VERSION
+   s.authors     = ["Wade McEwen"]
+   s.email       = ["wade@fbsdata.com"]
+   s.homepage    = ""
+   s.summary     = %q{API Parser engine for filter searching}
+   s.description = %q{Specification and base implementation of the Spark API parsing system.}
+
+   s.rubyforge_project = "sparkql"
+
+   s.files         = `git ls-files`.split("\n")
+   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+   s.require_paths = ["lib"]
+
+   s.add_development_dependency 'racc', '1.4.8'
+   s.add_development_dependency 'flexmls_gems', '~> 0.2.9'
+   s.add_development_dependency 'rake', '~> 0.9.2'
+   s.add_development_dependency 'test-unit', '~> 2.1.0'
+   s.add_development_dependency 'ci_reporter', '~> 1.6'
+   s.add_development_dependency 'rcov', '~> 0.9.9'
+
+ end
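
For reference, the gem builds and installs locally from this spec with the standard RubyGems commands:

    gem build sparkql.gemspec
    gem install sparkql-0.1.8.gem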
@@ -0,0 +1,2 @@
+ require 'test/unit'
+ require 'sparkql'
@@ -0,0 +1,57 @@
+ require 'test_helper'
+
+ class ExpressionStateTest < Test::Unit::TestCase
+   include Sparkql
+
+   def setup
+     @subject = ExpressionState.new
+     @parser = Parser.new
+   end
+
+   def test_needs_join
+     filter = '"General Property Description"."Taxes" Lt 500.0'
+     process(filter)
+     assert @subject.needs_join?
+   end
+
+   def test_or
+     filter = '"General Property Description"."Taxes" Lt 500.0 Or "General Property Description"."Taxes" Gt 400.0'
+     process(filter)
+     assert !@subject.needs_join?, "#{@subject.inspect} Expressions: #{@expressions.inspect}"
+   end
+
+   def test_and
+     filter = '"General Property Description"."Taxes" Lt 500.0 And "General Property Description"."Taxes2" Eq 1.0'
+     process(filter)
+     assert @subject.needs_join?
+   end
+
+   def test_and_or
+     filter = '"General Property Description"."Taxes" Lt 500.0 And "General Property Description"."Taxes2" ' +
+              'Eq 1.0 Or "General Property Description"."Taxes" Gt 400.0'
+     process(filter)
+     assert !@subject.needs_join?
+   end
+
+   def test_or_and
+     filter = '"General Property Description"."Taxes" Lt 500.0 Or "General Property Description"."Taxes" ' +
+              'Gt 400.0 And "General Property Description"."Taxes2" Eq 1.0'
+     process(filter)
+     assert @subject.needs_join?
+   end
+
+   def test_or_with_standard_field
+     filter = 'Test Eq 0.0 Or "General Property Description"."Taxes" Lt 500.0'
+     process(filter)
+     assert @subject.needs_join?
+   end
+
+   def process(filter)
+     @expressions = @parser.parse(filter)
+     @expressions.each do |ex|
+       @subject.push(ex) if ex[:custom_field] == true
+     end
+     @expressions
+   end
+
+ end
@@ -0,0 +1,50 @@
+ require 'test_helper'
+
+ class ParserTest < Test::Unit::TestCase
+   include Sparkql
+
+   def test_now
+     start = Time.now
+     f = FunctionResolver.new('now', [])
+     f.validate
+     assert !f.errors?, "Errors #{f.errors.inspect}"
+     value = f.call
+     assert_equal :datetime, value[:type]
+     test_time = Time.parse(value[:value])
+     assert (-5 < test_time - start && 5 > test_time - start), "Time range off by more than five seconds #{test_time - start} '#{test_time} - #{start}'"
+   end
+
+   def test_day
+     d = Date.today
+     dt = DateTime.new(d.year, d.month, d.day, 0, 0, 0, DateTime.now.offset)
+     start = Time.parse(dt.to_s)
+     f = FunctionResolver.new('days', [{:type => :integer, :value => 7}])
+     f.validate
+     assert !f.errors?, "Errors #{f.errors.inspect}"
+     value = f.call
+     assert_equal :date, value[:type]
+     test_time = Time.parse(value[:value])
+     assert (605000 > test_time - start && 604000 < test_time - start), "Time not within the expected seven-day range #{test_time - start} '#{test_time} - #{start}'"
+   end
+
+   def test_invalid_param
+     f = FunctionResolver.new('now', [{:type => :character, :value => 'bad value'}])
+     f.validate
+     assert f.errors?, "'now' function does not support parameters"
+
+     f = FunctionResolver.new('days', [])
+     f.validate
+     assert f.errors?, "'days' function requires one parameter"
+
+     f = FunctionResolver.new('days', [{:type => :character, :value => 'bad value'}])
+     f.validate
+     assert f.errors?, "'days' function needs integer parameter"
+   end
+
+   def test_invalid_function
+     f = FunctionResolver.new('then', [])
+     f.validate
+     assert f.errors?, "'then' is not a function"
+   end
+
+ end
@@ -0,0 +1,29 @@
+ require 'test_helper'
+
+ class LexerTest < Test::Unit::TestCase
+   include Sparkql
+
+   def test_check_reserved_words_standard_fields
+     ["OrOrOr Eq true", "Equador Eq true", "Oregon Ge 10"].each do |standard_field|
+       @lexer = Lexer.new(standard_field)
+       token = @lexer.shift
+       assert_equal :STANDARD_FIELD, token.first, standard_field
+     end
+   end
+   def test_check_reserved_words_conjunctions
+     ['And Derp', 'Or 123'].each do |conjunction|
+       @lexer = Lexer.new(conjunction)
+       token = @lexer.shift
+       assert_equal :CONJUNCTION, token.first, conjunction
+     end
+   end
+
+   def test_check_reserved_words_operators
+     ['Eq Derp', 'Gt 123'].each do |op|
+       @lexer = Lexer.new(op)
+       token = @lexer.shift
+       assert_equal :OPERATOR, token.first, op
+     end
+   end
+
+ end