sparkql 0.1.8

@@ -0,0 +1,93 @@
+ # $Id$
+ #
+ # SparkQL grammar
+
+ class Sparkql::Parser
+ prechigh
+   nonassoc UMINUS
+ preclow
+ rule
+   target
+     : expressions
+     | /* none */ { result = 0 }
+     ;
+
+   expressions
+     : expression
+     | conjunction
+     ;
+
+   expression
+     : field OPERATOR condition { result = tokenize_expression(val[0], val[1], val[2]) }
+     | group
+     ;
+
+   conjunction
+     : expressions CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1], val[2]) }
+     ;
+
+   group
+     : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
+     ;
+
+   field
+     : STANDARD_FIELD
+     | CUSTOM_FIELD
+     ;
+
+   condition
+     : literal
+     | literal_list
+     | function
+     ;
+
+   function
+     : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
+     | function_name LPAREN function_args RPAREN { result = tokenize_function(val[0], val[2]) }
+     ;
+
+   function_name
+     : KEYWORD
+     ;
+
+   function_args
+     : function_arg
+     | function_args COMMA function_arg
+     ;
+
+   function_arg
+     : literal
+     | literals
+     ;
+
+   literal_list
+     : literals
+     | literal_list COMMA literals { result = tokenize_multiple(val[0], val[2]) }
+     ;
+
+   # Literals that support multiple values
+   literals
+     : INTEGER
+     | DECIMAL
+     | CHARACTER
+     ;
+
+   # Literals that only support a single value
+   literal
+     : DATE
+     | DATETIME
+     | BOOLEAN
+     | NULL
+     ;
+ end
+
+ ---- header
+ # $Id$
+ ---- inner
+   include Sparkql::ParserTools
+   include Sparkql::ParserCompatibility
+
+ ---- footer
+
+ # END PARSER
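
To see what the grammar above produces in practice, here is a small usage sketch (illustrative only; "City" and "BathsTotal" are hypothetical field names, and the abbreviated output follows the token-hash format documented in Sparkql::ParserCompatibility#tokenize below):

    require 'sparkql'

    parser = Sparkql::Parser.new
    expressions = parser.parse("City Eq 'Fargo' And BathsTotal Ge 2")
    # Roughly:
    # [{:field => "City", :operator => "Eq", :value => "'Fargo'", :type => :character,
    #   :conjunction => "And", :level => 0, ...},
    #  {:field => "BathsTotal", :operator => "Ge", :value => "2", :type => :integer,
    #   :conjunction => "And", :level => 0, ...}]
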
@@ -0,0 +1,231 @@
+ # Required interface for existing parser implementations
+ module Sparkql::ParserCompatibility
+
+   MAXIMUM_MULTIPLE_VALUES = 25
+   MAXIMUM_EXPRESSIONS = 50
+   MAXIMUM_LEVEL_DEPTH = 2
+
+   # TODO: I really don't think this is required anymore.
+   # Ordered by precedence.
+   FILTER_VALUES = [
+     {
+       :type => :datetime,
+       :regex => /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}T[0-9]{2}\:[0-9]{2}\:[0-9]{2}\.[0-9]{6}$/,
+       :operators => Sparkql::Token::OPERATORS
+     },
+     {
+       :type => :date,
+       :regex => /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/,
+       :operators => Sparkql::Token::OPERATORS
+     },
+     {
+       :type => :character,
+       # Strings must be single quoted; any single quotes inside must be escaped.
+       :regex => /^'([^'\\]*(\\.[^'\\]*)*)'$/,
+       :multiple => /^'([^'\\]*(\\.[^'\\]*)*)'/,
+       :operators => Sparkql::Token::EQUALITY_OPERATORS
+     },
+     {
+       :type => :integer,
+       :regex => /^\-?[0-9]+$/,
+       :multiple => /^\-?[0-9]+/,
+       :operators => Sparkql::Token::OPERATORS
+     },
+     {
+       :type => :decimal,
+       :regex => /^\-?[0-9]+\.[0-9]+$/,
+       :multiple => /^\-?[0-9]+\.[0-9]+/,
+       :operators => Sparkql::Token::OPERATORS
+     },
+     {
+       :type => :boolean,
+       # Group the alternation so both anchors apply to each alternative.
+       :regex => /^(true|false)$/,
+       :operators => Sparkql::Token::EQUALITY_OPERATORS
+     },
+     {
+       :type => :null,
+       :regex => /^(NULL|Null|null)$/,
+       :operators => Sparkql::Token::EQUALITY_OPERATORS
+     }
+   ]
+
+   OPERATORS_SUPPORTING_MULTIPLES = ["Eq","Ne"]
+
+   # To be implemented by a child class.
+   # Shall return a valid query string for the respective database,
+   # or nil if the source could not be processed. It may be possible to return a valid
+   # SQL string AND have errors (as checked by errors?), but this is left
+   # to the discretion of the child class.
+   def compile( source, mapper )
+     raise NotImplementedError
+   end
+
+   # Returns a list of expressions tokenized in the following format:
+   # [{ :field => IdentifierName, :operator => "Eq", :value => "'Fargo'", :type => :character, :conjunction => "And" }]
+   # This step will set errors if the source is not syntactically correct.
+   def tokenize( source )
+     raise ArgumentError, "You must supply a source string to tokenize!" unless source.is_a?(String)
+
+     # Reset the parser error stack
+     @errors = []
+
+     self.parse(source)
+   end
+
+   # Returns the array of accumulated ParserError objects.
+   def errors
+     @errors = [] unless defined?(@errors)
+     @errors
+   end
+
+   # Delegator for methods that process the error list.
+   def process_errors
+     Sparkql::ErrorsProcessor.new(@errors)
+   end
+
+   # delegate :errors?, :fatal_errors?, :dropped_errors?, :recovered_errors?, :to => :process_errors
+   # Hand-rolled because we don't have Rails' delegate here.
+   def errors?
+     process_errors.errors?
+   end
+
+   def fatal_errors?
+     process_errors.fatal_errors?
+   end
+
+   def dropped_errors?
+     process_errors.dropped_errors?
+   end
+
+   def recovered_errors?
+     process_errors.recovered_errors?
+   end
+
+   def escape_value_list( expression )
+     final_list = []
+     expression[:value].each do |value|
+       new_exp = {
+         :value => value,
+         :type => expression[:type]
+       }
+       final_list << escape_value(new_exp)
+     end
+     expression[:value] = final_list
+   end
+
+   def escape_value( expression )
+     if expression[:value].is_a? Array
+       return escape_value_list( expression )
+     end
+     case expression[:type]
+     when :character
+       return character_escape(expression[:value])
+     when :integer
+       return integer_escape(expression[:value])
+     when :decimal
+       return decimal_escape(expression[:value])
+     when :date
+       return date_escape(expression[:value])
+     when :datetime
+       return datetime_escape(expression[:value])
+     when :boolean
+       return boolean_escape(expression[:value])
+     when :null
+       return nil
+     end
+     expression[:value]
+   end
+
+   # Processes escape characters for a given string. May be overridden by
+   # child classes.
+   def character_escape( string )
+     string.gsub(/^\'/,'').gsub(/\'$/,'').gsub(/\\'/, "'")
+   end
+
+   def integer_escape( string )
+     string.to_i
+   end
+
+   def decimal_escape( string )
+     string.to_f
+   end
+
+   def date_escape(string)
+     Date.parse(string)
+   end
+
+   def datetime_escape(string)
+     DateTime.parse(string)
+   end
+
+   def boolean_escape(string)
+     "true" == string
+   end
+
+   # Returns the rule hash for a given type, or nil if the type is unknown.
+   def rules_for_type( type )
+     FILTER_VALUES.each do |rule|
+       return rule if rule[:type] == type
+     end
+     nil
+   end
+
+   # true if a given type supports multiple values
+   def supports_multiple?( type )
+     rules_for_type(type).include?( :multiple )
+   end
+
+   # Maximum supported nesting level for the parser filters
+   def max_level_depth
+     MAXIMUM_LEVEL_DEPTH
+   end
+
+   private
+
+   def tokenizer_error( error_hash )
+     self.errors << Sparkql::ParserError.new( error_hash )
+   end
+   alias :compile_error :tokenizer_error
+
+   # Checks the type of an expression against what is expected.
+   def check_type!(expression, expected, supports_nulls = true)
+     if expected == expression[:type] || (supports_nulls && expression[:type] == :null)
+       return true
+     elsif expected == :datetime && expression[:type] == :date
+       # A date is acceptable where a datetime is expected; record the cast.
+       expression[:type] = :datetime
+       expression[:cast] = :date
+       return true
+     end
+     type_error(expression, expected)
+     false
+   end
+
+   def type_error( expression, expected )
+     compile_error(:token => expression[:field], :expression => expression,
+       :message => "expected #{expected} but found #{expression[:type]}",
+       :status => :fatal )
+   end
+
+   # Builds the correct operator based on the type and the value.
+   # default is the operator provided in the actual filter string.
+   def get_operator(expression, default )
+     f = rules_for_type(expression[:type])
+     if f[:operators].include?(default)
+       if f[:multiple] && multiple_values?( expression[:value] )
+         return nil unless operator_supports_multiples?(default)
+         return default == "Ne" ? "Not In" : "In"
+       elsif default == "Ne"
+         return "Not Eq"
+       end
+       default
+     else
+       nil
+     end
+   end
+
+   def multiple_values?(value)
+     Array(value).size > 1
+   end
+
+   def operator_supports_multiples?(operator)
+     OPERATORS_SUPPORTING_MULTIPLES.include?(operator)
+   end
+
+ end
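
Conceptually, the escaping helpers above map raw token strings onto native Ruby values, while get_operator folds "Ne" and multi-value lists into "Not Eq"/"In"/"Not In". A short sketch of the public escaping surface (hypothetical values; assumes the parser class, which mixes this module in):

    parser = Sparkql::Parser.new

    parser.escape_value(:type => :character, :value => "'Fargo'")  # => "Fargo"
    parser.escape_value(:type => :decimal,   :value => "10.5")     # => 10.5
    parser.escape_value(:type => :boolean,   :value => "true")     # => true
    parser.escape_value(:type => :null,      :value => "NULL")     # => nil
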
@@ -0,0 +1,93 @@
+ # This is the guts of the parser internals and is mixed into the parser for organization.
+ module Sparkql::ParserTools
+
+   def parse(str)
+     @lexer = Sparkql::Lexer.new(str)
+     results = do_parse
+     max = Sparkql::ParserCompatibility::MAXIMUM_EXPRESSIONS
+     return if results.nil?
+     # Silently truncate to the maximum number of expressions.
+     results.size > max ? results[0,max] : results
+   end
+
+   def next_token
+     t = @lexer.shift
+     while t[0] == :SPACE || t[0] == :NEWLINE
+       t = @lexer.shift
+     end
+     t
+   end
+
+   def tokenize_expression(field, op, val)
+     operator = get_operator(val, op) unless val.nil?
+     custom_field = field.start_with?('"')
+     block_group = (@lexer.level == 0) ? 0 : @lexer.block_group_identifier
+     expression = {:field => field, :operator => operator, :conjunction => 'And',
+       :level => @lexer.level, :block_group => block_group, :custom_field => custom_field}
+     expression = val.merge(expression) unless val.nil?
+     if @lexer.level > max_level_depth
+       compile_error(:token => "(", :expression => expression,
+         :message => "You have exceeded the maximum nesting level. Please nest no more than #{max_level_depth} levels deep.",
+         :status => :fatal, :syntax => false )
+     end
+     if operator.nil?
+       tokenizer_error(:token => op, :expression => expression,
+         :message => "Operator not supported for this type and value string", :status => :fatal )
+     end
+     [expression]
+   end
+
+   def tokenize_conjunction(exp1, conj, exp2)
+     exp2.first[:conjunction] = conj
+     exp1 + exp2
+   end
+
+   def tokenize_group(expressions)
+     @lexer.leveldown
+     expressions
+   end
+
+   def tokenize_multiple(lit1, lit2)
+     if lit1[:type] != lit2[:type]
+       tokenizer_error(:token => @lexer.last_field,
+         :message => "Type mismatch in field list.",
+         :status => :fatal,
+         :syntax => true)
+     end
+     array = Array(lit1[:value])
+     # Silently drop values beyond the maximum.
+     unless array.size >= Sparkql::ParserCompatibility::MAXIMUM_MULTIPLE_VALUES
+       array << lit2[:value]
+     end
+     {
+       :type => lit1[:type],
+       :value => array,
+       :multiple => "true" # TODO ?
+     }
+   end
+
+   def tokenize_function(name, f_args)
+     args = f_args.instance_of?(Array) ? f_args : [f_args]
+     args.each do |arg|
+       arg[:value] = escape_value(arg)
+     end
+     resolver = Sparkql::FunctionResolver.new(name, args)
+
+     resolver.validate
+     if resolver.errors?
+       # NOTE: `errors += ...` would create a new local variable instead of
+       # appending to the parser's error stack, so concat onto the accessor.
+       errors.concat(resolver.errors)
+       nil
+     else
+       resolver.call
+     end
+   end
+
+   def on_error(error_token_id, error_value, value_stack)
+     token_name = token_to_str(error_token_id)
+     token_name.downcase!
+     token = error_value.to_s.inspect
+     tokenizer_error(:token => @lexer.last_field,
+       :message => "Error parsing token #{token_name}",
+       :status => :fatal,
+       :syntax => true)
+   end
+
+ end
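
Note how little state grouping carries: tokenize_group only steps the lexer's level back down, so the group structure survives solely through each expression's :level and :block_group values. A sketch (hypothetical field names, abbreviated output):

    parser = Sparkql::Parser.new
    parser.parse("Status Eq 'Active' And (City Eq 'Fargo' Or City Eq 'Moorhead')")
    # The first expression carries :level => 0; the two parenthesized
    # expressions should share :level => 1 and a common :block_group,
    # with :conjunction => "Or" recorded on the second of the pair.
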
@@ -0,0 +1,21 @@
+ module Sparkql::Token
+   SPACE = /[\t ]+/
+   NEWLINE = /\r\n|\n\r|\r|\n/
+   LPAREN = /\(/
+   RPAREN = /\)/
+   KEYWORD = /[A-Za-z]+/
+   STANDARD_FIELD = /[A-Z]+[A-Za-z]*/
+   # Escape the dot between the two quoted segments so only a literal '.' matches.
+   CUSTOM_FIELD = /^(\"([^$."][^."]+)\"\.\"([^$."][^."]+)\")/
+   INTEGER = /^\-?[0-9]+/
+   DECIMAL = /^\-?[0-9]+\.[0-9]+/
+   CHARACTER = /^'([^'\\]*(\\.[^'\\]*)*)'/
+   DATE = /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}/
+   DATETIME = /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}T[0-9]{2}\:[0-9]{2}\:[0-9]{2}\.[0-9]{6}/
+   # Group the alternation so the anchor applies to both alternatives.
+   BOOLEAN = /^(true|false)/
+   NULL = /NULL|null|Null/
+   # Reserved words
+   EQUALITY_OPERATORS = ['Eq','Ne']
+   # List the non-equality operators here to avoid duplicate entries.
+   OPERATORS = ['Gt','Ge','Lt','Le'] + EQUALITY_OPERATORS
+   CONJUNCTIONS = ['And','Or']
+ end
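
A few spot checks of the token patterns above (illustrative; =~ returns the match offset or nil):

    Sparkql::Token::INTEGER   =~ "-42"                        # => 0
    Sparkql::Token::DECIMAL   =~ "3.14"                       # => 0
    Sparkql::Token::CHARACTER =~ "'Fargo'"                    # => 0
    Sparkql::Token::DATETIME  =~ "2012-05-01T10:15:30.000000" # => 0
    Sparkql::Token::OPERATORS.include?("Gt")                  # => true
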
@@ -0,0 +1,3 @@
+ module Sparkql
+   VERSION = File.read(File.dirname(__FILE__) + "/../../VERSION").chomp
+ end
data/lib/sparkql.rb ADDED
@@ -0,0 +1,13 @@
+ require "sparkql/version"
+ require "sparkql/token"
+ require "sparkql/errors"
+ require "sparkql/expression_state"
+ require "sparkql/lexer"
+ require "sparkql/function_resolver"
+ require "sparkql/parser_tools"
+ require "sparkql/parser_compatibility"
+ require "sparkql/parser"
+
+ module Sparkql
+   # I AM A SPARKQLING MODULE!!!
+ end
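
Putting the pieces together, a minimal end-to-end sketch (hypothetical filter; tokenize is the documented entry point and resets the error stack before parsing):

    require 'sparkql'

    parser = Sparkql::Parser.new
    expressions = parser.tokenize("MlsStatus Eq 'Active'")
    if parser.errors?
      parser.errors.each { |error| puts error.inspect }
    else
      puts expressions.inspect
    end
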
data/sparkql.gemspec ADDED
@@ -0,0 +1,28 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path("../lib", __FILE__)
+ require "sparkql/version"
+
+ Gem::Specification.new do |s|
+   s.name        = "sparkql"
+   s.version     = Sparkql::VERSION
+   s.authors     = ["Wade McEwen"]
+   s.email       = ["wade@fbsdata.com"]
+   s.homepage    = ""
+   s.summary     = %q{API Parser engine for filter searching}
+   s.description = %q{Specification and base implementation of the Spark API parsing system.}
+
+   s.rubyforge_project = "sparkql"
+
+   s.files         = `git ls-files`.split("\n")
+   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+   s.require_paths = ["lib"]
+
+   s.add_development_dependency 'racc', '1.4.8'
+   s.add_development_dependency 'flexmls_gems', '~> 0.2.9'
+   s.add_development_dependency 'rake', '~> 0.9.2'
+   s.add_development_dependency 'test-unit', '~> 2.1.0'
+   s.add_development_dependency 'ci_reporter', '~> 1.6'
+   s.add_development_dependency 'rcov', '~> 0.9.9'
+ end
@@ -0,0 +1,2 @@
+ require 'test/unit'
+ require 'sparkql'
@@ -0,0 +1,57 @@
+ require 'test_helper'
+
+ class ExpressionStateTest < Test::Unit::TestCase
+   include Sparkql
+
+   def setup
+     @subject = ExpressionState.new
+     @parser = Parser.new
+   end
+
+   def test_needs_join
+     filter = '"General Property Description"."Taxes" Lt 500.0'
+     process(filter)
+     assert @subject.needs_join?
+   end
+
+   def test_or
+     filter = '"General Property Description"."Taxes" Lt 500.0 Or "General Property Description"."Taxes" Gt 400.0'
+     process(filter)
+     assert !@subject.needs_join?, "#{@subject.inspect} Expressions: #{@expressions.inspect}"
+   end
+
+   def test_and
+     filter = '"General Property Description"."Taxes" Lt 500.0 And "General Property Description"."Taxes2" Eq 1.0'
+     process(filter)
+     assert @subject.needs_join?
+   end
+
+   def test_and_or
+     filter = '"General Property Description"."Taxes" Lt 500.0 And "General Property Description"."Taxes2" ' +
+              'Eq 1.0 Or "General Property Description"."Taxes" Gt 400.0'
+     process(filter)
+     assert !@subject.needs_join?
+   end
+
+   def test_or_and
+     filter = '"General Property Description"."Taxes" Lt 500.0 Or "General Property Description"."Taxes" ' +
+              'Gt 400.0 And "General Property Description"."Taxes2" Eq 1.0'
+     process(filter)
+     assert @subject.needs_join?
+   end
+
+   def test_or_with_standard_field
+     filter = 'Test Eq 0.0 Or "General Property Description"."Taxes" Lt 500.0'
+     process(filter)
+     assert @subject.needs_join?
+   end
+
+   def process(filter)
+     @expressions = @parser.parse(filter)
+     @expressions.each do |ex|
+       @subject.push(ex) if ex[:custom_field] == true
+     end
+     @expressions
+   end
+
+ end
@@ -0,0 +1,50 @@
+ require 'test_helper'
+
+ # Renamed from ParserTest: every case here exercises FunctionResolver.
+ class FunctionResolverTest < Test::Unit::TestCase
+   include Sparkql
+
+   def test_now
+     start = Time.now
+     f = FunctionResolver.new('now', [])
+     f.validate
+     assert !f.errors?, "Errors #{f.errors.inspect}"
+     value = f.call
+     assert_equal :datetime, value[:type]
+     test_time = Time.parse(value[:value])
+     assert (-5 < test_time - start && 5 > test_time - start), "Time range off by more than five seconds #{test_time - start} '#{test_time} - #{start}'"
+   end
+
+   def test_day
+     d = Date.today
+     dt = DateTime.new(d.year, d.month, d.day, 0, 0, 0, DateTime.now.offset)
+     start = Time.parse(dt.to_s)
+     f = FunctionResolver.new('days', [{:type => :integer, :value => 7}])
+     f.validate
+     assert !f.errors?, "Errors #{f.errors.inspect}"
+     value = f.call
+     assert_equal :date, value[:type]
+     test_time = Time.parse(value[:value])
+     assert (605000 > test_time - start && 604000 < test_time - start), "Time not within expected range of seven days: #{test_time - start} '#{test_time} - #{start}'"
+   end
+
+   def test_invalid_param
+     f = FunctionResolver.new('now', [{:type => :character, :value => 'bad value'}])
+     f.validate
+     assert f.errors?, "'now' function does not support parameters"
+
+     f = FunctionResolver.new('days', [])
+     f.validate
+     assert f.errors?, "'days' function requires one parameter"
+
+     f = FunctionResolver.new('days', [{:type => :character, :value => 'bad value'}])
+     f.validate
+     assert f.errors?, "'days' function needs an integer parameter"
+   end
+
+   def test_invalid_function
+     f = FunctionResolver.new('then', [])
+     f.validate
+     assert f.errors?, "'then' is not a function"
+   end
+
+ end
@@ -0,0 +1,29 @@
+ require 'test_helper'
+
+ class LexerTest < Test::Unit::TestCase
+   include Sparkql
+
+   def test_check_reserved_words_standard_fields
+     ["OrOrOr Eq true", "Equador Eq true", "Oregon Ge 10"].each do |standard_field|
+       @lexer = Lexer.new(standard_field)
+       token = @lexer.shift
+       assert_equal :STANDARD_FIELD, token.first, standard_field
+     end
+   end
+
+   def test_check_reserved_words_conjunctions
+     ['And Derp', 'Or 123'].each do |conjunction|
+       @lexer = Lexer.new(conjunction)
+       token = @lexer.shift
+       assert_equal :CONJUNCTION, token.first, conjunction
+     end
+   end
+
+   def test_check_reserved_words_operators
+     ['Eq Derp', 'Gt 123'].each do |op|
+       @lexer = Lexer.new(op)
+       token = @lexer.shift
+       assert_equal :OPERATOR, token.first, op
+     end
+   end
+
+ end