sparkql 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ lib/sparkql/*.output
5
+ pkg/*
data/.rvmrc ADDED
@@ -0,0 +1,2 @@
1
+ rvm use ree-1.8.7-2011.03
2
+
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "http://gems.dev.fbsdata.com/public"
2
+ source "http://rubygems.org"
3
+
4
+ # Specify your gem's dependencies in sparkapi_parser.gemspec
5
+ gemspec
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ SparkQL query language parser
2
+ =====================
3
+ This gem contains the syntax parser for processing spark api filter queries into manageable
4
+ expressions. To get an overview of the language syntax-wise, refer to the following files:
5
+
6
+ * lib/sparkql/parser.y # BNF Grammar
7
+ * lib/sparkql/token.rb # Token matching rules
8
+
9
+ Installation
10
+ -------------
11
+
12
+ Add the gem to your gemfile:
13
+
14
+ Gemfile
15
+ gem 'sparkql', '~> 0.0.1'
16
+
17
+ When completed, run 'bundle install'.
18
+
19
+
20
+ Usage
21
+ -------------
22
+ See test/unit/parser_test.rb for generic parsing examples. In most cases an extended parser is
23
+ needed to do anything of significance, such as the postgres and db2 search implementations in the
24
+ API.
25
+
26
+ Here is a basic example:
27
+
28
+ expressions = Parser.new.parse("Hello Eq 'World'")
29
+
30
+ The return value will be an array with one expression element containing the query information:
31
+
32
+ {
33
+ :field => "Hello",
34
+ :type => :character,
35
+ :value => "'World'",
36
+ :operator => 'Eq'
37
+ # ...
38
+ }
39
+
40
+
41
+ Development
42
+ -------------
43
+ The parser is based on racc, a yacc like LR parser that is a part of the ruby runtime. The grammar
44
+ is located at lib/sparkql/parser.y and is compiled as part of the test process. Refer to the
45
+ Rakefile for details. When modifying the grammar, please checkin BOTH the parser.y and parser.rb
46
+ files.
47
+
48
+ Debugging grammar issues can be done by hand using the "racc" command. For example, a dump of the
49
+ parser states (and conflicts) can be generated via
50
+
51
+ racc -o lib/sparkql/parser.rb lib/sparkql/parser.y -v # see lib/sparkql/parser.output
52
+
53
+ The rails/journey project was an inspiration for this gem. Look it up on github for reference.
54
+
data/Rakefile ADDED
@@ -0,0 +1,18 @@
# Rakefile for the sparkql gem.
require "rubygems"
require 'rubygems/user_interaction'
require 'flexmls_gems/tasks'
require 'flexmls_gems/tasks/test_unit'
require 'flexmls_gems/tasks/rdoc'

# File rule: regenerate a .rb parser from its racc .y grammar.
# -l omits #line directives so the generated file is stable across builds.
rule '.rb' => '.y' do |t|
  sh "racc -l -o #{t.name} #{t.source}"
end

desc "Compile the racc parser from the grammar"
task :compile => "lib/sparkql/parser.rb"

# Make sure the parser is compiled from the grammar before tests run.
Rake::Task[:test].prerequisites.unshift "lib/sparkql/parser.rb"

desc 'Default: run unit tests.'
task :default => :test
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.8
@@ -0,0 +1,81 @@
module Sparkql

  # Wraps a collection of parse errors and answers questions about their
  # presence and severity.
  class ErrorsProcessor
    attr_accessor :errors

    # errors: array of ParserError instances (nil is treated as empty)
    def initialize( errors )
      @errors = errors || []
    end

    # true if the error stack contains at least one error
    def errors?
      !@errors.empty?
    end

    # true if there is at least one error of status :status in the error stack
    def errors_by_status?( status )
      @errors.any? { |err| err.status == status }
    end

    # true if there is at least one :fatal error in the error stack
    def fatal_errors?
      errors_by_status? :fatal
    end

    # true if there is at least one :dropped error in the error stack
    def dropped_errors?
      errors_by_status? :dropped
    end

    # true if there is at least one :recovered error in the error stack
    def recovered_errors?
      errors_by_status? :recovered
    end

  end

  # A single problem encountered while parsing a filter string.
  class ParserError
    attr_accessor :token, :expression, :message, :status, :recovered_as

    def initialize(error_hash=nil)
      error_hash ||= {}
      @token        = error_hash[:token]
      @expression   = error_hash[:expression]
      @message      = error_hash[:message]
      @status       = error_hash[:status]
      @recovered_as = error_hash[:recovered_as]
      # Errors count as syntax errors unless explicitly flagged otherwise.
      self.syntax = error_hash[:syntax] == false ? false : true
    end

    def syntax=(syntax_error)
      @syntax = syntax_error
    end

    def syntax?
      @syntax
    end

    # Human-readable description, prefixed by severity.
    def to_s
      prefix =
        case @status
        # Dropping the expression isn't special.
        when :dropped then "Dropped: "
        # Fatal errors cannot be recovered from, and should cause analysis or
        # compilation to stop.
        when :fatal then "Fatal: "
        # Recovered errors are syntactically or semantically incorrect, but
        # ones where we could "guess" at the intention.
        when :recovered then
          "Recovered as #{@recovered_as}: "
        else ""
        end
      prefix += "<#{@token}> in " unless @token.nil?
      prefix + "<#{@expression}>: #{@message}."
    end
  end

end
@@ -0,0 +1,20 @@
# Custom fields need to add a table join to the customfieldsearch table when
# AND'd together, but not when they are OR'd. This class maintains the state
# for all custom field expressions and lets the parser know when to do either.
module Sparkql
  class ExpressionState

    def initialize
      @expressions = []
      # Treat the first expression as if it followed an "And", so the very
      # first expression always triggers a join.
      @last_conjunction = "And"
    end

    # Record an expression and remember the conjunction that attached it.
    def push(expression)
      @expressions << expression
      @last_conjunction = expression[:conjunction]
    end

    # A join is needed for the first expression, and again whenever the most
    # recent conjunction was an "And".
    def needs_join?
      @expressions.size == 1 || @last_conjunction == "And"
    end

  end
end
@@ -0,0 +1,106 @@
require 'time'
require 'date'

# Binding class to all supported function calls in the parser. Current support
# requires that the resolution of function calls happens on the fly at parsing
# time, at which point a value and value type is required, just as literals
# would be returned to the expression tokenization level.
#
# Name and argument requirements for the function should match the function
# declaration in SUPPORTED_FUNCTIONS, which will run validation on the
# function syntax prior to execution.
module Sparkql
  class FunctionResolver
    SECONDS_IN_DAY = 60 * 60 * 24

    # Declarations for all supported functions: required argument types and
    # the type the parser should treat the resolved value as.
    # NOTE(review): :days declares :return_type => :datetime, but days() below
    # returns a node of :type => :date — confirm which is intended.
    SUPPORTED_FUNCTIONS = {
      :days => {
        :args => [:integer],
        :return_type => :datetime
      },
      :now => {
        :args => [],
        :return_type => :datetime
      }
    }

    # Construct a resolver instance for a function
    # name: function name (String)
    # args: array of literal hashes of the format
    #       {:type=><literal_type>, :value=><escaped_literal_value>}.
    #       Empty array for functions that have no arguments.
    def initialize(name, args)
      @name = name
      @args = args
      @errors = []
    end

    # Validate the function instance prior to calling it. All validation
    # failures will show up in the errors array.
    def validate()
      name = @name.to_sym
      unless support.has_key?(name)
        @errors << Sparkql::ParserError.new(:token => @name,
          :message => "Unsupported function call '#{@name}' for expression",
          :status => :fatal )
        return
      end
      required_args = support[name][:args]
      unless required_args.size == @args.size
        @errors << Sparkql::ParserError.new(:token => @name,
          :message => "Function call '#{@name}' requires #{required_args.size} arguments",
          :status => :fatal )
        return
      end

      # Each argument must match the declared type, position by position.
      count = 0
      @args.each do |arg|
        unless arg[:type] == required_args[count]
          @errors << Sparkql::ParserError.new(:token => @name,
            :message => "Function call '#{@name}' has an invalid argument at #{arg[:value]}",
            :status => :fatal )
        end
        count += 1
      end
    end

    # Declared return type for this function. Only valid for supported
    # functions, so run validate first.
    # BUGFIX: previously called the undefined method `supported`, which raised
    # NameError on every invocation; the lookup accessor is `support`.
    def return_type
      support[@name.to_sym][:return_type]
    end

    def errors
      @errors
    end

    def errors?
      @errors.size > 0
    end

    def support
      SUPPORTED_FUNCTIONS
    end

    # Execute the function by dispatching to the implementation method of the
    # same name, passing the raw argument values.
    def call()
      real_vals = @args.map { |i| i[:value] }
      self.send(@name.to_sym, *real_vals)
    end

    protected

    # Supported function calls

    # Offset the current date by a number of days.
    # The date is calculated as the offset from midnight tomorrow; zero will
    # provide values for all times today.
    def days(num)
      d = Date.today + num
      {
        :type => :date,
        :value => d.to_s
      }
    end

    # The current timestamp
    def now()
      {
        :type => :datetime,
        :value => Time.now.iso8601
      }
    end
  end
end
@@ -0,0 +1,114 @@
# Lexer for the SparkQL filter language. Wraps StringScanner and emits
# [TOKEN_SYMBOL, value] pairs for the racc-generated parser.
module Sparkql
  class Lexer < StringScanner
    include Sparkql::Token

    attr_reader :level, :block_group_identifier, :last_field

    def initialize(str)
      str.freeze
      super(str, false) # DO NOT dup str
      @level = 0
      @block_group_identifier = 0
      @expression_count = 0
    end

    # Lookup the next matching token.
    #
    # TODO the old implementation did value type detection conversion at a
    # later date; we can perform this at parse time if we want!
    def shift
      token =
        case
        when value = scan(SPACE)
          [:SPACE, value]
        when value = scan(LPAREN)
          levelup
          [:LPAREN, value]
        when value = scan(RPAREN)
          # leveldown is deferred until after the group has been parsed
          [:RPAREN, value]
        when value = scan(/\,/)
          [:COMMA, value]
        when value = scan(NULL)
          literal :NULL, "NULL"
        when value = scan(STANDARD_FIELD)
          check_standard_fields(value)
        when value = scan(DATETIME)
          literal :DATETIME, value
        when value = scan(DATE)
          literal :DATE, value
        when value = scan(DECIMAL)
          literal :DECIMAL, value
        when value = scan(INTEGER)
          literal :INTEGER, value
        when value = scan(CHARACTER)
          literal :CHARACTER, value
        when value = scan(BOOLEAN)
          literal :BOOLEAN, value
        when value = scan(KEYWORD)
          check_keywords(value)
        when value = scan(CUSTOM_FIELD)
          [:CUSTOM_FIELD, value]
        when empty?
          [false, false] # end of file, \Z doesn't work with StringScanner
        else
          [:UNKNOWN, "ERROR: '#{self.string}'"]
        end
      token.freeze
    end

    # Classify a word as an operator or conjunction; :UNKNOWN otherwise.
    def check_reserved_words(value)
      candidate = value.capitalize
      if OPERATORS.include?(candidate)
        [:OPERATOR, candidate]
      elsif CONJUNCTIONS.include?(candidate)
        [:CONJUNCTION, candidate]
      else
        [:UNKNOWN, "ERROR: '#{self.string}'"]
      end
    end

    # A standard-field match may actually be a reserved word; when it is not,
    # remember it as the most recent field seen.
    def check_standard_fields(value)
      result = check_reserved_words(value)
      if result.first == :UNKNOWN
        @last_field = value
        result = [:STANDARD_FIELD, value]
      end
      result
    end

    # A keyword match may actually be a reserved word.
    def check_keywords(value)
      result = check_reserved_words(value)
      result = [:KEYWORD, value] if result.first == :UNKNOWN
      result
    end

    # Entering a parenthesized group: track nesting depth and hand the group
    # a unique identifier.
    def levelup
      @level += 1
      @block_group_identifier += 1
    end

    def leveldown
      @level -= 1
    end

    # Package a literal as a [SYMBOL, {:type=>..., :value=>...}] token node.
    def literal(symbol, value)
      node = {
        :type => symbol.to_s.downcase.to_sym,
        :value => value
      }
      [symbol, node]
    end

  end
end
@@ -0,0 +1,268 @@
#
# DO NOT MODIFY!!!!
# This file is automatically generated by Racc 1.4.8
# from Racc grammer file "".
#
# NOTE(review): edit lib/sparkql/parser.y and regenerate via the Rakefile's
# '.rb' => '.y' racc rule instead of changing this file by hand.

require 'racc/parser.rb'

# $Id$
module Sparkql
  class Parser < Racc::Parser

    include Sparkql::ParserTools
    include Sparkql::ParserCompatibility

    ##### State transition tables begin ###

    racc_action_table = [
      35, 4, 31, 5, 6, 14, 15, 16, 18, 19,
      21, 23, 26, 32, 14, 15, 16, 18, 19, 21,
      23, 14, 15, 16, 18, 19, 21, 23, 39, 13,
      12, 4, 40, 5, 6, 4, 10, 5, 6, 13,
      29, 28, 14, 15, 16 ]

    racc_action_check = [
      32, 13, 22, 13, 13, 32, 32, 32, 32, 32,
      32, 32, 10, 25, 10, 10, 10, 10, 10, 10,
      10, 40, 40, 40, 40, 40, 40, 40, 36, 8,
      7, 4, 36, 4, 4, 0, 2, 0, 0, 11,
      12, 11, 31, 31, 31 ]

    racc_action_pointer = [
      30, nil, 33, nil, 26, nil, nil, 30, 25, nil,
      3, 35, 40, -4, nil, nil, nil, nil, nil, nil,
      nil, nil, -8, nil, nil, 8, nil, nil, nil, nil,
      nil, 31, -6, nil, nil, nil, 22, nil, nil, nil,
      10, nil ]

    racc_action_default = [
      -2, -4, -30, -6, -30, -9, -10, -30, -1, -3,
      -30, -30, -30, -30, -23, -24, -25, -5, -26, -27,
      -11, -28, -12, -29, -13, -30, -16, -21, -8, 42,
      -7, -30, -30, -22, -19, -14, -30, -17, -20, -15,
      -30, -18 ]

    racc_goto_table = [
      27, 37, 8, 22, 20, 17, 11, 24, 25, 41,
      36, 30, 7, nil, nil, nil, nil, nil, nil, nil,
      nil, 33 ]

    racc_goto_check = [
      14, 13, 2, 9, 8, 6, 2, 10, 11, 13,
      12, 3, 1, nil, nil, nil, nil, nil, nil, nil,
      nil, 14 ]

    racc_goto_pointer = [
      nil, 12, 2, -2, nil, nil, -5, nil, -6, -7,
      -3, -2, -22, -31, -10 ]

    racc_goto_default = [
      nil, nil, nil, 9, 1, 2, nil, 3, 34, nil,
      nil, nil, nil, nil, 38 ]

    racc_reduce_table = [
      0, 0, :racc_error,
      1, 19, :_reduce_none,
      0, 19, :_reduce_2,
      1, 20, :_reduce_none,
      1, 20, :_reduce_none,
      3, 21, :_reduce_5,
      1, 21, :_reduce_none,
      3, 22, :_reduce_7,
      3, 25, :_reduce_8,
      1, 23, :_reduce_none,
      1, 23, :_reduce_none,
      1, 24, :_reduce_none,
      1, 24, :_reduce_none,
      1, 24, :_reduce_none,
      3, 28, :_reduce_14,
      4, 28, :_reduce_15,
      1, 29, :_reduce_none,
      1, 30, :_reduce_none,
      3, 30, :_reduce_none,
      1, 31, :_reduce_none,
      1, 31, :_reduce_none,
      1, 27, :_reduce_none,
      3, 27, :_reduce_22,
      1, 32, :_reduce_none,
      1, 32, :_reduce_none,
      1, 32, :_reduce_none,
      1, 26, :_reduce_none,
      1, 26, :_reduce_none,
      1, 26, :_reduce_none,
      1, 26, :_reduce_none ]

    racc_reduce_n = 30

    racc_shift_n = 42

    racc_token_table = {
      false => 0,
      :error => 1,
      :UMINUS => 2,
      :OPERATOR => 3,
      :CONJUNCTION => 4,
      :LPAREN => 5,
      :RPAREN => 6,
      :STANDARD_FIELD => 7,
      :CUSTOM_FIELD => 8,
      :KEYWORD => 9,
      :COMMA => 10,
      :INTEGER => 11,
      :DECIMAL => 12,
      :CHARACTER => 13,
      :DATE => 14,
      :DATETIME => 15,
      :BOOLEAN => 16,
      :NULL => 17 }

    racc_nt_base = 18

    racc_use_result_var = true

    Racc_arg = [
      racc_action_table,
      racc_action_check,
      racc_action_default,
      racc_action_pointer,
      racc_goto_table,
      racc_goto_check,
      racc_goto_default,
      racc_goto_pointer,
      racc_nt_base,
      racc_reduce_table,
      racc_token_table,
      racc_shift_n,
      racc_reduce_n,
      racc_use_result_var ]

    Racc_token_to_s_table = [
      "$end",
      "error",
      "UMINUS",
      "OPERATOR",
      "CONJUNCTION",
      "LPAREN",
      "RPAREN",
      "STANDARD_FIELD",
      "CUSTOM_FIELD",
      "KEYWORD",
      "COMMA",
      "INTEGER",
      "DECIMAL",
      "CHARACTER",
      "DATE",
      "DATETIME",
      "BOOLEAN",
      "NULL",
      "$start",
      "target",
      "expressions",
      "expression",
      "conjunction",
      "field",
      "condition",
      "group",
      "literal",
      "literal_list",
      "function",
      "function_name",
      "function_args",
      "function_arg",
      "literals" ]

    Racc_debug_parser = false

    ##### State transition tables end #####

    # reduce 0 omitted

    # reduce 1 omitted

    def _reduce_2(val, _values, result)
      result = 0
      result
    end

    # reduce 3 omitted

    # reduce 4 omitted

    def _reduce_5(val, _values, result)
      result = tokenize_expression(val[0], val[1],val[2])
      result
    end

    # reduce 6 omitted

    def _reduce_7(val, _values, result)
      result = tokenize_conjunction(val[0], val[1],val[2])
      result
    end

    def _reduce_8(val, _values, result)
      result = tokenize_group(val[1])
      result
    end

    # reduce 9 omitted

    # reduce 10 omitted

    # reduce 11 omitted

    # reduce 12 omitted

    # reduce 13 omitted

    def _reduce_14(val, _values, result)
      result = tokenize_function(val[0], [])
      result
    end

    def _reduce_15(val, _values, result)
      result = tokenize_function(val[0], val[2])
      result
    end

    # reduce 16 omitted

    # reduce 17 omitted

    # reduce 18 omitted

    # reduce 19 omitted

    # reduce 20 omitted

    # reduce 21 omitted

    def _reduce_22(val, _values, result)
      result = tokenize_multiple(val[0], val[2])
      result
    end

    # reduce 23 omitted

    # reduce 24 omitted

    # reduce 25 omitted

    # reduce 26 omitted

    # reduce 27 omitted

    # reduce 28 omitted

    # reduce 29 omitted

    def _reduce_none(val, _values, result)
      val[0]
    end

  end # class Parser
end # module Sparkql


# END PARSER