sparkql 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ lib/sparkql/*.output
5
+ pkg/*
data/.rvmrc ADDED
@@ -0,0 +1,2 @@
1
+ rvm use ree-1.8.7-2011.03
2
+
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "http://gems.dev.fbsdata.com/public"
2
+ source "http://rubygems.org"
3
+
4
+ # Specify your gem's dependencies in sparkapi_parser.gemspec
5
+ gemspec
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ SparkQL query language parser
2
+ =====================
3
+ This gem contains the syntax parser for processing spark api filter queries into manageable
4
+ expressions. To get an overview of the language syntax-wise, refer to the following files:
5
+
6
+ * lib/sparkql/parser.y # BNF Grammar
7
+ * lib/sparkql/token.rb # Token matching rules
8
+
9
+ Installation
10
+ -------------
11
+
12
+ Add the gem to your gemfile:
13
+
14
+ Gemfile
15
+ gem 'sparkql', '~> 0.0.1'
16
+
17
+ When completed, run 'bundle install'.
18
+
19
+
20
+ Usage
21
+ -------------
22
+ See test/unit/parser_test.rb for generic parsing examples. In most cases an extended parser is
23
+ needed to do anything of significance, such as the postgres and db2 search implementations in the
24
+ API.
25
+
26
+ Here is a basic example:
27
+
28
+ expressions = Parser.new.parse("Hello Eq 'World'")
29
+
30
+ The return value will be an array with one expression element containing the query information:
31
+
32
+ {
33
+ :field => "Hello",
34
+ :type => :character,
35
+ :value => "'World'",
36
+ :operator => 'Eq'
37
+ # ...
38
+ }
39
+
40
+
41
+ Development
42
+ -------------
43
+ The parser is based on racc, a yacc like LR parser that is a part of the ruby runtime. The grammar
44
+ is located at lib/sparkql/parser.y and is compiled as part of the test process. Refer to the
45
+ Rakefile for details. When modifying the grammar, please check in BOTH the parser.y and parser.rb
46
+ files.
47
+
48
+ Debugging grammar issues can be done by hand using the "racc" command. For example, a dump of the
49
+ parser states (and conflicts) can be generated via
50
+
51
+ racc -o lib/sparkql/parser.rb lib/sparkql/parser.y -v # see lib/sparkql/parser.output
52
+
53
+ The rails/journey project was an inspiration for this gem. Look it up on github for reference.
54
+
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ require "rubygems"
2
+ require 'rubygems/user_interaction'
3
+ require 'flexmls_gems/tasks'
4
+ require 'flexmls_gems/tasks/test_unit'
5
+ require 'flexmls_gems/tasks/rdoc'
6
+
7
+ rule '.rb' => '.y' do |t|
8
+ sh "racc -l -o #{t.name} #{t.source}"
9
+ end
10
+
11
+ desc "Compile the racc parser from the grammar"
12
+ task :compile => "lib/sparkql/parser.rb"
13
+
14
+ Rake::Task[:test].prerequisites.unshift "lib/sparkql/parser.rb"
15
+
16
+ desc 'Default: run unit tests.'
17
+ task :default => :test
18
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.8
@@ -0,0 +1,81 @@
1
+ module Sparkql
2
+
3
+ class ErrorsProcessor
4
+ attr_accessor :errors
5
+
6
+ def initialize( errors )
7
+ @errors = errors || []
8
+ end
9
+
10
+ # true if the error stack contains at least one error
11
+ def errors?
12
+ @errors.size > 0
13
+ end
14
+
15
+ # true if there is at least one error of status :status in the error stack
16
+ def errors_by_status?( status )
17
+ @errors.each do | error |
18
+ return true if status == error.status
19
+ end
20
+ false
21
+ end
22
+
23
+ # true if there is at least one :fatal error in the error stack
24
+ def fatal_errors?
25
+ errors_by_status? :fatal
26
+ end
27
+
28
+ # true if there is at least one :dropped error in the error stack
29
+ def dropped_errors?
30
+ errors_by_status? :dropped
31
+ end
32
+
33
+ # true if there is at least one :recovered error in the error stack
34
+ def recovered_errors?
35
+ errors_by_status? :recovered
36
+ end
37
+
38
+ end
39
+
40
+ class ParserError
41
+ attr_accessor :token, :expression, :message, :status, :recovered_as
42
+
43
+ def initialize(error_hash=nil)
44
+ error_hash = {} if error_hash.nil?
45
+ @token = error_hash[:token]
46
+ @expression = error_hash[:expression]
47
+ @message = error_hash[:message]
48
+ @status = error_hash[:status]
49
+ @recovered_as = error_hash[:recovered_as]
50
+ self.syntax= error_hash[:syntax] == false ? false : true
51
+ end
52
+
53
+ def syntax=(syntax_error)
54
+ @syntax = syntax_error
55
+ end
56
+
57
+ def syntax?
58
+ @syntax
59
+ end
60
+
61
+ def to_s
62
+ str = case @status
63
+ # Do nothing. Dropping the expressions isn't special
64
+ when :dropped then "Dropped: "
65
+ # Fatal errors cannot be recovered from, and should cause analysis or
66
+ # compilation to stop.
67
+ when :fatal then "Fatal: "
68
+ # Recovered errors are those that are syntactically
69
+ # or semantically incorrect, but are ones that we could "guess" at the
70
+ # intention
71
+ when :recovered then
72
+ "Recovered as #{@recovered_as}: "
73
+ else ""
74
+ end
75
+ str += "<#{@token}> in " unless @token.nil?
76
+ str += "<#{@expression}>: #{@message}."
77
+ str
78
+ end
79
+ end
80
+
81
+ end
@@ -0,0 +1,20 @@
1
+ # Custom fields need to add a table join to the customfieldsearch table when AND'd together,
2
+ # but not when they are OR'd. This class maintains the state for all custom field expressions
3
+ # and lets the parser know when to do either.
4
+ class Sparkql::ExpressionState
5
+
6
+ def initialize
7
+ @expressions = []
8
+ @last_conjunction = "And" # always start with a join
9
+ end
10
+
11
+ def push(expression)
12
+ @expressions << expression
13
+ @last_conjunction = expression[:conjunction]
14
+ end
15
+
16
+ def needs_join?
17
+ return @expressions.size == 1 || "And" == @last_conjunction
18
+ end
19
+
20
+ end
@@ -0,0 +1,106 @@
1
+ require 'time'
2
+
3
+ # Binding class to all supported function calls in the parser. Current support requires that the
4
+ # resolution of function calls to happen on the fly at parsing time at which point a value and
5
+ # value type is required, just as literals would be returned to the expression tokenization level.
6
+ #
7
+ # Name and argument requirements for the function should match the function declaration in
8
+ # SUPPORTED_FUNCTIONS which will run validation on the function syntax prior to execution.
9
+ class Sparkql::FunctionResolver
10
+ SECONDS_IN_DAY = 60 * 60 * 24
11
+
12
+ SUPPORTED_FUNCTIONS = {
13
+ :days => {
14
+ :args => [:integer],
15
+ :return_type => :datetime
16
+ },
17
+ :now => {
18
+ :args => [],
19
+ :return_type => :datetime
20
+ }
21
+ }
22
+
23
+ # Construct a resolver instance for a function
24
+ # name: function name (String)
25
+ # args: array of literal hashes of the format {:type=><literal_type>, :value=><escaped_literal_value>}.
26
+ # Empty array for functions that have no arguments.
27
+ def initialize(name, args)
28
+ @name = name
29
+ @args = args
30
+ @errors = []
31
+ end
32
+
33
+ # Validate the function instance prior to calling it. All validation failures will show up in the
34
+ # errors array.
35
+ def validate()
36
+ name = @name.to_sym
37
+ unless support.has_key?(name)
38
+ @errors << Sparkql::ParserError.new(:token => @name,
39
+ :message => "Unsupported function call '#{@name}' for expression",
40
+ :status => :fatal )
41
+ return
42
+ end
43
+ required_args = support[name][:args]
44
+ unless required_args.size == @args.size
45
+ @errors << Sparkql::ParserError.new(:token => @name,
46
+ :message => "Function call '#{@name}' requires #{required_args.size} arguments",
47
+ :status => :fatal )
48
+ return
49
+ end
50
+
51
+ count = 0
52
+ @args.each do |arg|
53
+ unless arg[:type] == required_args[count]
54
+ @errors << Sparkql::ParserError.new(:token => @name,
55
+ :message => "Function call '#{@name}' has an invalid argument at #{arg[:value]}",
56
+ :status => :fatal )
57
+ end
58
+ count +=1
59
+ end
60
+ end
61
+
62
+ def return_type
63
+ supported[@name.to_sym][:return_type]
64
+ end
65
+
66
+ def errors
67
+ @errors
68
+ end
69
+
70
+ def errors?
71
+ @errors.size > 0
72
+ end
73
+
74
+ def support
75
+ SUPPORTED_FUNCTIONS
76
+ end
77
+
78
+ # Execute the function
79
+ def call()
80
+ real_vals = @args.map { |i| i[:value]}
81
+ self.send(@name.to_sym, *real_vals)
82
+ end
83
+
84
+ protected
85
+
86
+ # Supported function calls
87
+
88
+ # Offset the current timestamp by a number of days
89
+ def days(num)
90
+ # date calculated as the offset from midnight tomorrow. Zero will provide values for all times
91
+ # today.
92
+ d = Date.today + num
93
+ {
94
+ :type => :date,
95
+ :value => d.to_s
96
+ }
97
+ end
98
+
99
+ # The current timestamp
100
+ def now()
101
+ {
102
+ :type => :datetime,
103
+ :value => Time.now.iso8601
104
+ }
105
+ end
106
+ end
@@ -0,0 +1,114 @@
1
+ class Sparkql::Lexer < StringScanner
2
+ include Sparkql::Token
3
+
4
+ def initialize(str)
5
+ str.freeze
6
+ super(str, false) # DO NOT dup str
7
+ @level = 0
8
+ @block_group_identifier = 0
9
+ @expression_count = 0
10
+ end
11
+
12
+ # Lookup the next matching token
13
+ #
14
+ # TODO the old implementation did value type detection conversion at a later date, we can perform
15
+ # this at parse time if we want!!!!
16
+ def shift
17
+ token = case
18
+ when value = scan(SPACE)
19
+ [:SPACE, value]
20
+ when value = scan(LPAREN)
21
+ levelup
22
+ [:LPAREN, value]
23
+ when value = scan(RPAREN)
24
+ # leveldown do this after parsing group
25
+ [:RPAREN, value]
26
+ when value = scan(/\,/)
27
+ [:COMMA,value]
28
+ when value = scan(NULL)
29
+ literal :NULL, "NULL"
30
+ when value = scan(STANDARD_FIELD)
31
+ check_standard_fields(value)
32
+ when value = scan(DATETIME)
33
+ literal :DATETIME, value
34
+ when value = scan(DATE)
35
+ literal :DATE, value
36
+ when value = scan(DECIMAL)
37
+ literal :DECIMAL, value
38
+ when value = scan(INTEGER)
39
+ literal :INTEGER, value
40
+ when value = scan(CHARACTER)
41
+ literal :CHARACTER, value
42
+ when value = scan(BOOLEAN)
43
+ literal :BOOLEAN, value
44
+ when value = scan(KEYWORD)
45
+ check_keywords(value)
46
+ when value = scan(CUSTOM_FIELD)
47
+ [:CUSTOM_FIELD,value]
48
+ when empty?
49
+ [false, false] # end of file, \Z don't work with StringScanner
50
+ else
51
+ [:UNKNOWN, "ERROR: '#{self.string}'"]
52
+ end
53
+ #value.freeze
54
+ token.freeze
55
+ end
56
+
57
+ def check_reserved_words(value)
58
+ u_value = value.capitalize
59
+ if OPERATORS.include?(u_value)
60
+ [:OPERATOR,u_value]
61
+ elsif CONJUNCTIONS.include?(u_value)
62
+ [:CONJUNCTION,u_value]
63
+ else
64
+ [:UNKNOWN, "ERROR: '#{self.string}'"]
65
+ end
66
+ end
67
+
68
+ def check_standard_fields(value)
69
+ result = check_reserved_words(value)
70
+ if result.first == :UNKNOWN
71
+ @last_field = value
72
+ result = [:STANDARD_FIELD,value]
73
+ end
74
+ result
75
+ end
76
+
77
+ def check_keywords(value)
78
+ result = check_reserved_words(value)
79
+ if result.first == :UNKNOWN
80
+ result = [:KEYWORD,value]
81
+ end
82
+ result
83
+ end
84
+
85
+ def level
86
+ @level
87
+ end
88
+
89
+ def block_group_identifier
90
+ @block_group_identifier
91
+ end
92
+
93
+ def levelup
94
+ @level += 1
95
+ @block_group_identifier += 1
96
+ end
97
+
98
+ def leveldown
99
+ @level -= 1
100
+ end
101
+
102
+ def literal(symbol, value)
103
+ node = {
104
+ :type => symbol.to_s.downcase.to_sym,
105
+ :value => value
106
+ }
107
+ [symbol, node]
108
+ end
109
+
110
+ def last_field
111
+ @last_field
112
+ end
113
+
114
+ end
@@ -0,0 +1,268 @@
1
+ #
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by Racc 1.4.8
4
+ # from Racc grammer file "".
5
+ #
6
+
7
+ require 'racc/parser.rb'
8
+
9
+ # $Id$
10
+ module Sparkql
11
+ class Parser < Racc::Parser
12
+
13
+ include Sparkql::ParserTools
14
+ include Sparkql::ParserCompatibility
15
+
16
+ ##### State transition tables begin ###
17
+
18
+ racc_action_table = [
19
+ 35, 4, 31, 5, 6, 14, 15, 16, 18, 19,
20
+ 21, 23, 26, 32, 14, 15, 16, 18, 19, 21,
21
+ 23, 14, 15, 16, 18, 19, 21, 23, 39, 13,
22
+ 12, 4, 40, 5, 6, 4, 10, 5, 6, 13,
23
+ 29, 28, 14, 15, 16 ]
24
+
25
+ racc_action_check = [
26
+ 32, 13, 22, 13, 13, 32, 32, 32, 32, 32,
27
+ 32, 32, 10, 25, 10, 10, 10, 10, 10, 10,
28
+ 10, 40, 40, 40, 40, 40, 40, 40, 36, 8,
29
+ 7, 4, 36, 4, 4, 0, 2, 0, 0, 11,
30
+ 12, 11, 31, 31, 31 ]
31
+
32
+ racc_action_pointer = [
33
+ 30, nil, 33, nil, 26, nil, nil, 30, 25, nil,
34
+ 3, 35, 40, -4, nil, nil, nil, nil, nil, nil,
35
+ nil, nil, -8, nil, nil, 8, nil, nil, nil, nil,
36
+ nil, 31, -6, nil, nil, nil, 22, nil, nil, nil,
37
+ 10, nil ]
38
+
39
+ racc_action_default = [
40
+ -2, -4, -30, -6, -30, -9, -10, -30, -1, -3,
41
+ -30, -30, -30, -30, -23, -24, -25, -5, -26, -27,
42
+ -11, -28, -12, -29, -13, -30, -16, -21, -8, 42,
43
+ -7, -30, -30, -22, -19, -14, -30, -17, -20, -15,
44
+ -30, -18 ]
45
+
46
+ racc_goto_table = [
47
+ 27, 37, 8, 22, 20, 17, 11, 24, 25, 41,
48
+ 36, 30, 7, nil, nil, nil, nil, nil, nil, nil,
49
+ nil, 33 ]
50
+
51
+ racc_goto_check = [
52
+ 14, 13, 2, 9, 8, 6, 2, 10, 11, 13,
53
+ 12, 3, 1, nil, nil, nil, nil, nil, nil, nil,
54
+ nil, 14 ]
55
+
56
+ racc_goto_pointer = [
57
+ nil, 12, 2, -2, nil, nil, -5, nil, -6, -7,
58
+ -3, -2, -22, -31, -10 ]
59
+
60
+ racc_goto_default = [
61
+ nil, nil, nil, 9, 1, 2, nil, 3, 34, nil,
62
+ nil, nil, nil, nil, 38 ]
63
+
64
+ racc_reduce_table = [
65
+ 0, 0, :racc_error,
66
+ 1, 19, :_reduce_none,
67
+ 0, 19, :_reduce_2,
68
+ 1, 20, :_reduce_none,
69
+ 1, 20, :_reduce_none,
70
+ 3, 21, :_reduce_5,
71
+ 1, 21, :_reduce_none,
72
+ 3, 22, :_reduce_7,
73
+ 3, 25, :_reduce_8,
74
+ 1, 23, :_reduce_none,
75
+ 1, 23, :_reduce_none,
76
+ 1, 24, :_reduce_none,
77
+ 1, 24, :_reduce_none,
78
+ 1, 24, :_reduce_none,
79
+ 3, 28, :_reduce_14,
80
+ 4, 28, :_reduce_15,
81
+ 1, 29, :_reduce_none,
82
+ 1, 30, :_reduce_none,
83
+ 3, 30, :_reduce_none,
84
+ 1, 31, :_reduce_none,
85
+ 1, 31, :_reduce_none,
86
+ 1, 27, :_reduce_none,
87
+ 3, 27, :_reduce_22,
88
+ 1, 32, :_reduce_none,
89
+ 1, 32, :_reduce_none,
90
+ 1, 32, :_reduce_none,
91
+ 1, 26, :_reduce_none,
92
+ 1, 26, :_reduce_none,
93
+ 1, 26, :_reduce_none,
94
+ 1, 26, :_reduce_none ]
95
+
96
+ racc_reduce_n = 30
97
+
98
+ racc_shift_n = 42
99
+
100
+ racc_token_table = {
101
+ false => 0,
102
+ :error => 1,
103
+ :UMINUS => 2,
104
+ :OPERATOR => 3,
105
+ :CONJUNCTION => 4,
106
+ :LPAREN => 5,
107
+ :RPAREN => 6,
108
+ :STANDARD_FIELD => 7,
109
+ :CUSTOM_FIELD => 8,
110
+ :KEYWORD => 9,
111
+ :COMMA => 10,
112
+ :INTEGER => 11,
113
+ :DECIMAL => 12,
114
+ :CHARACTER => 13,
115
+ :DATE => 14,
116
+ :DATETIME => 15,
117
+ :BOOLEAN => 16,
118
+ :NULL => 17 }
119
+
120
+ racc_nt_base = 18
121
+
122
+ racc_use_result_var = true
123
+
124
+ Racc_arg = [
125
+ racc_action_table,
126
+ racc_action_check,
127
+ racc_action_default,
128
+ racc_action_pointer,
129
+ racc_goto_table,
130
+ racc_goto_check,
131
+ racc_goto_default,
132
+ racc_goto_pointer,
133
+ racc_nt_base,
134
+ racc_reduce_table,
135
+ racc_token_table,
136
+ racc_shift_n,
137
+ racc_reduce_n,
138
+ racc_use_result_var ]
139
+
140
+ Racc_token_to_s_table = [
141
+ "$end",
142
+ "error",
143
+ "UMINUS",
144
+ "OPERATOR",
145
+ "CONJUNCTION",
146
+ "LPAREN",
147
+ "RPAREN",
148
+ "STANDARD_FIELD",
149
+ "CUSTOM_FIELD",
150
+ "KEYWORD",
151
+ "COMMA",
152
+ "INTEGER",
153
+ "DECIMAL",
154
+ "CHARACTER",
155
+ "DATE",
156
+ "DATETIME",
157
+ "BOOLEAN",
158
+ "NULL",
159
+ "$start",
160
+ "target",
161
+ "expressions",
162
+ "expression",
163
+ "conjunction",
164
+ "field",
165
+ "condition",
166
+ "group",
167
+ "literal",
168
+ "literal_list",
169
+ "function",
170
+ "function_name",
171
+ "function_args",
172
+ "function_arg",
173
+ "literals" ]
174
+
175
+ Racc_debug_parser = false
176
+
177
+ ##### State transition tables end #####
178
+
179
+ # reduce 0 omitted
180
+
181
+ # reduce 1 omitted
182
+
183
+ def _reduce_2(val, _values, result)
184
+ result = 0
185
+ result
186
+ end
187
+
188
+ # reduce 3 omitted
189
+
190
+ # reduce 4 omitted
191
+
192
+ def _reduce_5(val, _values, result)
193
+ result = tokenize_expression(val[0], val[1],val[2])
194
+ result
195
+ end
196
+
197
+ # reduce 6 omitted
198
+
199
+ def _reduce_7(val, _values, result)
200
+ result = tokenize_conjunction(val[0], val[1],val[2])
201
+ result
202
+ end
203
+
204
+ def _reduce_8(val, _values, result)
205
+ result = tokenize_group(val[1])
206
+ result
207
+ end
208
+
209
+ # reduce 9 omitted
210
+
211
+ # reduce 10 omitted
212
+
213
+ # reduce 11 omitted
214
+
215
+ # reduce 12 omitted
216
+
217
+ # reduce 13 omitted
218
+
219
+ def _reduce_14(val, _values, result)
220
+ result = tokenize_function(val[0], [])
221
+ result
222
+ end
223
+
224
+ def _reduce_15(val, _values, result)
225
+ result = tokenize_function(val[0], val[2])
226
+ result
227
+ end
228
+
229
+ # reduce 16 omitted
230
+
231
+ # reduce 17 omitted
232
+
233
+ # reduce 18 omitted
234
+
235
+ # reduce 19 omitted
236
+
237
+ # reduce 20 omitted
238
+
239
+ # reduce 21 omitted
240
+
241
+ def _reduce_22(val, _values, result)
242
+ result = tokenize_multiple(val[0], val[2])
243
+ result
244
+ end
245
+
246
+ # reduce 23 omitted
247
+
248
+ # reduce 24 omitted
249
+
250
+ # reduce 25 omitted
251
+
252
+ # reduce 26 omitted
253
+
254
+ # reduce 27 omitted
255
+
256
+ # reduce 28 omitted
257
+
258
+ # reduce 29 omitted
259
+
260
+ def _reduce_none(val, _values, result)
261
+ val[0]
262
+ end
263
+
264
+ end # class Parser
265
+ end # module Sparkql
266
+
267
+
268
+ # END PARSER