sparkql 1.2.1 → 1.2.2

data/lib/sparkql/parser.y CHANGED
@@ -6,7 +6,7 @@ class Sparkql::Parser
  
  ###############################################################################
  # READ THIS!
- # The grammar documentation is parsed from this file and is in a sensitive
+ # The grammar documentation is parsed from this file and is in a sensitive
  # syntax between the START_MARKDOWN and STOP_MARKDOWN keywords. In general, all
  # line comments will be treated as markdown text, and everything else is padded
  # for code formatting
@@ -15,24 +15,26 @@ class Sparkql::Parser
  #START_MARKDOWN
  
  ### SparkQL BNF Grammar
- #
+ #
  # This document explains the rules for the Spark API filter language syntax and
- # is a living document generated from the reference implementation at
+ # is a living document generated from the reference implementation at
  # https://github.com/sparkapi/sparkql.
  
  #### Precedence Rules
- #
- # Unless otherwise specified, SparkQL follows SQL precendence conventions for
+ #
+ # Unless otherwise specified, SparkQL follows SQL precendence conventions for
  # operators and conjunctions.
- #
+ #
  # Unary minus is always tied to value, such as for negative numbers.
  prechigh
    nonassoc UMINUS
+   left MUL DIV MOD
+   left ADD SUB
  preclow
- 
+ 
  
  #### Grammar Rules
- #
+ #
  # A filter (target) is a composition of filter basic filter expressions.
  rule
    target
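
The two new `left` declarations give the multiplicative operators (Mul, Div, Mod) higher precedence than the additive ones (Add, Sub), mirroring SQL. A minimal sketch of the effect, assuming the gem's public parsing API is otherwise unchanged (the filter string and field name are illustrative, not from this diff):

    require 'sparkql'

    parser = Sparkql::Parser.new
    # Mul binds tighter than Add, so the literal side folds to 2 + (3 * 4) = 14,
    # not (2 + 3) * 4 = 20.
    expression = parser.parse("ListPrice Eq 2 Add 3 Mul 4").first
    expression[:value] # => "14", folded at parse time by the new *_fold helpers
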
@@ -41,7 +43,7 @@ rule
      ;
  
    ##### Expressions
-   #
+   #
    # One or more expressions
    expressions
      : expression
@@ -50,66 +52,84 @@ rule
      ;
  
    ##### Expression
-   #
-   # The core of the filtering system, the expression requires a field, a condition
-   # and criteria for comparing the value of the field to the value(s) of the
-   # condition. The result of evaluating the expression on a resource is a true of
-   # false for matching the criteria.
+   #
+   # The core of the filtering system, the expression requires a field, a condition
+   # and criteria for comparing the value of the field to the value(s) of the
+   # condition. The result of evaluating the expression on a resource is a true of
+   # false for matching the criteria. We are separating functions and arithmetic
+   # based on if we are acting on the field side or the literal side. This is to
+   # allow literal folding on the literal side.
    expression
-     : field OPERATOR condition { result = tokenize_expression(val[0], val[1],val[2]) }
-     | field RANGE_OPERATOR range { result = tokenize_expression(val[0], val[1], val[2]) }
+     : field_expression OPERATOR condition { result = tokenize_expression(val[0], val[1],val[2]) }
+     | field_expression RANGE_OPERATOR range { result = tokenize_expression(val[0], val[1], val[2]) }
      | group
      ;
- 
+ 
    ##### Unary Conjunction
-   #
-   # Some conjunctions don't need to expression at all times (e.g. 'NOT').
+   #
+   # Some conjunctions don't need to expression at all times (e.g. 'NOT').
    unary_conjunction
      : UNARY_CONJUNCTION expression { result = tokenize_unary_conjunction(val[0], val[1]) }
-     ;
- 
+     ;
+ 
    ##### Conjunction
-   #
+   #
    # Two expressions joined together using a supported conjunction
    conjunction
      : expressions CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1],val[2]) }
      | expressions UNARY_CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1],val[2]) }
      ;
- 
+ 
    ##### Group
-   #
+   #
    # One or more expressions encased in parenthesis. There are limitations on nesting depth at the time of this writing.
+ 
    group
-     : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
-     ;
+     : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
+     ;
+ 
+   field_expression
+     : field_arithmetic_expression
+     ;
+ 
+   field_arithmetic_expression
+     : field_arithmetic_expression ADD field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+     | field_arithmetic_expression SUB field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+     | field_arithmetic_expression MUL field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+     | field_arithmetic_expression DIV field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+     | field_arithmetic_expression MOD field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+     | literals
+     | field_function_expression
+     ;
+ 
+   field_function_expression
+     : field
+     | function
+     ;
  
-   ##### Field
-   #
-   # Keyword for searching on, these fields should be discovered using the metadata
-   # rules. In general, Keywords that cannot be found will be dropped from the
-   # filter.
-   field
-     : STANDARD_FIELD
-     | CUSTOM_FIELD
-     | function
-     ;
- 
    ##### Condition
-   #
-   # The determinant of the filter, this is typically a value or set of values of
-   # a type that the field supports (review the field meta data for support).
+   #
+   # The determinant of the filter, this is typically a value or set of values of
+   # a type that the field supports (review the field meta data for support).
    # Functions are also supported on some field types, and provide more flexibility
    # on filtering values
    condition
-     : literal
-     | literal_function
+     : arithmetic_condition
      | literal_list { result = tokenize_list(val[0]) }
+     | literal
      ;
- 
+ 
+   arithmetic_condition
+     : condition ADD condition { result = add_fold(val[0], val[2]) }
+     | condition SUB condition { result = sub_fold(val[0], val[2]) }
+     | condition MUL condition { result = mul_fold(val[0], val[2]) }
+     | condition DIV condition { result = div_fold(val[0], val[2]) }
+     | condition MOD condition { result = mod_fold(val[0], val[2]) }
+ 
    ##### Function
-   #
-   # Functions may replace static values for conditions with supported field
-   # types. Functions may have parameters that match types supported by
+   #
+   # Functions may replace static values for conditions with supported field
+   # types. Functions may have parameters that match types supported by
    # fields.
    function
      : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
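
The rewritten comment above marks the central design split in this release: arithmetic on the literal side is constant-folded at parse time (arithmetic_condition plus the *_fold helpers), while arithmetic touching the field side is kept as a tree, since a field has no value until the filter is evaluated against a resource. A hedged sketch of the two resulting shapes, reusing the parser instance from the first sketch (hash layout is taken from tokenize_arithmetic below; the op strings and surrounding expression keys are assumptions):

    # Literal side: folded down to a single literal before the expression is built.
    parser.parse("ListPrice Gt 100000 Add 50000").first[:value]
    # => "150000"

    # Field side: preserved as a nested tree under :field_manipulations.
    parser.parse("ListPrice Sub TaxAmount Gt 100000").first[:field_manipulations]
    # => { type: :arithmetic, op: 'Sub',
    #      lhs: { type: :field, value: 'ListPrice' },
    #      rhs: { type: :field, value: 'TaxAmount' } }
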
@@ -120,13 +140,13 @@ rule
      : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
      | function_name LPAREN literal_function_args RPAREN { result = tokenize_function(val[0], val[2]) }
      ;
- 
+ 
    function_name
      : KEYWORD
      ;
- 
+ 
    ##### Function Arguments
-   #
+   #
    # Functions may optionally have a comma delimited list of parameters.
    function_args
      : function_arg
@@ -134,9 +154,9 @@ rule
      ;
  
    function_arg
-     : literal
+     : field_function_expression { result = tokenize_field_arg(val[0]) }
+     | literal
      | literals
-     | field { result = tokenize_field_arg(val[0]) }
      ;
  
    literal_function_args
@@ -147,11 +167,10 @@ rule
    literal_function_arg
      : literal
      | literals
-     | literal_function
      ;
  
    ##### Literal List
-   #
+   #
    # A comma delimited list of functions and values.
    literal_list
      : literals
@@ -159,17 +178,17 @@ rule
      | literal_list COMMA literals { result = tokenize_multiple(val[0], val[2]) }
      | literal_list COMMA function { result = tokenize_multiple(val[0], val[2]) }
      ;
- 
+ 
    ##### Range List
-   #
-   # A comma delimited list of values that support ranges for the Between operator
+   #
+   # A comma delimited list of values that support ranges for the Between operator
    # (see rangeable).
-   range
+   range
      : rangeable COMMA rangeable { result = tokenize_multiple(val[0], val[2]) }
      ;
  
    ##### Literals
-   #
+   #
    # Literals that support multiple values in a list for a condition
    literals
      : INTEGER
@@ -178,9 +197,9 @@ rule
      | LPAREN literals RPAREN { result = val[1] }
      | UMINUS literals { result = tokenize_literal_negation(val[1]) }
      ;
- 
+ 
    ##### Literal
-   #
+   #
    # Literals only support a single value in a condition
    literal
      : DATE
@@ -191,8 +210,8 @@ rule
      ;
  
    ##### Range List
-   #
-   # Functions, and literals that can be used in a range
+   #
+   # Functions, and literals that can be used in a range
    rangeable
      : INTEGER
      | DECIMAL
@@ -202,9 +221,19 @@ rule
      | function
      ;
  
+   ##### Field
+   #
+   # Keyword for searching on, these fields should be discovered using the metadata
+   # rules. In general, Keywords that cannot be found will be dropped from the
+   # filter.
+   field
+     : STANDARD_FIELD
+     | CUSTOM_FIELD
+     ;
+ 
  #STOP_MARKDOWN
  
- 
+ 
  end
  
  ---- header
@@ -212,7 +241,7 @@ end
  ---- inner
    include Sparkql::ParserTools
    include Sparkql::ParserCompatibility
- 
+ 
  ---- footer
  
  # END PARSER
data/lib/sparkql/parser_tools.rb CHANGED
@@ -4,7 +4,8 @@ module Sparkql::ParserTools
    # Coercible types from highest precision to lowest
    DATE_TYPES = [:datetime, :date]
    NUMBER_TYPES = [:decimal, :integer]
- 
+   ARITHMETIC_TYPES = [:decimal, :integer, :field, :arithmetic]
+ 
    def parse(str)
      @lexer = Sparkql::Lexer.new(str)
      @expression_count = 0
@@ -21,7 +22,34 @@ module Sparkql::ParserTools
      end
      t
    end
- 
+ 
+   def arithmetic_field(nested_representation)
+     lhs = nested_representation[:lhs]
+     rhs = nested_representation[:rhs]
+ 
+     if lhs[:type] == :field
+       lhs[:value]
+     elsif rhs[:type] == :field
+       rhs[:value]
+     elsif lhs.key?(:field)
+       lhs[:field]
+     elsif rhs.key?(:field)
+       rhs[:field]
+     elsif lhs[:type] == :arithmetic
+       arithmetic_field(lhs)
+     elsif rhs[:type] == :arithmetic
+       arithmetic_field(rhs)
+     else
+       nil
+     end
+   end
+ 
+   def no_field_error(field, operator)
+     tokenizer_error(:token => field,
+                     :expression => {operator: operator, conjuction: 'And', conjunction_level: 0, level: @lexer.level},
+                     :message => "Each expression must evaluate a field", :status => :fatal )
+   end
+ 
    def tokenize_expression(field, op, val)
      operator = get_operator(val,op) unless val.nil?
  
@@ -34,6 +62,12 @@
        end
        field_manipulations = field
        field = field[:field]
+     elsif field.is_a?(Hash) && field[:type] == :arithmetic
+       field_manipulations = field
+       field = arithmetic_field(field)
+       no_field_error(field, operator) if field.nil?
+     elsif field.is_a?(Hash)
+       no_field_error(field, operator)
      end
  
      custom_field = !field.nil? && field.is_a?(String) && field.start_with?('"')
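
Together with the grammar change above, an arithmetic field expression no longer carries a single field name, so tokenize_expression asks arithmetic_field to recover one: the helper returns the first :field it finds while recursing through the lhs/rhs tree, and no_field_error is reported when the tree contains only literals (e.g. "5 Add 5 Eq 10"). A sketch of the recursion against the tree tokenize_arithmetic builds for "5 Add ListPrice Mul 2" (hash shapes from this diff; exact lexer literal values are assumed):

    tree = { type: :arithmetic, op: 'Add',
             lhs: { type: :integer, value: '5' },
             rhs: { type: :arithmetic, op: 'Mul',
                    lhs: { type: :field, value: 'ListPrice' },
                    rhs: { type: :integer, value: '2' } } }

    arithmetic_field(tree) # => "ListPrice" -- neither side of the outer node is
                           # a :field, so the helper recurses into the rhs subtree
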
@@ -45,10 +79,13 @@
  
      if !field_manipulations.nil?
        # Keeping field_function and field_function_type for backward compatibility with datacon
-       expression.merge!(field_manipulations: field_manipulations,
-                         field_function: field_manipulations[:function_name],
-                         field_function_type: field_manipulations[:return_type],
-                         args: field_manipulations[:function_parameters])
+       expression.merge!(field_manipulations: field_manipulations)
+ 
+       if field_manipulations[:type] == :function
+         expression.merge!(field_function: field_manipulations[:function_name],
+                           field_function_type: field_manipulations[:return_type],
+                           args: field_manipulations[:function_parameters])
+       end
      end
  
      expression = val.merge(expression) unless val.nil?
@@ -93,6 +130,7 @@ module Sparkql::ParserTools
    end
  
    def tokenize_list(list)
+     return if list.nil?
      validate_multiple_values list[:value]
      list[:condition] ||= list[:value]
      list
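
The new guard exists because the fold helpers further down return nil once they have reported a compile error, so a nil condition can now reach tokenize_list; bailing out early avoids a secondary NoMethodError on top of the error that was already recorded:

    tokenize_list(nil) # => nil (in 1.2.1 this raised NoMethodError on nil[:value])
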
@@ -147,13 +185,13 @@
    end
  
    def tokenize_field_arg(field)
-     if field.is_a?(Hash) && field[:type] == :function
-       field
-     else
+     if field.is_a?(String)
        {
          :type => :field,
          :value => field,
        }
+     else
+       field
      end
    end
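
The branch order in tokenize_field_arg is inverted from 1.2.1: rather than special-casing :function hashes and wrapping everything else, it now wraps only a bare string (a field name) and passes any hash through untouched, which lets functions and other pre-built argument hashes flow through unchanged:

    tokenize_field_arg('City')
    # => { :type => :field, :value => 'City' }

    tokenize_field_arg(type: :function, function_name: 'tolower')
    # => returned as-is (the function name here is illustrative only)
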
  
@@ -182,7 +220,81 @@
        result.nil? ? result : result.merge(:condition => "#{name}(#{condition_list.join(',')})")
      end
    end
- 
+ 
+   def tokenize_arithmetic(lhs, operator, rhs)
+     lhs = {type: :field, value: lhs} if lhs.is_a?(String)
+     rhs = {type: :field, value: rhs} if rhs.is_a?(String)
+ 
+     arithmetic_error?(lhs)
+     arithmetic_error?(rhs)
+     {
+       type: :arithmetic,
+       op: operator,
+       lhs: lhs,
+       rhs: rhs
+     }
+   end
+ 
+   def arithmetic_error?(side)
+     side_type = side[:type] == :function ? side[:return_type] : side[:type]
+     return false unless (!ARITHMETIC_TYPES.include?(side_type) || !ARITHMETIC_TYPES.include?(side_type))
+ 
+     compile_error(:token => side[:value], :expression => side,
+                   :message => "Error attempting arithmetic with type: #{side_type}",
+                   :status => :fatal, :syntax => false, :constraint => true )
+     true
+   end
+ 
+   def add_fold(n1, n2)
+     return if arithmetic_error?(n1) || arithmetic_error?(n2)
+ 
+     { type: arithmetic_type(n1, n2), value: (escape_value(n1) + escape_value(n2)).to_s }
+   end
+ 
+   def sub_fold(n1, n2)
+     return if arithmetic_error?(n1) || arithmetic_error?(n2)
+ 
+     { type: arithmetic_type(n1, n2), value: (escape_value(n1) - escape_value(n2)).to_s }
+   end
+ 
+   def mul_fold(n1, n2)
+     return if arithmetic_error?(n1) || arithmetic_error?(n2)
+ 
+     { type: arithmetic_type(n1, n2), value: (escape_value(n1) * escape_value(n2)).to_s }
+   end
+ 
+   def div_fold(n1, n2)
+     return if arithmetic_error?(n1) ||
+               arithmetic_error?(n2) ||
+               zero_error?(n2)
+ 
+     { type: arithmetic_type(n1, n2), value: (escape_value(n1) / escape_value(n2)).to_s }
+   end
+ 
+   def mod_fold(n1, n2)
+     return if arithmetic_error?(n1) ||
+               arithmetic_error?(n2) ||
+               zero_error?(n2)
+ 
+     { type: arithmetic_type(n1, n2), value: (escape_value(n1) % escape_value(n2)).to_s }
+   end
+ 
+   def arithmetic_type(num1, num2)
+     if (num1[:type] == :decimal || num2[:type] == :decimal)
+       :decimal
+     else
+       :integer
+     end
+   end
+ 
+   def zero_error?(number)
+     return unless escape_value(number) == 0
+ 
+     compile_error(:token => "#{number[:value]}", :expression => number,
+                   :message => "Error attempting to divide by zero",
+                   :status => :fatal, :syntax => false, :constraint => true )
+   end
+ 
    def on_error(error_token_id, error_value, value_stack)
      token_name = token_to_str(error_token_id)
      token_name.downcase!
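
The fold helpers do the literal-side arithmetic at parse time: escape_value (from Sparkql::ParserCompatibility) turns the literal hashes into Ruby numerics, Ruby's own operators compute the result, and arithmetic_type applies standard numeric promotion, so :decimal wins whenever either operand is a decimal and Div on two integers is Ruby integer division. A hedged sketch of the observable behavior, reusing the parser instance from the first sketch (field name and exact expression keys are assumptions):

    parser.parse("ListPrice Eq 10 Div 4").first[:value]   # => "2"   (integer division)
    parser.parse("ListPrice Eq 10.0 Div 4").first[:value] # => "2.5" (:decimal promotion)
    parser.parse("ListPrice Eq 10 Mod 3").first[:value]   # => "1"
    parser.parse("ListPrice Eq 10 Div 0")                 # zero_error?: fatal compile error
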
data/lib/sparkql/token.rb CHANGED
@@ -4,6 +4,14 @@ module Sparkql::Token
    LPAREN = /\(/
    RPAREN = /\)/
    KEYWORD = /[A-Za-z]+/
+ 
+   ADD = 'Add'
+   SUB = 'Sub'
+ 
+   MUL = 'Mul'
+   DIV = 'Div'
+   MOD = 'Mod'
+ 
    STANDARD_FIELD = /[A-Z]+[A-Za-z0-9]*/
    CUSTOM_FIELD = /^(\"([^$."][^."]+)\".\"([^$."][^."]*)\")/
    INTEGER = /^\-?[0-9]+/
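
Like OPERATORS and CONJUNCTIONS below, the five arithmetic operators are literal strings rather than regexps, so they should be matched as exact, case-sensitive keywords in a filter (spelling per the constants above; the failure mode for other casings is an assumption based on the generic KEYWORD pattern):

    "ListPrice Gt 2 Add 2"   # 'Add' matches the ADD token
    "ListPrice Gt 2 ADD 2"   # 'ADD' only matches the generic KEYWORD pattern,
                             # so this presumably fails to parse as arithmetic
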
@@ -20,5 +28,4 @@ module Sparkql::Token
    OPERATORS = ['Gt','Ge','Lt','Le'] + EQUALITY_OPERATORS
    UNARY_CONJUNCTIONS = ['Not']
    CONJUNCTIONS = ['And','Or']
- 
  end