sparkql 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sparkql/parser.y CHANGED
@@ -6,7 +6,7 @@ class Sparkql::Parser
 
 ###############################################################################
 # READ THIS!
-# The grammar documentation is parsed from this file and is in a sensitive
+# The grammar documentation is parsed from this file and is in a sensitive
 # syntax between the START_MARKDOWN and STOP_MARKDOWN keywords. In general, all
 # line comments will be treated as markdown text, and everything else is padded
 # for code formatting
@@ -15,24 +15,26 @@ class Sparkql::Parser
 #START_MARKDOWN
 
 ### SparkQL BNF Grammar
-#
+#
 # This document explains the rules for the Spark API filter language syntax and
-# is a living document generated from the reference implementation at
+# is a living document generated from the reference implementation at
 # https://github.com/sparkapi/sparkql.
 
 #### Precedence Rules
-#
-# Unless otherwise specified, SparkQL follows SQL precendence conventions for
+#
+# Unless otherwise specified, SparkQL follows SQL precendence conventions for
 # operators and conjunctions.
-#
+#
 # Unary minus is always tied to value, such as for negative numbers.
 prechigh
   nonassoc UMINUS
+  left MUL DIV MOD
+  left ADD SUB
 preclow
-
+
 
 #### Grammar Rules
-#
+#
 # A filter (target) is a composition of filter basic filter expressions.
 rule
   target
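A note on the two new precedence lines (an editor's sketch, not part of the diff): `left MUL DIV MOD` above `left ADD SUB` gives the multiplicative operators the tighter binding, mirroring the SQL conventions the comment describes. Assuming the gem is loaded as `sparkql` and using made-up field names:

    require 'sparkql'

    parser = Sparkql::Parser.new
    # 'Mul' binds tighter than 'Add', so the filter groups as Baths Add (Beds Mul 2).
    expressions = parser.parse('Baths Add Beds Mul 2 Gt 5')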
@@ -41,7 +43,7 @@ rule
     ;
 
   ##### Expressions
-  #
+  #
   # One or more expressions
   expressions
     : expression
@@ -50,66 +52,84 @@ rule
     ;
 
   ##### Expression
-  #
-  # The core of the filtering system, the expression requires a field, a condition
-  # and criteria for comparing the value of the field to the value(s) of the
-  # condition. The result of evaluating the expression on a resource is a true of
-  # false for matching the criteria.
+  #
+  # The core of the filtering system, the expression requires a field, a condition
+  # and criteria for comparing the value of the field to the value(s) of the
+  # condition. The result of evaluating the expression on a resource is a true of
+  # false for matching the criteria. We are separating functions and arithmetic
+  # based on if we are acting on the field side or the literal side. This is to
+  # allow literal folding on the literal side.
   expression
-    : field OPERATOR condition { result = tokenize_expression(val[0], val[1],val[2]) }
-    | field RANGE_OPERATOR range { result = tokenize_expression(val[0], val[1], val[2]) }
+    : field_expression OPERATOR condition { result = tokenize_expression(val[0], val[1],val[2]) }
+    | field_expression RANGE_OPERATOR range { result = tokenize_expression(val[0], val[1], val[2]) }
     | group
     ;
-
+
   ##### Unary Conjunction
-  #
-  # Some conjunctions don't need to expression at all times (e.g. 'NOT').
+  #
+  # Some conjunctions don't need to expression at all times (e.g. 'NOT').
   unary_conjunction
     : UNARY_CONJUNCTION expression { result = tokenize_unary_conjunction(val[0], val[1]) }
-    ;
-
+    ;
+
   ##### Conjunction
-  #
+  #
   # Two expressions joined together using a supported conjunction
   conjunction
     : expressions CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1],val[2]) }
     | expressions UNARY_CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1],val[2]) }
     ;
-
+
   ##### Group
-  #
+  #
   # One or more expressions encased in parenthesis. There are limitations on nesting depth at the time of this writing.
+
   group
-    : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
-    ;
+    : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
+    ;
+
+  field_expression
+    : field_arithmetic_expression
+    ;
+
+  field_arithmetic_expression
+    : field_arithmetic_expression ADD field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression SUB field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression MUL field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression DIV field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression MOD field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | literals
+    | field_function_expression
+    ;
+
+  field_function_expression
+    : field
+    | function
+    ;
 
-  ##### Field
-  #
-  # Keyword for searching on, these fields should be discovered using the metadata
-  # rules. In general, Keywords that cannot be found will be dropped from the
-  # filter.
-  field
-    : STANDARD_FIELD
-    | CUSTOM_FIELD
-    | function
-    ;
-
   ##### Condition
-  #
-  # The determinant of the filter, this is typically a value or set of values of
-  # a type that the field supports (review the field meta data for support).
+  #
+  # The determinant of the filter, this is typically a value or set of values of
+  # a type that the field supports (review the field meta data for support).
   # Functions are also supported on some field types, and provide more flexibility
   # on filtering values
   condition
-    : literal
-    | literal_function
+    : arithmetic_condition
     | literal_list { result = tokenize_list(val[0]) }
+    | literal
     ;
-
+
+  arithmetic_condition
+    : condition ADD condition { result = add_fold(val[0], val[2]) }
+    | condition SUB condition { result = sub_fold(val[0], val[2]) }
+    | condition MUL condition { result = mul_fold(val[0], val[2]) }
+    | condition DIV condition { result = div_fold(val[0], val[2]) }
+    | condition MOD condition { result = mod_fold(val[0], val[2]) }
+
   ##### Function
-  #
-  # Functions may replace static values for conditions with supported field
-  # types. Functions may have parameters that match types supported by
+  #
+  # Functions may replace static values for conditions with supported field
+  # types. Functions may have parameters that match types supported by
   # fields.
   function
     : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
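This grammar split is the heart of the release: arithmetic on the field side is preserved as a nested `:arithmetic` node (via `tokenize_arithmetic`, shown in parser_tools.rb below), while arithmetic on the literal side is folded to a single constant at parse time (via the `*_fold` helpers). A rough sketch of the difference, with made-up field names and hash shapes inferred from the tokenizer code:

    require 'sparkql'

    parser = Sparkql::Parser.new

    # Field-side arithmetic survives as a tree on the expression.
    exp = parser.parse('Baths Add Beds Gt 5').first
    # exp[:field_manipulations] is roughly:
    #   { type: :arithmetic, op: 'Add',
    #     lhs: { type: :field, value: 'Baths' },
    #     rhs: { type: :field, value: 'Beds' } }

    # Literal-side arithmetic is folded before the expression is assembled.
    exp = parser.parse('ListPrice Gt 2 Mul 3').first
    # exp[:value] is "6" (see mul_fold below)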
@@ -120,13 +140,13 @@ rule
     : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
     | function_name LPAREN literal_function_args RPAREN { result = tokenize_function(val[0], val[2]) }
     ;
-
+
   function_name
     : KEYWORD
     ;
-
+
   ##### Function Arguments
-  #
+  #
   # Functions may optionally have a comma delimited list of parameters.
   function_args
     : function_arg
@@ -134,9 +154,9 @@ rule
     ;
 
   function_arg
-    : literal
+    : field_function_expression { result = tokenize_field_arg(val[0]) }
+    | literal
     | literals
-    | field { result = tokenize_field_arg(val[0]) }
     ;
 
   literal_function_args
@@ -147,11 +167,10 @@ rule
   literal_function_arg
     : literal
     | literals
-    | literal_function
    ;
 
   ##### Literal List
-  #
+  #
   # A comma delimited list of functions and values.
   literal_list
     : literals
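Also worth noting in `function_arg`: arguments are now routed through `field_function_expression`, so a field or a nested function call can appear in a parameter position. Grammar-wise a filter like the sketch below parses; whether a given function name and signature is actually accepted is decided later by the function resolver, outside this diff (field name illustrative):

    require 'sparkql'

    parser = Sparkql::Parser.new
    parser.parse("toupper(City) Eq 'FARGO'")  # field used as a function argument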
@@ -159,17 +178,17 @@ rule
     | literal_list COMMA literals { result = tokenize_multiple(val[0], val[2]) }
     | literal_list COMMA function { result = tokenize_multiple(val[0], val[2]) }
     ;
-
+
   ##### Range List
-  #
-  # A comma delimited list of values that support ranges for the Between operator
+  #
+  # A comma delimited list of values that support ranges for the Between operator
   # (see rangeable).
-  range
+  range
     : rangeable COMMA rangeable { result = tokenize_multiple(val[0], val[2]) }
     ;
 
   ##### Literals
-  #
+  #
   # Literals that support multiple values in a list for a condition
   literals
     : INTEGER
@@ -178,9 +197,9 @@ rule
     | LPAREN literals RPAREN { result = val[1] }
     | UMINUS literals { result = tokenize_literal_negation(val[1]) }
     ;
-
+
   ##### Literal
-  #
+  #
   # Literals only support a single value in a condition
   literal
     : DATE
@@ -191,8 +210,8 @@ rule
     ;
 
   ##### Range List
-  #
-  # Functions, and literals that can be used in a range
+  #
+  # Functions, and literals that can be used in a range
   rangeable
     : INTEGER
     | DECIMAL
@@ -202,9 +221,19 @@ rule
     | function
     ;
 
+  ##### Field
+  #
+  # Keyword for searching on, these fields should be discovered using the metadata
+  # rules. In general, Keywords that cannot be found will be dropped from the
+  # filter.
+  field
+    : STANDARD_FIELD
+    | CUSTOM_FIELD
+    ;
+
 #STOP_MARKDOWN
 
-
+
 end
 
 ---- header
@@ -212,7 +241,7 @@ end
 ---- inner
   include Sparkql::ParserTools
   include Sparkql::ParserCompatibility
-
+
 ---- footer
 
 # END PARSER
data/lib/sparkql/parser_tools.rb CHANGED
@@ -4,7 +4,8 @@ module Sparkql::ParserTools
   # Coercible types from highest precision to lowest
   DATE_TYPES = [:datetime, :date]
   NUMBER_TYPES = [:decimal, :integer]
-
+  ARITHMETIC_TYPES = [:decimal, :integer, :field, :arithmetic]
+
   def parse(str)
     @lexer = Sparkql::Lexer.new(str)
     @expression_count = 0
@@ -21,7 +22,34 @@ module Sparkql::ParserTools
     end
     t
   end
-
+
+  def arithmetic_field(nested_representation)
+    lhs = nested_representation[:lhs]
+    rhs = nested_representation[:rhs]
+
+    if lhs[:type] == :field
+      lhs[:value]
+    elsif rhs[:type] == :field
+      rhs[:value]
+    elsif lhs.key?(:field)
+      lhs[:field]
+    elsif rhs.key?(:field)
+      rhs[:field]
+    elsif lhs[:type] == :arithmetic
+      arithmetic_field(lhs)
+    elsif rhs[:type] == :arithmetic
+      arithmetic_field(rhs)
+    else
+      nil
+    end
+  end
+
+  def no_field_error(field, operator)
+    tokenizer_error(:token => field,
+                    :expression => {operator: operator, conjuction: 'And', conjunction_level: 0, level: @lexer.level},
+                    :message => "Each expression must evaluate a field", :status => :fatal )
+  end
+
   def tokenize_expression(field, op, val)
     operator = get_operator(val,op) unless val.nil?
 
@@ -34,6 +62,12 @@ module Sparkql::ParserTools
       end
       field_manipulations = field
      field = field[:field]
+    elsif field.is_a?(Hash) && field[:type] == :arithmetic
+      field_manipulations = field
+      field = arithmetic_field(field)
+      no_field_error(field, operator) if field.nil?
+    elsif field.is_a?(Hash)
+      no_field_error(field, operator)
     end
 
     custom_field = !field.nil? && field.is_a?(String) && field.start_with?('"')
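Taken together with the new branches in `tokenize_expression`, `arithmetic_field` walks an arithmetic tree (lhs before rhs) for the first field it contains, which becomes the expression's field for metadata lookup; a left-hand side that never touches a field is rejected with `no_field_error`. An illustrative sketch, with made-up filters:

    require 'sparkql'

    parser = Sparkql::Parser.new

    # '2 Mul Baths Add 1' nests :arithmetic nodes; arithmetic_field digs
    # out 'Baths' as the field the expression evaluates.
    parser.parse('2 Mul Baths Add 1 Gt 10')

    # No field anywhere on the left side: "Each expression must evaluate
    # a field" is recorded as a fatal error.
    parser.parse('1 Add 2 Gt 5')
    parser.errors?  # => true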
@@ -45,10 +79,13 @@ module Sparkql::ParserTools
 
     if !field_manipulations.nil?
       # Keeping field_function and field_function_type for backward compatibility with datacon
-      expression.merge!(field_manipulations: field_manipulations,
-                        field_function: field_manipulations[:function_name],
-                        field_function_type: field_manipulations[:return_type],
-                        args: field_manipulations[:function_parameters])
+      expression.merge!(field_manipulations: field_manipulations)
+
+      if field_manipulations[:type] == :function
+        expression.merge!(field_function: field_manipulations[:function_name],
+                          field_function_type: field_manipulations[:return_type],
+                          args: field_manipulations[:function_parameters])
+      end
     end
 
     expression = val.merge(expression) unless val.nil?
@@ -93,6 +130,7 @@ module Sparkql::ParserTools
   end
 
   def tokenize_list(list)
+    return if list.nil?
     validate_multiple_values list[:value]
     list[:condition] ||= list[:value]
     list
@@ -147,13 +185,13 @@ module Sparkql::ParserTools
   end
 
   def tokenize_field_arg(field)
-    if field.is_a?(Hash) && field[:type] == :function
-      field
-    else
+    if field.is_a?(String)
       {
         :type => :field,
         :value => field,
       }
+    else
+      field
     end
   end
 
@@ -182,7 +220,81 @@ module Sparkql::ParserTools
       result.nil? ? result : result.merge(:condition => "#{name}(#{condition_list.join(',')})")
     end
   end
-
+
+  def tokenize_arithmetic(lhs, operator, rhs)
+    lhs = {type: :field, value: lhs} if lhs.is_a?(String)
+    rhs = {type: :field, value: rhs} if rhs.is_a?(String)
+
+    arithmetic_error?(lhs)
+    arithmetic_error?(rhs)
+    {
+      type: :arithmetic,
+      op: operator,
+      lhs: lhs,
+      rhs: rhs
+    }
+  end
+
+  def arithmetic_error?(side)
+    side_type = side[:type] == :function ? side[:return_type] : side[:type]
+    return false unless (!ARITHMETIC_TYPES.include?(side_type) || !ARITHMETIC_TYPES.include?(side_type))
+
+    compile_error(:token => side[:value], :expression => side,
+                  :message => "Error attempting arithmetic with type: #{side_type}",
+                  :status => :fatal, :syntax => false, :constraint => true )
+    true
+  end
+
+  def add_fold(n1, n2)
+    return if arithmetic_error?(n1) || arithmetic_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) + escape_value(n2)).to_s }
+  end
+
+  def sub_fold(n1, n2)
+    return if arithmetic_error?(n1) || arithmetic_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) - escape_value(n2)).to_s }
+  end
+
+  def mul_fold(n1, n2)
+    return if arithmetic_error?(n1) || arithmetic_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) * escape_value(n2)).to_s }
+  end
+
+  def div_fold(n1, n2)
+    return if arithmetic_error?(n1) ||
+              arithmetic_error?(n2) ||
+              zero_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) / escape_value(n2)).to_s }
+  end
+
+  def mod_fold(n1, n2)
+    return if arithmetic_error?(n1) ||
+              arithmetic_error?(n2) ||
+              zero_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) % escape_value(n2)).to_s }
+  end
+
+  def arithmetic_type(num1, num2)
+    if (num1[:type] == :decimal || num2[:type] == :decimal)
+      :decimal
+    else
+      :integer
+    end
+  end
+
+  def zero_error?(number)
+    return unless escape_value(number) == 0
+
+    compile_error(:token => "#{number[:value]}", :expression => number,
+                  :message => "Error attempting to divide by zero",
+                  :status => :fatal, :syntax => false, :constraint => true )
+  end
+
   def on_error(error_token_id, error_value, value_stack)
     token_name = token_to_str(error_token_id)
     token_name.downcase!
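The fold helpers above evaluate literal arithmetic immediately, so only a constant reaches the final expression: `arithmetic_type` promotes the result to `:decimal` when either side is decimal, and `div_fold`/`mod_fold` refuse a zero right-hand side via `zero_error?`. Roughly, with hand-built condition hashes for illustration:

    # Conditions arrive as { type:, value: } hashes inside the parser:
    add_fold({ type: :integer, value: '2' }, { type: :integer, value: '3' })
    # => { type: :integer, value: '5' }

    mul_fold({ type: :decimal, value: '2.5' }, { type: :integer, value: '2' })
    # => { type: :decimal, value: '5.0' }   (promoted by arithmetic_type)

    div_fold({ type: :integer, value: '1' }, { type: :integer, value: '0' })
    # => nil, after zero_error? records a fatal divide-by-zero compile_error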
data/lib/sparkql/token.rb CHANGED
@@ -4,6 +4,14 @@ module Sparkql::Token
   LPAREN = /\(/
   RPAREN = /\)/
   KEYWORD = /[A-Za-z]+/
+
+  ADD = 'Add'
+  SUB = 'Sub'
+
+  MUL = 'Mul'
+  DIV = 'Div'
+  MOD = 'Mod'
+
   STANDARD_FIELD = /[A-Z]+[A-Za-z0-9]*/
   CUSTOM_FIELD = /^(\"([^$."][^."]+)\".\"([^$."][^."]*)\")/
   INTEGER = /^\-?[0-9]+/
@@ -20,5 +28,4 @@ module Sparkql::Token
   OPERATORS = ['Gt','Ge','Lt','Le'] + EQUALITY_OPERATORS
   UNARY_CONJUNCTIONS = ['Not']
   CONJUNCTIONS = ['And','Or']
-
 end