sparkql 1.2.1 → 1.2.2
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- checksums.yaml +8 -8
- data/CHANGELOG.md +4 -0
- data/GRAMMAR.md +63 -38
- data/VERSION +1 -1
- data/lib/sparkql/lexer.rb +11 -1
- data/lib/sparkql/parser.rb +318 -181
- data/lib/sparkql/parser.y +93 -64
- data/lib/sparkql/parser_tools.rb +122 -10
- data/lib/sparkql/token.rb +8 -1
- data/test/unit/parser_test.rb +258 -0
- metadata +2 -2
data/lib/sparkql/parser.y
CHANGED
```diff
@@ -6,7 +6,7 @@ class Sparkql::Parser
 
 ###############################################################################
 # READ THIS!
-# The grammar documentation is parsed from this file and is in a sensitive
+# The grammar documentation is parsed from this file and is in a sensitive
 # syntax between the START_MARKDOWN and STOP_MARKDOWN keywords. In general, all
 # line comments will be treated as markdown text, and everything else is padded
 # for code formatting
@@ -15,24 +15,26 @@ class Sparkql::Parser
 #START_MARKDOWN
 
 ### SparkQL BNF Grammar
-#
+#
 # This document explains the rules for the Spark API filter language syntax and
-# is a living document generated from the reference implementation at
+# is a living document generated from the reference implementation at
 # https://github.com/sparkapi/sparkql.
 
 #### Precedence Rules
-#
-# Unless otherwise specified, SparkQL follows SQL precendence conventions for
+#
+# Unless otherwise specified, SparkQL follows SQL precendence conventions for
 # operators and conjunctions.
-#
+#
 # Unary minus is always tied to value, such as for negative numbers.
 prechigh
   nonassoc UMINUS
+  left MUL DIV MOD
+  left ADD SUB
 preclow
-
+
 
 #### Grammar Rules
-#
+#
 # A filter (target) is a composition of filter basic filter expressions.
 rule
   target
```
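The new precedence declarations give `Mul`, `Div`, and `Mod` a tighter binding than `Add` and `Sub`, with unary minus highest, mirroring SQL. A minimal sketch of what this implies for a filter string (the field names and values are assumed examples, not taken from the package's test suite):

```ruby
# With `left MUL DIV MOD` declared above `left ADD SUB`, the field-side
# expression groups as Baths Add (2 Mul 3), not (Baths Add 2) Mul 3.
filter = "Baths Add 2 Mul 3 Eq 11"

# `left` also makes same-precedence operators chain left to right,
# i.e. (Baths Sub 1) Sub 2.
filter = "Baths Sub 1 Sub 2 Eq 0"
```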
```diff
@@ -41,7 +43,7 @@ rule
     ;
 
   ##### Expressions
-  #
+  #
   # One or more expressions
   expressions
     : expression
@@ -50,66 +52,84 @@ rule
     ;
 
   ##### Expression
-  #
-  # The core of the filtering system, the expression requires a field, a condition
-  # and criteria for comparing the value of the field to the value(s) of the
-  # condition. The result of evaluating the expression on a resource is a true of
-  # false for matching the criteria.
+  #
+  # The core of the filtering system, the expression requires a field, a condition
+  # and criteria for comparing the value of the field to the value(s) of the
+  # condition. The result of evaluating the expression on a resource is a true of
+  # false for matching the criteria. We are separating functions and arithmetic
+  # based on if we are acting on the field side or the literal side. This is to
+  # allow literal folding on the literal side.
   expression
-    : field OPERATOR condition { result = tokenize_expression(val[0], val[1],val[2]) }
-    | field RANGE_OPERATOR range { result = tokenize_expression(val[0], val[1], val[2]) }
+    : field_expression OPERATOR condition { result = tokenize_expression(val[0], val[1],val[2]) }
+    | field_expression RANGE_OPERATOR range { result = tokenize_expression(val[0], val[1], val[2]) }
     | group
     ;
-
+
   ##### Unary Conjunction
-  #
-  # Some conjunctions don't need to expression at all times (e.g. 'NOT').
+  #
+  # Some conjunctions don't need to expression at all times (e.g. 'NOT').
   unary_conjunction
     : UNARY_CONJUNCTION expression { result = tokenize_unary_conjunction(val[0], val[1]) }
-    ;
-
+    ;
+
   ##### Conjunction
-  #
+  #
   # Two expressions joined together using a supported conjunction
   conjunction
     : expressions CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1],val[2]) }
     | expressions UNARY_CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1],val[2]) }
     ;
-
+
   ##### Group
-  #
+  #
   # One or more expressions encased in parenthesis. There are limitations on nesting depth at the time of this writing.
+
   group
-    : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
-    ;
+    : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
+    ;
+
+  field_expression
+    : field_arithmetic_expression
+    ;
+
+  field_arithmetic_expression
+    : field_arithmetic_expression ADD field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression SUB field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression MUL field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression DIV field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression MOD field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | literals
+    | field_function_expression
+    ;
+
+  field_function_expression
+    : field
+    | function
+    ;
 
-  ##### Field
-  #
-  # Keyword for searching on, these fields should be discovered using the metadata
-  # rules. In general, Keywords that cannot be found will be dropped from the
-  # filter.
-  field
-    : STANDARD_FIELD
-    | CUSTOM_FIELD
-    | function
-    ;
-
   ##### Condition
-  #
-  # The determinant of the filter, this is typically a value or set of values of
-  # a type that the field supports (review the field meta data for support).
+  #
+  # The determinant of the filter, this is typically a value or set of values of
+  # a type that the field supports (review the field meta data for support).
   # Functions are also supported on some field types, and provide more flexibility
   # on filtering values
   condition
-    : literal
-    | literal_function
+    : arithmetic_condition
     | literal_list { result = tokenize_list(val[0]) }
+    | literal
     ;
-
+
+  arithmetic_condition
+    : condition ADD condition { result = add_fold(val[0], val[2]) }
+    | condition SUB condition { result = sub_fold(val[0], val[2]) }
+    | condition MUL condition { result = mul_fold(val[0], val[2]) }
+    | condition DIV condition { result = div_fold(val[0], val[2]) }
+    | condition MOD condition { result = mod_fold(val[0], val[2]) }
+
   ##### Function
-  #
-  # Functions may replace static values for conditions with supported field
-  # types. Functions may have parameters that match types supported by
+  #
+  # Functions may replace static values for conditions with supported field
+  # types. Functions may have parameters that match types supported by
   # fields.
   function
     : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
```
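The grammar now splits arithmetic by side: `field_arithmetic_expression` builds a nested `:arithmetic` node via `tokenize_arithmetic` when a field is involved, while `arithmetic_condition` folds literal-only arithmetic immediately through the `*_fold` helpers. A sketch of the two cases (assumed example filters):

```ruby
# Field-side arithmetic: kept as a tree and evaluated per record.
field_side = "Baths Add 1 Eq 3"

# Literal-side arithmetic: both operands are literals, so the parser can
# fold 10 Sub 4 into the single condition value 6 at parse time.
literal_side = "Baths Eq 10 Sub 4"
```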
```diff
@@ -120,13 +140,13 @@ rule
     : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
     | function_name LPAREN literal_function_args RPAREN { result = tokenize_function(val[0], val[2]) }
     ;
-
+
   function_name
     : KEYWORD
     ;
-
+
   ##### Function Arguments
-  #
+  #
   # Functions may optionally have a comma delimited list of parameters.
   function_args
     : function_arg
@@ -134,9 +154,9 @@ rule
     ;
 
   function_arg
-    : literal
+    : field_function_expression { result = tokenize_field_arg(val[0]) }
+    | literal
     | literals
-    | field { result = tokenize_field_arg(val[0]) }
     ;
 
   literal_function_args
@@ -147,11 +167,10 @@ rule
   literal_function_arg
     : literal
     | literals
-    | literal_function
     ;
 
   ##### Literal List
-  #
+  #
   # A comma delimited list of functions and values.
   literal_list
     : literals
@@ -159,17 +178,17 @@ rule
     | literal_list COMMA literals { result = tokenize_multiple(val[0], val[2]) }
     | literal_list COMMA function { result = tokenize_multiple(val[0], val[2]) }
     ;
-
+
   ##### Range List
-  #
-  # A comma delimited list of values that support ranges for the Between operator
+  #
+  # A comma delimited list of values that support ranges for the Between operator
   # (see rangeable).
-  range
+  range
     : rangeable COMMA rangeable { result = tokenize_multiple(val[0], val[2]) }
     ;
 
   ##### Literals
-  #
+  #
   # Literals that support multiple values in a list for a condition
   literals
     : INTEGER
@@ -178,9 +197,9 @@ rule
     | LPAREN literals RPAREN { result = val[1] }
     | UMINUS literals { result = tokenize_literal_negation(val[1]) }
     ;
-
+
   ##### Literal
-  #
+  #
   # Literals only support a single value in a condition
   literal
     : DATE
@@ -191,8 +210,8 @@ rule
     ;
 
   ##### Range List
-  #
-  # Functions, and literals that can be used in a range
+  #
+  # Functions, and literals that can be used in a range
   rangeable
     : INTEGER
     | DECIMAL
@@ -202,9 +221,19 @@ rule
     | function
     ;
 
+  ##### Field
+  #
+  # Keyword for searching on, these fields should be discovered using the metadata
+  # rules. In general, Keywords that cannot be found will be dropped from the
+  # filter.
+  field
+    : STANDARD_FIELD
+    | CUSTOM_FIELD
+    ;
+
   #STOP_MARKDOWN
 
-
+
 end
 
 ---- header
@@ -212,7 +241,7 @@ end
 ---- inner
   include Sparkql::ParserTools
   include Sparkql::ParserCompatibility
-
+
 ---- footer
 
 # END PARSER
```
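Callers reach the new grammar through the same entry point as before; `parse` is defined in `parser_tools.rb` below. A minimal usage sketch (the filter string and the exact shape of the returned token stream are assumptions for illustration):

```ruby
require 'sparkql'

parser = Sparkql::Parser.new
expressions = parser.parse("Baths Add 2 Eq 5")

# Each parsed expression is a Hash; per tokenize_expression below, an
# expression whose field side used Add/Sub/Mul/Div/Mod carries the nested
# :arithmetic tree under :field_manipulations.
```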
data/lib/sparkql/parser_tools.rb
CHANGED
```diff
@@ -4,7 +4,8 @@ module Sparkql::ParserTools
   # Coercible types from highest precision to lowest
   DATE_TYPES = [:datetime, :date]
   NUMBER_TYPES = [:decimal, :integer]
-
+  ARITHMETIC_TYPES = [:decimal, :integer, :field, :arithmetic]
+
   def parse(str)
     @lexer = Sparkql::Lexer.new(str)
     @expression_count = 0
@@ -21,7 +22,34 @@ module Sparkql::ParserTools
     end
     t
   end
-
+
+  def arithmetic_field(nested_representation)
+    lhs = nested_representation[:lhs]
+    rhs = nested_representation[:rhs]
+
+    if lhs[:type] == :field
+      lhs[:value]
+    elsif rhs[:type] == :field
+      rhs[:value]
+    elsif lhs.key?(:field)
+      lhs[:field]
+    elsif rhs.key?(:field)
+      rhs[:field]
+    elsif lhs[:type] == :arithmetic
+      arithmetic_field(lhs)
+    elsif rhs[:type] == :arithmetic
+      arithmetic_field(rhs)
+    else
+      nil
+    end
+  end
+
+  def no_field_error(field, operator)
+    tokenizer_error(:token => field,
+      :expression => {operator: operator, conjuction: 'And', conjunction_level: 0, level: @lexer.level},
+      :message => "Each expression must evaluate a field", :status => :fatal )
+  end
+
   def tokenize_expression(field, op, val)
     operator = get_operator(val,op) unless val.nil?
 
```
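`arithmetic_field` recursively walks the nested tree built by `tokenize_arithmetic` to recover the field name a compound field expression refers to. A sketch of the shape it expects (the tree literal is an assumed example mirroring what `tokenize_arithmetic` in this same file produces):

```ruby
# Roughly what "Baths Add 2" produces on the field side:
tree = {
  type: :arithmetic,
  op: 'Add',
  lhs: { type: :field, value: 'Baths' },
  rhs: { type: :integer, value: 2 }
}
# arithmetic_field(tree) #=> "Baths", via the lhs[:type] == :field branch.
# A tree containing no field anywhere returns nil, which tokenize_expression
# turns into no_field_error ("Each expression must evaluate a field").
```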
```diff
@@ -34,6 +62,12 @@ module Sparkql::ParserTools
       end
       field_manipulations = field
       field = field[:field]
+    elsif field.is_a?(Hash) && field[:type] == :arithmetic
+      field_manipulations = field
+      field = arithmetic_field(field)
+      no_field_error(field, operator) if field.nil?
+    elsif field.is_a?(Hash)
+      no_field_error(field, operator)
     end
 
     custom_field = !field.nil? && field.is_a?(String) && field.start_with?('"')
@@ -45,10 +79,13 @@ module Sparkql::ParserTools
 
     if !field_manipulations.nil?
       # Keeping field_function and field_function_type for backward compatibility with datacon
-      expression.merge!(field_manipulations: field_manipulations,
-                        field_function: field_manipulations[:function_name],
-                        field_function_type: field_manipulations[:return_type],
-                        args: field_manipulations[:function_parameters])
+      expression.merge!(field_manipulations: field_manipulations)
+
+      if field_manipulations[:type] == :function
+        expression.merge!(field_function: field_manipulations[:function_name],
+                          field_function_type: field_manipulations[:return_type],
+                          args: field_manipulations[:function_parameters])
+      end
     end
 
     expression = val.merge(expression) unless val.nil?
@@ -93,6 +130,7 @@ module Sparkql::ParserTools
   end
 
   def tokenize_list(list)
+    return if list.nil?
     validate_multiple_values list[:value]
     list[:condition] ||= list[:value]
     list
@@ -147,13 +185,13 @@ module Sparkql::ParserTools
   end
 
   def tokenize_field_arg(field)
-    if field.is_a?(Hash)
-      field
-    else
+    if field.is_a?(String)
       {
         :type => :field,
        :value => field,
       }
+    else
+      field
     end
   end
 
```
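The rewritten `tokenize_field_arg` inverts the old `Hash` check: a bare `String` (a field name) is wrapped into a field token, and anything already structured, such as a function hash, now passes through unchanged. A sketch of the two paths when called in the parser's context (return values inferred from the code above):

```ruby
tokenize_field_arg("Baths")
# => { :type => :field, :value => "Baths" }

tokenize_field_arg({ type: :function, value: "now" })
# => { type: :function, value: "now" }   (returned as-is)
```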
```diff
@@ -182,7 +220,81 @@ module Sparkql::ParserTools
       result.nil? ? result : result.merge(:condition => "#{name}(#{condition_list.join(',')})")
     end
   end
-
+
+  def tokenize_arithmetic(lhs, operator, rhs)
+    lhs = {type: :field, value: lhs} if lhs.is_a?(String)
+    rhs = {type: :field, value: rhs} if rhs.is_a?(String)
+
+    arithmetic_error?(lhs)
+    arithmetic_error?(rhs)
+    {
+      type: :arithmetic,
+      op: operator,
+      lhs: lhs,
+      rhs: rhs
+    }
+  end
+
+  def arithmetic_error?(side)
+    side_type = side[:type] == :function ? side[:return_type] : side[:type]
+    return false unless (!ARITHMETIC_TYPES.include?(side_type) || !ARITHMETIC_TYPES.include?(side_type))
+
+    compile_error(:token => side[:value], :expression => side,
+      :message => "Error attempting arithmetic with type: #{side_type}",
+      :status => :fatal, :syntax => false, :constraint => true )
+    true
+  end
+
+  def add_fold(n1, n2)
+    return if arithmetic_error?(n1) || arithmetic_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) + escape_value(n2)).to_s }
+  end
+
+  def sub_fold(n1, n2)
+    return if arithmetic_error?(n1) || arithmetic_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) - escape_value(n2)).to_s }
+  end
+
+  def mul_fold(n1, n2)
+    return if arithmetic_error?(n1) || arithmetic_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) * escape_value(n2)).to_s }
+  end
+
+  def div_fold(n1, n2)
+    return if arithmetic_error?(n1) ||
+              arithmetic_error?(n2) ||
+              zero_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) / escape_value(n2)).to_s }
+  end
+
+  def mod_fold(n1, n2)
+    return if arithmetic_error?(n1) ||
+              arithmetic_error?(n2) ||
+              zero_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) % escape_value(n2)).to_s }
+  end
+
+  def arithmetic_type(num1, num2)
+    if (num1[:type] == :decimal || num2[:type] == :decimal)
+      :decimal
+    else
+      :integer
+    end
+  end
+
+  def zero_error?(number)
+    return unless escape_value(number) == 0
+
+    compile_error(:token => "#{number[:value]}", :expression => number,
+      :message => "Error attempting to divide by zero",
+      :status => :fatal, :syntax => false, :constraint => true )
+  end
+
   def on_error(error_token_id, error_value, value_stack)
     token_name = token_to_str(error_token_id)
     token_name.downcase!
```
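The `*_fold` helpers evaluate literal-side arithmetic at parse time: `arithmetic_type` promotes the result to `:decimal` if either operand is a decimal, and `zero_error?` rejects division or modulo by zero. A sketch of the intended behavior (token shapes assumed; `escape_value` is the gem's existing helper for converting a token to its Ruby value):

```ruby
n1 = { type: :integer, value: '10' }
n2 = { type: :decimal, value: '2.5' }

add_fold(n1, n2)
# => { type: :decimal, value: "12.5" }   (decimal wins in arithmetic_type)

zero = { type: :integer, value: '0' }
div_fold(n1, zero)
# => nil, after zero_error? records "Error attempting to divide by zero"
```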
data/lib/sparkql/token.rb
CHANGED
```diff
@@ -4,6 +4,14 @@ module Sparkql::Token
   LPAREN = /\(/
   RPAREN = /\)/
   KEYWORD = /[A-Za-z]+/
+
+  ADD = 'Add'
+  SUB = 'Sub'
+
+  MUL = 'Mul'
+  DIV = 'Div'
+  MOD = 'Mod'
+
   STANDARD_FIELD = /[A-Z]+[A-Za-z0-9]*/
   CUSTOM_FIELD = /^(\"([^$."][^."]+)\".\"([^$."][^."]*)\")/
   INTEGER = /^\-?[0-9]+/
```
```diff
@@ -20,5 +28,4 @@ module Sparkql::Token
   OPERATORS = ['Gt','Ge','Lt','Le'] + EQUALITY_OPERATORS
   UNARY_CONJUNCTIONS = ['Not']
   CONJUNCTIONS = ['And','Or']
-
 end
```
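Like the comparison operators (`Gt`, `Eq`, ...), the new arithmetic operators are defined as capitalized keyword strings rather than symbols, so filters spell them out in the text. An assumed example:

```ruby
# 'Add', 'Sub', 'Mul', 'Div', and 'Mod' are matched as literal keywords:
filter = "OriginalListPrice Sub ListPrice Gt 10000"
```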