sparkql 1.2.8 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +4 -0
- data/Rakefile +2 -3
- data/VERSION +1 -1
- data/lib/sparkql/errors.rb +68 -71
- data/lib/sparkql/evaluator.rb +13 -9
- data/lib/sparkql/expression_resolver.rb +2 -3
- data/lib/sparkql/expression_state.rb +7 -9
- data/lib/sparkql/function_resolver.rb +15 -10
- data/lib/sparkql/geo/record_circle.rb +1 -1
- data/lib/sparkql/lexer.rb +54 -56
- data/lib/sparkql/parser.rb +35 -35
- data/lib/sparkql/parser_compatibility.rb +97 -76
- data/lib/sparkql/parser_tools.rb +159 -139
- data/lib/sparkql/token.rb +25 -25
- data/lib/sparkql/version.rb +1 -1
- data/sparkql.gemspec +1 -1
- data/test/unit/errors_test.rb +4 -5
- data/test/unit/evaluator_test.rb +15 -16
- data/test/unit/expression_state_test.rb +14 -15
- data/test/unit/function_resolver_test.rb +125 -161
- data/test/unit/geo/record_circle_test.rb +2 -2
- data/test/unit/lexer_test.rb +15 -16
- data/test/unit/parser_compatability_test.rb +177 -151
- data/test/unit/parser_test.rb +90 -90
- metadata +8 -6
@@ -1,6 +1,5 @@
|
|
1
1
|
# Required interface for existing parser implementations
|
2
2
|
module Sparkql::ParserCompatibility
|
3
|
-
|
4
3
|
MAXIMUM_MULTIPLE_VALUES = 200
|
5
4
|
MAXIMUM_EXPRESSIONS = 75
|
6
5
|
MAXIMUM_LEVEL_DEPTH = 2
|
@@ -9,72 +8,71 @@ module Sparkql::ParserCompatibility
|
|
9
8
|
# Ordered by precedence.
|
10
9
|
FILTER_VALUES = [
|
11
10
|
{
|
12
|
-
:
|
13
|
-
:
|
11
|
+
type: :datetime,
|
12
|
+
operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
|
14
13
|
},
|
15
14
|
{
|
16
|
-
:
|
17
|
-
:
|
15
|
+
type: :date,
|
16
|
+
operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
|
18
17
|
},
|
19
18
|
{
|
20
|
-
:
|
21
|
-
:
|
19
|
+
type: :time,
|
20
|
+
operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
|
22
21
|
},
|
23
22
|
{
|
24
|
-
:
|
25
|
-
:
|
26
|
-
:
|
23
|
+
type: :character,
|
24
|
+
multiple: /^'([^'\\]*(\\.[^'\\]*)*)'/,
|
25
|
+
operators: Sparkql::Token::EQUALITY_OPERATORS
|
27
26
|
},
|
28
27
|
{
|
29
|
-
:
|
30
|
-
:
|
31
|
-
:
|
28
|
+
type: :integer,
|
29
|
+
multiple: /^-?[0-9]+/,
|
30
|
+
operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
|
32
31
|
},
|
33
32
|
{
|
34
|
-
:
|
35
|
-
:
|
36
|
-
:
|
33
|
+
type: :decimal,
|
34
|
+
multiple: /^-?[0-9]+\.[0-9]+/,
|
35
|
+
operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
|
37
36
|
},
|
38
37
|
{
|
39
|
-
:
|
40
|
-
:
|
38
|
+
type: :shape,
|
39
|
+
operators: Sparkql::Token::EQUALITY_OPERATORS
|
41
40
|
},
|
42
41
|
{
|
43
|
-
:
|
44
|
-
:
|
42
|
+
type: :boolean,
|
43
|
+
operators: Sparkql::Token::EQUALITY_OPERATORS
|
45
44
|
},
|
46
45
|
{
|
47
|
-
:
|
48
|
-
:
|
46
|
+
type: :null,
|
47
|
+
operators: Sparkql::Token::EQUALITY_OPERATORS
|
49
48
|
},
|
50
49
|
{
|
51
|
-
:
|
52
|
-
:
|
53
|
-
}
|
54
|
-
]
|
50
|
+
type: :function,
|
51
|
+
operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
|
52
|
+
}
|
53
|
+
].freeze
|
55
54
|
|
56
|
-
OPERATORS_SUPPORTING_MULTIPLES = [
|
55
|
+
OPERATORS_SUPPORTING_MULTIPLES = %w[Eq Ne].freeze
|
57
56
|
|
58
57
|
# To be implemented by child class.
|
59
58
|
# Shall return a valid query string for the respective database,
|
60
59
|
# or nil if the source could not be processed. It may be possible to return a valid
|
61
60
|
# SQL string AND have errors ( as checked by errors? ), but this will be left
|
62
61
|
# to the discretion of the child class.
|
63
|
-
def compile(
|
64
|
-
|
62
|
+
def compile(source, mapper)
|
63
|
+
raise NotImplementedError
|
65
64
|
end
|
66
65
|
|
67
66
|
# Returns a list of expressions tokenized in the following format:
|
68
67
|
# [{ :field => IdentifierName, :operator => "Eq", :value => "'Fargo'", :type => :character, :conjunction => "And" }]
|
69
68
|
# This step will set errors if source is not syntactically correct.
|
70
|
-
def tokenize(
|
69
|
+
def tokenize(source)
|
71
70
|
raise ArgumentError, "You must supply a source string to tokenize!" unless source.is_a?(String)
|
72
71
|
|
73
72
|
# Reset the parser error stack
|
74
73
|
@errors = []
|
75
74
|
|
76
|
-
|
77
|
-
expressions
|
75
|
+
self.parse(source)
|
78
76
|
end
|
79
77
|
|
80
78
|
# Returns an array of errors. This is an array of ParserError objects
|
@@ -93,32 +91,36 @@ module Sparkql::ParserCompatibility
|
|
93
91
|
def errors?
|
94
92
|
process_errors.errors?
|
95
93
|
end
|
94
|
+
|
96
95
|
def fatal_errors?
|
97
96
|
process_errors.fatal_errors?
|
98
97
|
end
|
98
|
+
|
99
99
|
def dropped_errors?
|
100
100
|
process_errors.dropped_errors?
|
101
101
|
end
|
102
|
+
|
102
103
|
def recovered_errors?
|
103
104
|
process_errors.recovered_errors?
|
104
105
|
end
|
105
106
|
|
106
|
-
def escape_value_list(
|
107
|
+
def escape_value_list(expression)
|
107
108
|
final_list = []
|
108
|
-
expression[:value].each do |
|
109
|
+
expression[:value].each do |value|
|
109
110
|
new_exp = {
|
110
|
-
:
|
111
|
-
:
|
111
|
+
value: value,
|
112
|
+
type: expression[:type]
|
112
113
|
}
|
113
114
|
final_list << escape_value(new_exp)
|
114
115
|
end
|
115
116
|
expression[:value] = final_list
|
116
117
|
end
|
117
118
|
|
118
|
-
def escape_value(
|
119
|
+
def escape_value(expression)
|
119
120
|
if expression[:value].is_a? Array
|
120
|
-
return escape_value_list(
|
121
|
+
return escape_value_list(expression)
|
121
122
|
end
|
123
|
+
|
122
124
|
case expression[:type]
|
123
125
|
when :character
|
124
126
|
return character_escape(expression[:value])
|
@@ -142,15 +144,15 @@ module Sparkql::ParserCompatibility
|
|
142
144
|
|
143
145
|
# processes escape characters for a given string. May be overridden by
|
144
146
|
# child classes.
|
145
|
-
def character_escape(
|
146
|
-
string.gsub(
|
147
|
+
def character_escape(string)
|
148
|
+
string.gsub(/^'/, '').gsub(/'$/, '').gsub(/\\'/, "'")
|
147
149
|
end
|
148
150
|
|
149
|
-
def integer_escape(
|
151
|
+
def integer_escape(string)
|
150
152
|
string.to_i
|
151
153
|
end
|
152
154
|
|
153
|
-
def decimal_escape(
|
155
|
+
def decimal_escape(string)
|
154
156
|
string.to_f
|
155
157
|
end
|
156
158
|
|
@@ -158,20 +160,29 @@ module Sparkql::ParserCompatibility
|
|
158
160
|
Date.parse(string)
|
159
161
|
end
|
160
162
|
|
163
|
+
# datetime may have timezone info. Given that, we should honor it when
|
164
|
+
# present, or set an appropriate default when not. Either way, we should
|
165
|
+
# convert to local appropriate for the parser when we're done.
|
166
|
+
#
|
167
|
+
# DateTime in ruby is deprecated as of ruby 3.0. We've switched to the Time
|
168
|
+
# class to be future compatible. The :time type in sparkql != a ruby Time
|
169
|
+
# instance
|
161
170
|
def datetime_escape(string)
|
162
|
-
|
171
|
+
Time.parse(string)
|
163
172
|
end
|
164
173
|
|
174
|
+
# Per the lexer, times don't have any timezone info. When parsing, pick the
|
175
|
+
# proper offset to set things at.
|
165
176
|
def time_escape(string)
|
166
|
-
|
177
|
+
Time.parse("#{string}#{offset}")
|
167
178
|
end
|
168
179
|
|
169
180
|
def boolean_escape(string)
|
170
|
-
"true"
|
181
|
+
string == "true"
|
171
182
|
end
|
172
183
|
|
173
184
|
# Returns the rule hash for a given type
|
174
|
-
def rules_for_type(
|
185
|
+
def rules_for_type(type)
|
175
186
|
FILTER_VALUES.each do |rule|
|
176
187
|
return rule if rule[:type] == type
|
177
188
|
end
|
@@ -179,8 +190,8 @@ module Sparkql::ParserCompatibility
|
|
179
190
|
end
|
180
191
|
|
181
192
|
# true if a given type supports multiple values
|
182
|
-
def supports_multiple?(
|
183
|
-
rules_for_type(type).include?(
|
193
|
+
def supports_multiple?(type)
|
194
|
+
rules_for_type(type).include?(:multiple)
|
184
195
|
end
|
185
196
|
|
186
197
|
# Maximum supported nesting level for the parser filters
|
@@ -202,21 +213,20 @@ module Sparkql::ParserCompatibility
|
|
202
213
|
|
203
214
|
private
|
204
215
|
|
205
|
-
def tokenizer_error(
|
206
|
-
|
216
|
+
def tokenizer_error(error_hash)
|
207
217
|
if @lexer
|
208
218
|
error_hash[:token_index] = @lexer.token_index
|
209
219
|
end
|
210
220
|
|
211
|
-
self.errors << Sparkql::ParserError.new(
|
221
|
+
self.errors << Sparkql::ParserError.new(error_hash)
|
212
222
|
end
|
213
|
-
alias
|
223
|
+
alias compile_error tokenizer_error
|
214
224
|
|
215
225
|
# Checks the type of an expression with what is expected.
|
216
226
|
def check_type!(expression, expected, supports_nulls = true)
|
217
227
|
if (expected == expression[:type] && !expression.key?(:field_manipulations)) ||
|
218
|
-
|
219
|
-
|
228
|
+
(expression.key?(:field_manipulations) && check_function_type?(expression, expected)) ||
|
229
|
+
(supports_nulls && expression[:type] == :null)
|
220
230
|
return true
|
221
231
|
# If the field will be passed into a function,
|
222
232
|
# check the type of the return value of the function
|
@@ -235,7 +245,7 @@ module Sparkql::ParserCompatibility
|
|
235
245
|
expression[:type] = :date
|
236
246
|
expression[:cast] = :datetime
|
237
247
|
if multiple_values?(expression[:value])
|
238
|
-
expression[:value].map!{ |val| coerce_datetime val }
|
248
|
+
expression[:value].map! { |val| coerce_datetime val }
|
239
249
|
else
|
240
250
|
expression[:value] = coerce_datetime expression[:value]
|
241
251
|
end
|
@@ -245,14 +255,15 @@ module Sparkql::ParserCompatibility
|
|
245
255
|
expression[:cast] = :integer
|
246
256
|
return true
|
247
257
|
end
|
258
|
+
|
248
259
|
type_error(expression, expected)
|
249
260
|
false
|
250
261
|
end
|
251
262
|
|
252
|
-
def type_error(
|
253
|
-
|
254
|
-
|
255
|
-
|
263
|
+
def type_error(expression, expected)
|
264
|
+
compile_error(token: expression[:field], expression: expression,
|
265
|
+
message: "expected #{expected} but found #{expression[:type]}",
|
266
|
+
status: :fatal)
|
256
267
|
end
|
257
268
|
|
258
269
|
# If a function is being applied to a field, we check that the return type of
|
@@ -263,15 +274,17 @@ module Sparkql::ParserCompatibility
|
|
263
274
|
end
|
264
275
|
|
265
276
|
def validate_manipulation_types(field_manipulations, expected)
|
266
|
-
|
267
|
-
|
268
|
-
return false
|
277
|
+
case field_manipulations[:type]
|
278
|
+
when :function
|
279
|
+
return false unless supported_function?(field_manipulations[:function_name])
|
280
|
+
|
281
|
+
function = lookup_function(field_manipulations[:function_name])
|
269
282
|
field_manipulations[:args].each_with_index do |arg, index|
|
270
|
-
if arg[:type] == :field
|
271
|
-
return false
|
283
|
+
if arg[:type] == :field && !function[:args][index].include?(:field)
|
284
|
+
return false
|
272
285
|
end
|
273
286
|
end
|
274
|
-
|
287
|
+
when :arithmetic
|
275
288
|
lhs = field_manipulations[:lhs]
|
276
289
|
return false unless validate_side(lhs, expected)
|
277
290
|
|
@@ -285,31 +298,34 @@ module Sparkql::ParserCompatibility
|
|
285
298
|
if side[:type] == :arithmetic
|
286
299
|
return validate_manipulation_types(side, expected)
|
287
300
|
elsif side[:type] == :field
|
288
|
-
return false unless [
|
301
|
+
return false unless %i[decimal integer].include?(expected)
|
289
302
|
elsif side[:type] == :function
|
290
|
-
return false unless [
|
291
|
-
elsif
|
303
|
+
return false unless %i[decimal integer].include?(side[:return_type])
|
304
|
+
elsif !%i[decimal integer].include?(side[:type])
|
292
305
|
return false
|
293
306
|
end
|
307
|
+
|
294
308
|
true
|
295
309
|
end
|
296
310
|
|
297
311
|
# Builds the correct operator based on the type and the value.
|
298
312
|
# default should be the operator provided in the actual filter string
|
299
|
-
def get_operator(expression, default
|
313
|
+
def get_operator(expression, default)
|
300
314
|
f = rules_for_type(expression[:type])
|
301
315
|
if f[:operators].include?(default)
|
302
316
|
if f[:multiple] && range?(expression[:value]) && default == 'Bt'
|
303
317
|
return "Bt"
|
304
318
|
elsif f[:multiple] && multiple_values?(expression[:value])
|
305
319
|
return nil unless operator_supports_multiples?(default)
|
320
|
+
|
306
321
|
return default == "Ne" ? "Not In" : "In"
|
307
322
|
elsif default == "Ne"
|
308
323
|
return "Not Eq"
|
309
324
|
end
|
310
|
-
|
325
|
+
|
326
|
+
default
|
311
327
|
else
|
312
|
-
|
328
|
+
nil
|
313
329
|
end
|
314
330
|
end
|
315
331
|
|
@@ -325,12 +341,17 @@ module Sparkql::ParserCompatibility
|
|
325
341
|
OPERATORS_SUPPORTING_MULTIPLES.include?(operator)
|
326
342
|
end
|
327
343
|
|
328
|
-
|
329
|
-
|
330
|
-
|
344
|
+
# Datetime coercion to date factors in the current time zone when selecting a
|
345
|
+
# date.
|
346
|
+
def coerce_datetime(datetime_string)
|
347
|
+
case datetime_string
|
348
|
+
when /^(\d{4}-\d{2}-\d{2})$/
|
349
|
+
datetime_string
|
350
|
+
when /^(\d{4}-\d{2}-\d{2})/
|
351
|
+
datetime = datetime_escape(datetime_string)
|
352
|
+
datetime.strftime(Sparkql::FunctionResolver::STRFTIME_DATE_FORMAT)
|
331
353
|
else
|
332
|
-
|
354
|
+
datetime_string
|
333
355
|
end
|
334
356
|
end
|
335
|
-
|
336
357
|
end
|