sparkql 1.2.8 → 1.3.0
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +4 -0
- data/Rakefile +2 -3
- data/VERSION +1 -1
- data/lib/sparkql/errors.rb +68 -71
- data/lib/sparkql/evaluator.rb +13 -9
- data/lib/sparkql/expression_resolver.rb +2 -3
- data/lib/sparkql/expression_state.rb +7 -9
- data/lib/sparkql/function_resolver.rb +15 -10
- data/lib/sparkql/geo/record_circle.rb +1 -1
- data/lib/sparkql/lexer.rb +54 -56
- data/lib/sparkql/parser.rb +35 -35
- data/lib/sparkql/parser_compatibility.rb +97 -76
- data/lib/sparkql/parser_tools.rb +159 -139
- data/lib/sparkql/token.rb +25 -25
- data/lib/sparkql/version.rb +1 -1
- data/sparkql.gemspec +1 -1
- data/test/unit/errors_test.rb +4 -5
- data/test/unit/evaluator_test.rb +15 -16
- data/test/unit/expression_state_test.rb +14 -15
- data/test/unit/function_resolver_test.rb +125 -161
- data/test/unit/geo/record_circle_test.rb +2 -2
- data/test/unit/lexer_test.rb +15 -16
- data/test/unit/parser_compatability_test.rb +177 -151
- data/test/unit/parser_test.rb +90 -90
- metadata +8 -6
--- a/data/lib/sparkql/parser_compatibility.rb
+++ b/data/lib/sparkql/parser_compatibility.rb
@@ -1,6 +1,5 @@
 # Required interface for existing parser implementations
 module Sparkql::ParserCompatibility
-
   MAXIMUM_MULTIPLE_VALUES = 200
   MAXIMUM_EXPRESSIONS = 75
   MAXIMUM_LEVEL_DEPTH = 2
@@ -9,72 +8,71 @@ module Sparkql::ParserCompatibility
   # Ordered by precedence.
   FILTER_VALUES = [
     {
-      :type => :datetime,
-      :operators => Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
+      type: :datetime,
+      operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
     },
     {
-      :type => :date,
-      :operators => Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
+      type: :date,
+      operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
     },
     {
-      :type => :time,
-      :operators => Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
+      type: :time,
+      operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
     },
     {
-      :type => :character,
-      :multiple => /^'([^'\\]*(\\.[^'\\]*)*)'/,
-      :operators => Sparkql::Token::EQUALITY_OPERATORS
+      type: :character,
+      multiple: /^'([^'\\]*(\\.[^'\\]*)*)'/,
+      operators: Sparkql::Token::EQUALITY_OPERATORS
     },
     {
-      :type => :integer,
-      :multiple => /^-?[0-9]+/,
-      :operators => Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
+      type: :integer,
+      multiple: /^-?[0-9]+/,
+      operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
     },
     {
-      :type => :decimal,
-      :multiple => /^-?[0-9]+\.[0-9]+/,
-      :operators => Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
+      type: :decimal,
+      multiple: /^-?[0-9]+\.[0-9]+/,
+      operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
     },
     {
-      :type => :shape,
-      :operators => Sparkql::Token::EQUALITY_OPERATORS
+      type: :shape,
+      operators: Sparkql::Token::EQUALITY_OPERATORS
     },
     {
-      :type => :boolean,
-      :operators => Sparkql::Token::EQUALITY_OPERATORS
+      type: :boolean,
+      operators: Sparkql::Token::EQUALITY_OPERATORS
     },
     {
-      :type => :null,
-      :operators => Sparkql::Token::EQUALITY_OPERATORS
+      type: :null,
+      operators: Sparkql::Token::EQUALITY_OPERATORS
     },
     {
-      :type => :function,
-      :operators => Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
-    }
-  ]
+      type: :function,
+      operators: Sparkql::Token::OPERATORS + [Sparkql::Token::RANGE_OPERATOR]
+    }
+  ].freeze
 
-  OPERATORS_SUPPORTING_MULTIPLES = ["Eq", "Ne"]
+  OPERATORS_SUPPORTING_MULTIPLES = %w[Eq Ne].freeze
 
   # To be implemented by child class.
   # Shall return a valid query string for the respective database,
   # or nil if the source could not be processed. It may be possible to return a valid
   # SQL string AND have errors ( as checked by errors? ), but this will be left
   # to the discretion of the child class.
-  def compile( source, mapper )
-    raise NotImplementedError()
+  def compile(source, mapper)
+    raise NotImplementedError
   end
 
   # Returns a list of expressions tokenized in the following format:
   # [{ :field => IdentifierName, :operator => "Eq", :value => "'Fargo'", :type => :character, :conjunction => "And" }]
   # This step will set errors if source is not syntactically correct.
-  def tokenize( source )
+  def tokenize(source)
     raise ArgumentError, "You must supply a source string to tokenize!" unless source.is_a?(String)
 
     # Reset the parser error stack
     @errors = []
 
-    expressions = self.parse(source)
-    expressions
+    self.parse(source)
   end
 
   # Returns an array of errors. This is an array of ParserError objects
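The FILTER_VALUES table is the heart of this file: each entry names a sparkql type, the operators it accepts, and optionally a `multiple` regex used to match one element of a comma-separated value list. A minimal standalone sketch, not gem code, of how such a rule table is consulted (rules_for_type and supports_multiple? later in this diff do the same lookup; the operator lists here are simplified stand-ins for the Token constants):

    FILTER_VALUES = [
      { type: :character, multiple: /^'([^'\\]*(\\.[^'\\]*)*)'/, operators: %w[Eq Ne] },
      { type: :boolean, operators: %w[Eq Ne] }
    ].freeze

    def rules_for_type(type)
      FILTER_VALUES.find { |rule| rule[:type] == type }
    end

    def supports_multiple?(type)
      rules_for_type(type).include?(:multiple) # Hash#include? tests keys
    end

    supports_multiple?(:character) # => true
    supports_multiple?(:boolean)   # => false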
@@ -93,32 +91,36 @@ module Sparkql::ParserCompatibility
   def errors?
     process_errors.errors?
   end
+
   def fatal_errors?
     process_errors.fatal_errors?
   end
+
   def dropped_errors?
     process_errors.dropped_errors?
   end
+
   def recovered_errors?
     process_errors.recovered_errors?
   end
 
-  def escape_value_list( expression )
+  def escape_value_list(expression)
     final_list = []
-    expression[:value].each do | value |
+    expression[:value].each do |value|
       new_exp = {
-        :value => value,
-        :type => expression[:type]
+        value: value,
+        type: expression[:type]
       }
       final_list << escape_value(new_exp)
     end
     expression[:value] = final_list
   end
 
-  def escape_value( expression )
+  def escape_value(expression)
     if expression[:value].is_a? Array
-      return escape_value_list( expression )
+      return escape_value_list(expression)
     end
+
     case expression[:type]
     when :character
       return character_escape(expression[:value])
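A hypothetical round trip through the two methods above (values shown for illustration; this assumes the including parser class): an Array value dispatches to escape_value_list, which rebuilds each element as a one-off expression of the same type and escapes it:

    expression = { type: :character, value: ["'Fargo'", "'Moorhead'"] }
    escape_value(expression)  # recurses via escape_value_list, mutating expression
    expression[:value]        # => ["Fargo", "Moorhead"] (quotes stripped per element)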
@@ -142,15 +144,15 @@ module Sparkql::ParserCompatibility
 
   # processes escape characters for a given string. May be overridden by
   # child classes.
-  def character_escape( string )
-    string.gsub(/^'/,'').gsub(/'$/,'').gsub(/\\'/,"'")
+  def character_escape(string)
+    string.gsub(/^'/, '').gsub(/'$/, '').gsub(/\\'/, "'")
   end
 
-  def integer_escape( string )
+  def integer_escape(string)
     string.to_i
   end
 
-  def decimal_escape( string )
+  def decimal_escape(string)
     string.to_f
   end
 
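The gsub chain is easy to misread; standalone, it strips the surrounding single quotes and then unescapes any embedded \' sequences:

    "'Fargo\\'s'".gsub(/^'/, '').gsub(/'$/, '').gsub(/\\'/, "'")
    # => "Fargo's"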
@@ -158,20 +160,29 @@ module Sparkql::ParserCompatibility
     Date.parse(string)
   end
 
+  # datetime may have timezone info. Given that, we should honor it it when
+  # present or setting an appropriate default when not. Either way, we should
+  # convert to local appropriate for the parser when we're done.
+  #
+  # DateTime in ruby is deprecated as of ruby 3.0. We've switched to the Time
+  # class to be future compatible. The :time type in sparkql != a ruby Time
+  # instance
   def datetime_escape(string)
-    DateTime.parse(string)
+    Time.parse(string)
   end
 
+  # Per the lexer, times don't have any timezone info. When parsing, pick the
+  # proper offset to set things at.
   def time_escape(string)
-    DateTime.parse(string)
+    Time.parse("#{string}#{offset}")
   end
 
   def boolean_escape(string)
-    "true" == string
+    string == "true"
   end
 
   # Returns the rule hash for a given type
-  def rules_for_type( type )
+  def rules_for_type(type)
     FILTER_VALUES.each do |rule|
       return rule if rule[:type] == type
     end
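The DateTime → Time switch is the substantive change in this hunk; Time.parse comes from ruby's time stdlib and honors an offset embedded in the string, while time_escape pins bare times to a zone via `offset`, a method the including parser is assumed to provide. The boolean_escape edit only un-Yodas the comparison; it returned a boolean before and after. A standalone sketch:

    require 'time'

    offset = "-0500" # assumed: zone suffix supplied by the including parser

    Time.parse("2021-06-01T10:30:00+02:00") # embedded offset is honored
    Time.parse("10:30:00#{offset}")         # bare time pinned to the parser's zone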
@@ -179,8 +190,8 @@ module Sparkql::ParserCompatibility
   end
 
   # true if a given type supports multiple values
-  def supports_multiple?( type )
-    rules_for_type(type).include?( :multiple )
+  def supports_multiple?(type)
+    rules_for_type(type).include?(:multiple)
   end
 
   # Maximum supported nesting level for the parser filters
@@ -202,21 +213,20 @@ module Sparkql::ParserCompatibility
 
   private
 
-  def tokenizer_error( error_hash )
-
+  def tokenizer_error(error_hash)
     if @lexer
       error_hash[:token_index] = @lexer.token_index
     end
 
-    self.errors << Sparkql::ParserError.new( error_hash )
+    self.errors << Sparkql::ParserError.new(error_hash)
   end
-  alias :compile_error :tokenizer_error
+  alias compile_error tokenizer_error
 
   # Checks the type of an expression with what is expected.
   def check_type!(expression, expected, supports_nulls = true)
     if (expected == expression[:type] && !expression.key?(:field_manipulations)) ||
-       expression.key?(:field_manipulations) && check_function_type?(expression, expected) ||
-       supports_nulls && expression[:type] == :null
+       (expression.key?(:field_manipulations) && check_function_type?(expression, expected)) ||
+       (supports_nulls && expression[:type] == :null)
       return true
     # If the field will be passed into a function,
     # check the type of the return value of the function
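The alias edit is cosmetic: `alias compile_error tokenizer_error` (the bareword form rubocop prefers) and the older symbol form bind the same method. A standalone illustration:

    class Reporter
      def tokenizer_error(error_hash)
        warn error_hash[:message]
      end
      alias compile_error tokenizer_error
    end

    Reporter.new.compile_error(message: "unexpected token") # warns "unexpected token"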
@@ -235,7 +245,7 @@ module Sparkql::ParserCompatibility
       expression[:type] = :date
       expression[:cast] = :datetime
       if multiple_values?(expression[:value])
-        expression[:value].map!{ |val| coerce_datetime val }
+        expression[:value].map! { |val| coerce_datetime val }
       else
         expression[:value] = coerce_datetime expression[:value]
       end
@@ -245,14 +255,15 @@ module Sparkql::ParserCompatibility
       expression[:cast] = :integer
       return true
     end
+
     type_error(expression, expected)
     false
   end
 
-  def type_error( expression, expected )
-    compile_error(:token => expression[:field], :expression => expression,
-                  :message => "expected #{expected} but found #{expression[:type]}",
-                  :status => :fatal)
+  def type_error(expression, expected)
+    compile_error(token: expression[:field], expression: expression,
+                  message: "expected #{expected} but found #{expression[:type]}",
+                  status: :fatal)
   end
 
   # If a function is being applied to a field, we check that the return type of
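Like most of this diff, the type_error rewrite is syntax modernization only: `key: value` and `:key => value` build the same Hash argument. A quick standalone check:

    def compile_error(opts)
      opts
    end

    compile_error(token: "City", status: :fatal) ==
      compile_error(:token => "City", :status => :fatal) # => true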
@@ -263,15 +274,17 @@ module Sparkql::ParserCompatibility
   end
 
   def validate_manipulation_types(field_manipulations, expected)
-    if field_manipulations[:type] == :function
-      function = lookup_function(field_manipulations[:function_name])
-      return false unless supported_function?(field_manipulations[:function_name])
+    case field_manipulations[:type]
+    when :function
+      return false unless supported_function?(field_manipulations[:function_name])
+
+      function = lookup_function(field_manipulations[:function_name])
       field_manipulations[:args].each_with_index do |arg, index|
-        if arg[:type] == :field
-          return false unless function[:args][index].include?(:field)
+        if arg[:type] == :field && !function[:args][index].include?(:field)
+          return false
         end
       end
-    elsif field_manipulations[:type] == :arithmetic
+    when :arithmetic
       lhs = field_manipulations[:lhs]
       return false unless validate_side(lhs, expected)
 
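The per-argument check relies on function metadata in which `function[:args][index]` lists the types accepted at each position; the exact shape below is an assumption for illustration. A hypothetical run of the same predicate:

    function = { args: [%i[field character]] } # assumed: position 0 accepts a field or a string
    args = [{ type: :field, value: "City" }]

    args.each_with_index.all? do |arg, index|
      arg[:type] != :field || function[:args][index].include?(:field)
    end
    # => true: a field argument is allowed in position 0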
@@ -285,31 +298,34 @@ module Sparkql::ParserCompatibility
     if side[:type] == :arithmetic
       return validate_manipulation_types(side, expected)
     elsif side[:type] == :field
-      return false unless [:decimal, :integer].include?(expected)
+      return false unless %i[decimal integer].include?(expected)
     elsif side[:type] == :function
-      return false unless [:decimal, :integer].include?(side[:return_type])
-    elsif ![:decimal, :integer].include?(side[:type])
+      return false unless %i[decimal integer].include?(side[:return_type])
+    elsif !%i[decimal integer].include?(side[:type])
       return false
     end
+
     true
   end
 
   # Builds the correct operator based on the type and the value.
   # default should be the operator provided in the actual filter string
-  def get_operator(expression, default )
+  def get_operator(expression, default)
     f = rules_for_type(expression[:type])
     if f[:operators].include?(default)
       if f[:multiple] && range?(expression[:value]) && default == 'Bt'
         return "Bt"
       elsif f[:multiple] && multiple_values?(expression[:value])
         return nil unless operator_supports_multiples?(default)
+
         return default == "Ne" ? "Not In" : "In"
       elsif default == "Ne"
         return "Not Eq"
       end
-      return default
+
+      default
     else
-      return nil
+      nil
     end
   end
 
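A condensed standalone sketch of get_operator's decision rules (rule lookup stubbed out, names hypothetical): multi-valued Eq/Ne collapse to In/Not In, a single-valued Ne becomes Not Eq, and any operator the type's rule does not allow yields nil:

    def pick_operator(default, multiple: false)
      allowed = %w[Eq Ne Gt Lt Bt] # stand-in for f[:operators]
      return nil unless allowed.include?(default)

      if multiple
        return nil unless %w[Eq Ne].include?(default) # OPERATORS_SUPPORTING_MULTIPLES
        default == "Ne" ? "Not In" : "In"
      elsif default == "Ne"
        "Not Eq"
      else
        default
      end
    end

    pick_operator("Eq", multiple: true)  # => "In"
    pick_operator("Ne")                  # => "Not Eq"
    pick_operator("Gt", multiple: true)  # => nil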
@@ -325,12 +341,17 @@ module Sparkql::ParserCompatibility
     OPERATORS_SUPPORTING_MULTIPLES.include?(operator)
   end
 
-  def coerce_datetime(datetime)
-    if datestr = datetime.match(/^(\d{4}-\d{2}-\d{2})/)
-      datestr[0]
+  # Datetime coercion to date factors in the current time zone when selecting a
+  # date.
+  def coerce_datetime(datetime_string)
+    case datetime_string
+    when /^(\d{4}-\d{2}-\d{2})$/
+      datetime_string
+    when /^(\d{4}-\d{2}-\d{2})/
+      datetime = datetime_escape(datetime_string)
+      datetime.strftime(Sparkql::FunctionResolver::STRFTIME_DATE_FORMAT)
     else
-      datetime
+      datetime_string
     end
   end
-
 end
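The new coerce_datetime passes bare dates through untouched and reduces full timestamps to a date in the timestamp's own zone; STRFTIME_DATE_FORMAT is assumed here to be "%Y-%m-%d". A standalone sketch of the same case analysis:

    require 'time'

    def coerce_datetime(datetime_string)
      case datetime_string
      when /^(\d{4}-\d{2}-\d{2})$/ then datetime_string # already a bare date
      when /^(\d{4}-\d{2}-\d{2})/  then Time.parse(datetime_string).strftime("%Y-%m-%d")
      else datetime_string                               # not a datetime at all
      end
    end

    coerce_datetime("2025-01-15")                  # => "2025-01-15"
    coerce_datetime("2025-01-15T23:30:00-05:00")   # => "2025-01-15"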