janeway-jsonpath 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -2
- data/bin/janeway +1 -2
- data/lib/janeway/ast/array_slice_selector.rb +70 -18
- data/lib/janeway/ast/binary_operator.rb +3 -0
- data/lib/janeway/ast/child_segment.rb +4 -6
- data/lib/janeway/ast/current_node.rb +2 -6
- data/lib/janeway/ast/descendant_segment.rb +3 -8
- data/lib/janeway/ast/filter_selector.rb +9 -3
- data/lib/janeway/ast/function.rb +21 -1
- data/lib/janeway/ast/identifier.rb +1 -1
- data/lib/janeway/ast/index_selector.rb +6 -7
- data/lib/janeway/ast/name_selector.rb +12 -10
- data/lib/janeway/ast/number.rb +1 -1
- data/lib/janeway/ast/query.rb +22 -6
- data/lib/janeway/ast/root_node.rb +2 -6
- data/lib/janeway/ast/selector.rb +4 -1
- data/lib/janeway/ast/wildcard_selector.rb +8 -12
- data/lib/janeway/error.rb +7 -1
- data/lib/janeway/functions/count.rb +8 -3
- data/lib/janeway/functions/length.rb +8 -2
- data/lib/janeway/functions/match.rb +4 -2
- data/lib/janeway/functions/search.rb +4 -2
- data/lib/janeway/functions/value.rb +3 -1
- data/lib/janeway/functions.rb +4 -3
- data/lib/janeway/interpreter.rb +102 -110
- data/lib/janeway/lexer.rb +35 -24
- data/lib/janeway/parser.rb +107 -96
- data/lib/janeway/version.rb +1 -1
- data/lib/janeway.rb +5 -5
- metadata +2 -2
@@ -33,7 +33,9 @@ module Janeway
|
|
33
33
|
|
34
34
|
# Read parameter
|
35
35
|
parameters = [parse_function_parameter]
|
36
|
-
|
36
|
+
unless current.type == :group_end
|
37
|
+
raise Error, 'Too many parameters for value() function call'
|
38
|
+
end
|
37
39
|
|
38
40
|
AST::Function.new('value', parameters) do |nodes|
|
39
41
|
if nodes.is_a?(Array) && nodes.size == 1
|
data/lib/janeway/functions.rb
CHANGED
@@ -17,12 +17,12 @@ module Janeway
|
|
17
17
|
in_char_class = false
|
18
18
|
indexes = []
|
19
19
|
chars.each_with_index do |char, i|
|
20
|
-
# FIXME: does not handle escaped '[', ']', or '.'
|
21
20
|
case char
|
22
21
|
when '[' then in_char_class = true
|
23
|
-
when ']'
|
22
|
+
when ']'
|
23
|
+
in_char_class = false unless chars[i - 1] == '\\' # escaped ] does not close char class
|
24
24
|
when '.'
|
25
|
-
next if in_char_class || chars[i-1] == '\\' # escaped dot
|
25
|
+
next if in_char_class || chars[i - 1] == '\\' # escaped dot
|
26
26
|
|
27
27
|
indexes << i # replace this dot
|
28
28
|
end
|
@@ -45,6 +45,7 @@ module Janeway
|
|
45
45
|
when :string then parse_string
|
46
46
|
when :current_node then parse_current_node
|
47
47
|
when :root then parse_root
|
48
|
+
when :group_end then raise Error, 'Function call is missing parameter'
|
48
49
|
else
|
49
50
|
# Invalid, no function uses this.
|
50
51
|
# Instead of crashing here, accept it and let the function return an empty result.
|
data/lib/janeway/interpreter.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative 'parser'
|
4
|
+
|
3
5
|
module Janeway
|
4
|
-
# Tree-walk interpreter to apply the operations from the abstract syntax tree to the input
|
6
|
+
# Tree-walk interpreter to apply the operations from the abstract syntax tree to the input
|
5
7
|
class Interpreter
|
6
|
-
attr_reader :
|
8
|
+
attr_reader :jsonpath, :output, :env, :call_stack
|
7
9
|
|
8
10
|
class Error < Janeway::Error; end
|
9
11
|
|
@@ -23,29 +25,29 @@ module Janeway
|
|
23
25
|
# @param input [Hash, Array]
|
24
26
|
# @param query [String]
|
25
27
|
def self.interpret(input, query)
|
26
|
-
raise ArgumentError, "expect query string, got #{query.inspect}" unless query.is_a?(String)
|
27
|
-
|
28
28
|
tokens = Lexer.lex(query)
|
29
|
-
ast = Parser.new(tokens).parse
|
30
|
-
new(
|
29
|
+
ast = Parser.new(tokens, query).parse
|
30
|
+
new(ast).interpret(input)
|
31
31
|
end
|
32
32
|
|
33
|
-
# @param
|
34
|
-
def initialize(
|
35
|
-
|
33
|
+
# @param query [AST::Query] abstract syntax tree of the jsonpath query
|
34
|
+
def initialize(query)
|
35
|
+
raise ArgumentError, "expect AST::Query, got #{query.inspect}" unless query.is_a?(AST::Query)
|
36
|
+
|
37
|
+
@query = query
|
38
|
+
@jsonpath = query.jsonpath
|
39
|
+
@input = nil
|
36
40
|
end
|
37
41
|
|
38
|
-
# @param
|
42
|
+
# @param input [Array, Hash] object to be searched
|
39
43
|
# @return [Object]
|
40
|
-
def interpret(
|
41
|
-
@
|
42
|
-
raise "expect AST, got #{ast.inspect}" unless ast.is_a?(AST::Query)
|
43
|
-
|
44
|
+
def interpret(input)
|
45
|
+
@input = input
|
44
46
|
unless @input.is_a?(Hash) || @input.is_a?(Array)
|
45
47
|
return [] # can't query on any other types
|
46
48
|
end
|
47
49
|
|
48
|
-
interpret_node(
|
50
|
+
interpret_node(@query.root, nil)
|
49
51
|
end
|
50
52
|
|
51
53
|
private
|
@@ -62,26 +64,10 @@ module Janeway
|
|
62
64
|
when AST::Selector then interpret_selector(node.value, @input)
|
63
65
|
when nil then [@input]
|
64
66
|
else
|
65
|
-
raise "don't know how to interpret #{node.value.class}"
|
67
|
+
raise err("don't know how to interpret #{node.value.class}")
|
66
68
|
end
|
67
69
|
end
|
68
70
|
|
69
|
-
# Prepare a single node from an input node list to be sent to a selector.
|
70
|
-
# (Selectors require a node list as input)
|
71
|
-
# Helper method for interpret_child_segment.
|
72
|
-
#
|
73
|
-
# @param node [Object]
|
74
|
-
def as_node_list(node)
|
75
|
-
# FIXME: method still used? Can this be deleted?
|
76
|
-
result =
|
77
|
-
case node
|
78
|
-
when Array then node
|
79
|
-
when Hash then node
|
80
|
-
else [node]
|
81
|
-
end
|
82
|
-
result
|
83
|
-
end
|
84
|
-
|
85
71
|
# Interpret a list of 1 or more selectors, separated by the union operator.
|
86
72
|
#
|
87
73
|
# @param child_segment [AST::ChildSegment]
|
@@ -105,9 +91,8 @@ module Janeway
|
|
105
91
|
end
|
106
92
|
end
|
107
93
|
|
108
|
-
|
109
94
|
# Send result to the next node in the AST, if any
|
110
|
-
child = child_segment.
|
95
|
+
child = child_segment.next
|
111
96
|
unless child
|
112
97
|
return child_segment.size == 1 ? [results] : results
|
113
98
|
end
|
@@ -129,12 +114,11 @@ module Janeway
|
|
129
114
|
|
130
115
|
# early exit, no point continuing the chain with no results
|
131
116
|
|
132
|
-
return [result] unless selector.
|
117
|
+
return [result] unless selector.next
|
133
118
|
|
134
119
|
# Interpret child using output of this name selector, and return result
|
135
|
-
child = selector.
|
136
|
-
|
137
|
-
results
|
120
|
+
child = selector.next
|
121
|
+
send(:"interpret_#{child.type}", child, result)
|
138
122
|
end
|
139
123
|
|
140
124
|
# Filter the input by returning the array element with the given index.
|
@@ -152,11 +136,10 @@ module Janeway
|
|
152
136
|
result = input.fetch(selector.value) # raises IndexError if no such index
|
153
137
|
|
154
138
|
# Interpret child using output of this name selector, and return result
|
155
|
-
child = selector.
|
139
|
+
child = selector.next
|
156
140
|
return [result] unless child
|
157
141
|
|
158
|
-
|
159
|
-
results
|
142
|
+
send(:"interpret_#{child.type}", child, result)
|
160
143
|
rescue IndexError
|
161
144
|
[] # returns empty array if no such index
|
162
145
|
end
|
@@ -178,15 +161,13 @@ module Janeway
|
|
178
161
|
end
|
179
162
|
|
180
163
|
return values if values.empty? # early exit, no need for further processing on empty list
|
181
|
-
return values unless selector.
|
164
|
+
return values unless selector.next
|
182
165
|
|
183
|
-
# Apply
|
184
|
-
|
185
|
-
child = selector.child
|
166
|
+
# Apply child selector to each node in the output node list
|
167
|
+
child = selector.next
|
186
168
|
results = []
|
187
169
|
values.each do |value|
|
188
|
-
|
189
|
-
results << result.first unless result.empty?
|
170
|
+
results.concat send(:"interpret_#{child.type}", child, value)
|
190
171
|
end
|
191
172
|
results
|
192
173
|
end
|
@@ -199,23 +180,31 @@ module Janeway
|
|
199
180
|
# @return [Array]
|
200
181
|
def interpret_array_slice_selector(selector, input)
|
201
182
|
return [] unless input.is_a?(Array)
|
202
|
-
return [] if selector
|
203
|
-
|
204
|
-
# Calculate the "real" start and end index based on the array size
|
205
|
-
start_index = selector.start_index(input.size)
|
206
|
-
last_index = selector.end_index(input.size)
|
183
|
+
return [] if selector&.step&.zero? # IETF: When step is 0, no elements are selected.
|
207
184
|
|
185
|
+
# Calculate the upper and lower indices of the target range
|
186
|
+
lower = selector.lower_index(input.size)
|
187
|
+
upper = selector.upper_index(input.size)
|
208
188
|
|
209
|
-
# Collect values from target indices.
|
210
|
-
results =
|
211
|
-
|
212
|
-
|
189
|
+
# Collect values from target indices. Omit the value from the final index.
|
190
|
+
results =
|
191
|
+
if selector.step.positive?
|
192
|
+
lower.step(to: upper - 1, by: selector.step).map { input[_1] }
|
193
|
+
else
|
194
|
+
upper.step(to: lower + 1, by: selector.step).map { input[_1] }
|
195
|
+
end
|
213
196
|
|
214
197
|
# Interpret child using output of this name selector, and return result
|
215
|
-
child = selector.
|
198
|
+
child = selector.next
|
216
199
|
return results unless child
|
217
200
|
|
218
|
-
|
201
|
+
# Apply child selector to each node in the output node list
|
202
|
+
node_list = results
|
203
|
+
results = []
|
204
|
+
node_list.each do |node|
|
205
|
+
results.concat send(:"interpret_#{child.type}", child, node)
|
206
|
+
end
|
207
|
+
results
|
219
208
|
end
|
220
209
|
|
221
210
|
# Return the set of values from the input for which the filter is true.
|
@@ -231,12 +220,11 @@ module Janeway
|
|
231
220
|
case input
|
232
221
|
when Array then input
|
233
222
|
when Hash then input.values
|
234
|
-
else return []
|
223
|
+
else return [] # early exit
|
235
224
|
end
|
236
225
|
|
237
226
|
results = []
|
238
227
|
values.each do |value|
|
239
|
-
|
240
228
|
# Run filter and interpret result
|
241
229
|
result = interpret_node(selector.value, value)
|
242
230
|
|
@@ -249,11 +237,16 @@ module Janeway
|
|
249
237
|
end
|
250
238
|
end
|
251
239
|
|
252
|
-
|
240
|
+
child = selector.next
|
241
|
+
return results unless child
|
253
242
|
|
254
|
-
#
|
255
|
-
|
256
|
-
|
243
|
+
# Apply child selector to each node in the output node list
|
244
|
+
node_list = results
|
245
|
+
results = []
|
246
|
+
node_list.each do |node|
|
247
|
+
results.concat send(:"interpret_#{child.type}", child, node)
|
248
|
+
end
|
249
|
+
results
|
257
250
|
end
|
258
251
|
|
259
252
|
# Combine results from selectors into a single list.
|
@@ -277,12 +270,7 @@ module Janeway
|
|
277
270
|
# @param input [Object]
|
278
271
|
# @return [Array<AST::Expression>] node list
|
279
272
|
def interpret_descendant_segment(descendant_segment, input)
|
280
|
-
|
281
|
-
|
282
|
-
return results unless descendant_segment.child
|
283
|
-
|
284
|
-
child = descendant_segment.child
|
285
|
-
send(:"interpret_#{child.type}", child, results)
|
273
|
+
visit(input) { |node| interpret_node(descendant_segment.next, node) }
|
286
274
|
end
|
287
275
|
|
288
276
|
# Visit all descendants of `root`.
|
@@ -329,14 +317,13 @@ module Janeway
|
|
329
317
|
# @param node [AST::Expression]
|
330
318
|
# @param input [Object]
|
331
319
|
def interpret_node_as_value(node, input)
|
332
|
-
|
333
320
|
# nodes must be singular queries or literals
|
334
321
|
case node
|
335
322
|
when AST::CurrentNode, AST::RootNode
|
336
|
-
raise
|
323
|
+
raise err("Expression #{node} does not produce a singular value for comparison") unless node.singular_query?
|
337
324
|
when AST::Number, AST::StringType, AST::Null, AST::Function, AST::Boolean then nil
|
338
325
|
else
|
339
|
-
raise "Invalid expression for comparison: #{node}"
|
326
|
+
raise err("Invalid expression for comparison: #{node}")
|
340
327
|
end
|
341
328
|
|
342
329
|
result = interpret_node(node, input)
|
@@ -352,7 +339,7 @@ module Janeway
|
|
352
339
|
return result if result.empty?
|
353
340
|
|
354
341
|
# Return the only node in the node list
|
355
|
-
raise 'node list contains multiple elements but this is a comparison' unless result.size == 1
|
342
|
+
raise err('node list contains multiple elements but this is a comparison') unless result.size == 1
|
356
343
|
|
357
344
|
result.first
|
358
345
|
end
|
@@ -366,7 +353,7 @@ module Janeway
|
|
366
353
|
|
367
354
|
return node_list.first if node_list.size == 1
|
368
355
|
|
369
|
-
raise "don't know how to handle node list with size > 1: #{node_list.inspect}"
|
356
|
+
raise err("don't know how to handle node list with size > 1: #{node_list.inspect}")
|
370
357
|
end
|
371
358
|
|
372
359
|
# Evaluate a selector and return the result
|
@@ -379,7 +366,7 @@ module Janeway
|
|
379
366
|
when AST::ArraySliceSelector then interpret_array_slice_selector(selector, input)
|
380
367
|
when AST::FilterSelector then interpret_filter_selector(selector, input)
|
381
368
|
else
|
382
|
-
raise "Not a selector: #{selector.inspect}"
|
369
|
+
raise err("Not a selector: #{selector.inspect}")
|
383
370
|
end
|
384
371
|
end
|
385
372
|
|
@@ -387,7 +374,8 @@ module Janeway
|
|
387
374
|
#
|
388
375
|
# The result is an Array containing all results of evaluating the CurrentNode's selector (if any.)
|
389
376
|
#
|
390
|
-
# If the selector extracted values from nodes such as strings, numbers or nil/null,
|
377
|
+
# If the selector extracted values from nodes such as strings, numbers or nil/null,
|
378
|
+
# these will be included in the array.
|
391
379
|
# If the selector did not match any node, the array may be empty.
|
392
380
|
# If there was no selector, then the current input node is returned in the array.
|
393
381
|
#
|
@@ -396,21 +384,19 @@ module Janeway
|
|
396
384
|
# @return [Array] Node List containing all results from evaluating this node's selectors.
|
397
385
|
def interpret_current_node(current_node, input)
|
398
386
|
next_expr = current_node.value
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
end
|
413
|
-
result
|
387
|
+
# All of these return a node list
|
388
|
+
case next_expr
|
389
|
+
when AST::NameSelector then interpret_name_selector(next_expr, input)
|
390
|
+
when AST::WildcardSelector then interpret_wildcard_selector(next_expr, input)
|
391
|
+
when AST::IndexSelector then interpret_index_selector(next_expr, input)
|
392
|
+
when AST::ArraySliceSelector then interpret_array_slice_selector(next_expr, input)
|
393
|
+
when AST::FilterSelector then interpret_filter_selector(next_expr, input)
|
394
|
+
when AST::ChildSegment then interpret_child_segment(next_expr, input)
|
395
|
+
when AST::DescendantSegment then interpret_descendant_segment(next_expr, input)
|
396
|
+
when NilClass then input
|
397
|
+
else
|
398
|
+
raise err("don't know how to interpret @#{next_expr}")
|
399
|
+
end
|
414
400
|
end
|
415
401
|
|
416
402
|
def interpret_identifier(identifier, _input)
|
@@ -422,7 +408,7 @@ module Janeway
|
|
422
408
|
call_stack.last.env[identifier.name]
|
423
409
|
else
|
424
410
|
# Undefined variable.
|
425
|
-
raise
|
411
|
+
raise err("Undefined identifier: #{identifier.name}")
|
426
412
|
end
|
427
413
|
end
|
428
414
|
|
@@ -446,13 +432,12 @@ module Janeway
|
|
446
432
|
lhs = interpret_node_as_value(binary_op.left, input)
|
447
433
|
rhs = interpret_node_as_value(binary_op.right, input)
|
448
434
|
else
|
449
|
-
raise "
|
435
|
+
raise err("Don't know how to handle binary operator #{binary_op.inspect}")
|
450
436
|
end
|
451
437
|
send(:"interpret_#{binary_op.operator}", lhs, rhs)
|
452
438
|
end
|
453
439
|
|
454
440
|
def interpret_equal(lhs, rhs)
|
455
|
-
|
456
441
|
# When either side of a comparison results in an empty nodelist or the
|
457
442
|
# special result Nothing (see Section 2.4.1):
|
458
443
|
# A comparison using the operator == yields true if and only if the other
|
@@ -541,14 +526,14 @@ module Janeway
|
|
541
526
|
nil
|
542
527
|
end
|
543
528
|
|
544
|
-
#
|
545
|
-
#
|
529
|
+
# @param op [AST::UnaryOperator]
|
530
|
+
# @param input [Object]
|
546
531
|
def interpret_unary_operator(op, input)
|
532
|
+
# The only other unary operator, "-", is consumed in the parsing stage and never in the AST
|
547
533
|
node_list = send(:"interpret_#{op.operand.type}", op.operand, input)
|
548
534
|
case op.operator
|
549
535
|
when :not then interpret_not(node_list)
|
550
|
-
|
551
|
-
else raise "unknown unary operator #{op.inspect}"
|
536
|
+
else raise err("unknown unary operator #{op.inspect}")
|
552
537
|
end
|
553
538
|
end
|
554
539
|
|
@@ -557,28 +542,25 @@ module Janeway
|
|
557
542
|
# For a boolean, this inverts the meaning of the input.
|
558
543
|
# @return [Boolean]
|
559
544
|
def interpret_not(input)
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
end
|
567
|
-
result
|
545
|
+
case input
|
546
|
+
when Array then input.empty?
|
547
|
+
when TrueClass, FalseClass then !input
|
548
|
+
else
|
549
|
+
raise err("don't know how to apply not operator to #{input.inspect}")
|
550
|
+
end
|
568
551
|
end
|
569
552
|
|
570
553
|
# @param function [AST::Function]
|
571
554
|
# @param input [Hash, Array]
|
572
555
|
def interpret_function(function, input)
|
573
556
|
params = evaluate_function_parameters(function.parameters, function.name, input)
|
574
|
-
|
575
|
-
result
|
557
|
+
function.body.call(*params)
|
576
558
|
end
|
577
559
|
|
578
560
|
# Evaluate the expressions in the parameter list to make the parameter values
|
579
561
|
# to pass in to a JsonPath function.
|
580
562
|
#
|
581
|
-
# The node lists returned by a
|
563
|
+
# The node lists returned by a singular query must be deconstructed into a single value for
|
582
564
|
# parameters of ValueType, this is done here.
|
583
565
|
# For explanation:
|
584
566
|
# @see https://www.rfc-editor.org/rfc/rfc9535.html#name-well-typedness-of-function-
|
@@ -602,6 +584,8 @@ module Janeway
|
|
602
584
|
else
|
603
585
|
result
|
604
586
|
end
|
587
|
+
when AST::Function
|
588
|
+
interpret_function(parameter, input)
|
605
589
|
when AST::StringType, AST::Number
|
606
590
|
interpret_string_type(parameter, input)
|
607
591
|
else
|
@@ -640,5 +624,13 @@ module Janeway
|
|
640
624
|
input # input is a single node, which happens to be an Array
|
641
625
|
end
|
642
626
|
end
|
627
|
+
|
628
|
+
# Return an Interpreter::Error with the specified message, include the query.
|
629
|
+
#
|
630
|
+
# @param msg [String] error message
|
631
|
+
# @return [Interpreter::Error]
|
632
|
+
def err(msg)
|
633
|
+
Error.new(msg, @jsonpath)
|
634
|
+
end
|
643
635
|
end
|
644
636
|
end
|
data/lib/janeway/lexer.rb
CHANGED
@@ -73,10 +73,10 @@ module Janeway
|
|
73
73
|
|
74
74
|
def start_tokenization
|
75
75
|
if WHITESPACE.include?(@source[0]) || WHITESPACE.include?(@source[-1])
|
76
|
-
raise
|
76
|
+
raise err('JSONPath query may not start or end with whitespace')
|
77
77
|
end
|
78
|
-
tokenize while source_uncompleted?
|
79
78
|
|
79
|
+
tokenize while source_uncompleted?
|
80
80
|
tokens << Token.new(:eof, '', nil, after_source_end_location)
|
81
81
|
end
|
82
82
|
|
@@ -99,13 +99,13 @@ module Janeway
|
|
99
99
|
elsif digit?(c)
|
100
100
|
lex_number
|
101
101
|
elsif name_first_char?(c)
|
102
|
-
lex_member_name_shorthand(ignore_keywords: tokens.last
|
102
|
+
lex_member_name_shorthand(ignore_keywords: tokens.last&.type == :dot)
|
103
103
|
end
|
104
104
|
|
105
105
|
if token
|
106
106
|
tokens << token
|
107
107
|
else
|
108
|
-
raise
|
108
|
+
raise err("Unknown character: #{c.inspect}")
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
@@ -126,7 +126,7 @@ module Janeway
|
|
126
126
|
|
127
127
|
def token_from_one_char_lex(lexeme)
|
128
128
|
if %w[. -].include?(lexeme) && WHITESPACE.include?(lookahead)
|
129
|
-
raise
|
129
|
+
raise err("Operator #{lexeme.inspect} must not be followed by whitespace")
|
130
130
|
end
|
131
131
|
|
132
132
|
Token.new(OPERATORS.key(lexeme), lexeme, nil, current_location)
|
@@ -139,7 +139,7 @@ module Janeway
|
|
139
139
|
if TWO_CHAR_LEX.include?(next_two_chars)
|
140
140
|
consume
|
141
141
|
if next_two_chars == '..' && WHITESPACE.include?(lookahead)
|
142
|
-
raise
|
142
|
+
raise err("Operator #{next_two_chars.inspect} must not be followed by whitespace")
|
143
143
|
end
|
144
144
|
Token.new(OPERATORS.key(next_two_chars), next_two_chars, nil, current_location)
|
145
145
|
else
|
@@ -151,7 +151,9 @@ module Janeway
|
|
151
151
|
# @return [Token]
|
152
152
|
def token_from_two_char_lex(lexeme)
|
153
153
|
next_two_chars = [lexeme, lookahead].join
|
154
|
-
|
154
|
+
unless TWO_CHAR_LEX.include?(next_two_chars)
|
155
|
+
raise err("Unknown operator \"#{lexeme}\"")
|
156
|
+
end
|
155
157
|
|
156
158
|
consume
|
157
159
|
Token.new(OPERATORS.key(next_two_chars), next_two_chars, nil, current_location)
|
@@ -183,7 +185,7 @@ module Janeway
|
|
183
185
|
consume # delimiter
|
184
186
|
elsif lookahead(2) == non_delimiter
|
185
187
|
qtype = delimiter == '"' ? 'double' : 'single'
|
186
|
-
raise
|
188
|
+
raise err("Character #{non_delimiter} must not be escaped within #{qtype} quotes")
|
187
189
|
else
|
188
190
|
consume_escape_sequence # consumes multiple chars
|
189
191
|
end
|
@@ -192,10 +194,10 @@ module Janeway
|
|
192
194
|
elsif %w[' "].include?(next_char) && next_char != delimiter
|
193
195
|
consume
|
194
196
|
else
|
195
|
-
raise
|
197
|
+
raise err("invalid character #{next_char.inspect}")
|
196
198
|
end
|
197
199
|
end
|
198
|
-
raise
|
200
|
+
raise err("Unterminated string error: #{literal_chars.join.inspect}") if source_completed?
|
199
201
|
|
200
202
|
consume # closing delimiter
|
201
203
|
|
@@ -211,7 +213,7 @@ module Janeway
|
|
211
213
|
# Read escape char literals, and transform them into the described character
|
212
214
|
# @return [String] single character (possibly multi-byte)
|
213
215
|
def consume_escape_sequence
|
214
|
-
raise '
|
216
|
+
raise err('Expect escape sequence') unless consume == '\\'
|
215
217
|
|
216
218
|
char = consume
|
217
219
|
case char
|
@@ -224,10 +226,10 @@ module Janeway
|
|
224
226
|
when 'u' then consume_unicode_escape_sequence
|
225
227
|
else
|
226
228
|
if unescaped?(char)
|
227
|
-
raise
|
229
|
+
raise err("Character #{char} must not be escaped")
|
228
230
|
else
|
229
231
|
# whatever this is, it is not allowed even when escaped
|
230
|
-
raise
|
232
|
+
raise err("Invalid character #{char.inspect}")
|
231
233
|
end
|
232
234
|
end
|
233
235
|
end
|
@@ -255,7 +257,7 @@ module Janeway
|
|
255
257
|
return hex_str.hex.chr('UTF-8') unless hex_str.upcase.start_with?('D')
|
256
258
|
|
257
259
|
# hex string starts with D, but is still non-surrogate
|
258
|
-
return hex_str.hex.
|
260
|
+
return [hex_str.hex].pack('U') if '01234567'.include?(hex_str[1])
|
259
261
|
|
260
262
|
# hex value is in the high-surrogate or low-surrogate range.
|
261
263
|
|
@@ -273,11 +275,11 @@ module Janeway
|
|
273
275
|
return convert_surrogate_pair_to_codepoint(hex_str, hex_str2) if prefix == '\\u' && low_surrogate?(hex_str2)
|
274
276
|
|
275
277
|
# Not allowed to have high surrogate that is not followed by low surrogate
|
276
|
-
raise "
|
278
|
+
raise err("Invalid unicode escape sequence: \\u#{hex_str2}")
|
277
279
|
|
278
280
|
end
|
279
281
|
# Not allowed to have low surrogate that is not preceded by high surrogate
|
280
|
-
raise "
|
282
|
+
raise err("Invalid unicode escape sequence: \\u#{hex_str}")
|
281
283
|
end
|
282
284
|
|
283
285
|
# Convert a valid UTF-16 surrogate pair into a UTF-8 string containing a single code point.
|
@@ -313,7 +315,7 @@ module Janeway
|
|
313
315
|
end
|
314
316
|
|
315
317
|
# Consume and return 4 hex digits from the source. Either upper or lower case is accepted.
|
316
|
-
# No
|
318
|
+
# No judgment is made here on whether the resulting sequence is valid,
|
317
319
|
# as long as it is 4 hex digits.
|
318
320
|
#
|
319
321
|
# @return [String]
|
@@ -326,10 +328,10 @@ module Janeway
|
|
326
328
|
when 0x40..0x46 then next # 'A'..'F'
|
327
329
|
when 0x61..0x66 then next # 'a'..'f'
|
328
330
|
else
|
329
|
-
raise "
|
331
|
+
raise err("Invalid unicode escape sequence: \\u#{hex_digits.join}")
|
330
332
|
end
|
331
333
|
end
|
332
|
-
raise "
|
334
|
+
raise err("Incomplete unicode escape sequence: \\u#{hex_digits.join}") if hex_digits.size < 4
|
333
335
|
|
334
336
|
hex_digits.join
|
335
337
|
end
|
@@ -355,14 +357,14 @@ module Janeway
|
|
355
357
|
end
|
356
358
|
unless digit?(lookahead)
|
357
359
|
lexeme = source[lexeme_start_p..(next_p - 1)]
|
358
|
-
raise
|
360
|
+
raise err("Exponent 'e' must be followed by number: #{lexeme.inspect}")
|
359
361
|
end
|
360
362
|
consume_digits
|
361
363
|
end
|
362
364
|
|
363
365
|
lexeme = source[lexeme_start_p..(next_p - 1)]
|
364
366
|
if lexeme.start_with?('0') && lexeme.size > 1
|
365
|
-
raise
|
367
|
+
raise err("Number may not start with leading zero: #{lexeme.inspect}")
|
366
368
|
end
|
367
369
|
|
368
370
|
literal =
|
@@ -375,7 +377,7 @@ module Janeway
|
|
375
377
|
end
|
376
378
|
|
377
379
|
# Consume an alphanumeric string.
|
378
|
-
# If `ignore_keywords`, the result is
|
380
|
+
# If `ignore_keywords`, the result is always an :identifier token.
|
379
381
|
# Otherwise, keywords and function names will be recognized and tokenized as those types.
|
380
382
|
#
|
381
383
|
# @param ignore_keywords [Boolean]
|
@@ -445,7 +447,7 @@ module Janeway
|
|
445
447
|
# True if character is suitable as the first character in a name selector
|
446
448
|
# using shorthand notation (ie. no bracket notation.)
|
447
449
|
#
|
448
|
-
# Defined in RFC9535 by
|
450
|
+
# Defined in RFC9535 by this ABNF grammar:
|
449
451
|
# name-first = ALPHA /
|
450
452
|
# "_" /
|
451
453
|
# %x80-D7FF /
|
@@ -490,8 +492,9 @@ module Janeway
|
|
490
492
|
:identifier
|
491
493
|
end
|
492
494
|
if type == :function && WHITESPACE.include?(lookahead)
|
493
|
-
raise
|
495
|
+
raise err("Function name \"#{identifier}\" must not be followed by whitespace")
|
494
496
|
end
|
497
|
+
|
495
498
|
Token.new(type, identifier, identifier, current_location)
|
496
499
|
end
|
497
500
|
|
@@ -510,5 +513,13 @@ module Janeway
|
|
510
513
|
def after_source_end_location
|
511
514
|
Location.new(next_p, 1)
|
512
515
|
end
|
516
|
+
|
517
|
+
# Return a Lexer::Error with the specified message, include the query and location
|
518
|
+
#
|
519
|
+
# @param msg [String] error message
|
520
|
+
# @return [Lexer::Error]
|
521
|
+
def err(msg)
|
522
|
+
Error.new(msg, @source, current_location)
|
523
|
+
end
|
513
524
|
end
|
514
525
|
end
|