json_p3 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/.rubocop.yml +14 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +7 -0
- data/LICENCE +21 -0
- data/README.md +353 -0
- data/Rakefile +23 -0
- data/Steepfile +27 -0
- data/lib/json_p3/cache.rb +40 -0
- data/lib/json_p3/environment.rb +76 -0
- data/lib/json_p3/errors.rb +49 -0
- data/lib/json_p3/filter.rb +426 -0
- data/lib/json_p3/function.rb +16 -0
- data/lib/json_p3/function_extensions/count.rb +15 -0
- data/lib/json_p3/function_extensions/length.rb +17 -0
- data/lib/json_p3/function_extensions/match.rb +62 -0
- data/lib/json_p3/function_extensions/pattern.rb +39 -0
- data/lib/json_p3/function_extensions/search.rb +44 -0
- data/lib/json_p3/function_extensions/value.rb +15 -0
- data/lib/json_p3/lexer.rb +420 -0
- data/lib/json_p3/node.rb +42 -0
- data/lib/json_p3/parser.rb +553 -0
- data/lib/json_p3/path.rb +42 -0
- data/lib/json_p3/segment.rb +102 -0
- data/lib/json_p3/selector.rb +285 -0
- data/lib/json_p3/token.rb +74 -0
- data/lib/json_p3/unescape.rb +112 -0
- data/lib/json_p3/version.rb +5 -0
- data/lib/json_p3.rb +17 -0
- data/performance/benchmark.rb +33 -0
- data/performance/benchmark_ips.rb +29 -0
- data/performance/benchmark_small_citylots.rb +18 -0
- data/performance/memory_profile.rb +19 -0
- data/performance/memory_profile_small_citylots.rb +14 -0
- data/performance/profile.rb +30 -0
- data/sig/json_p3.rbs +1058 -0
- data.tar.gz.sig +1 -0
- metadata +110 -0
- metadata.gz.sig +0 -0
@@ -0,0 +1,420 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
4
|
+
require "strscan"
|
5
|
+
require_relative "errors"
|
6
|
+
require_relative "token"
|
7
|
+
|
8
|
+
module JSONP3 # rubocop:disable Style/Documentation
|
9
|
+
# Tokenize the JSONPath expression _query_.
#
# A Lexer is run over _query_ and everything it produced is handed back. If
# the final token is an error token, it is raised as an exception instead.
#
# @param query [String] the JSONPath expression to tokenize.
# @return [Array<Token>]
# @raise [JSONPathSyntaxError] if the last token has type Token::ERROR.
def self.tokenize(query)
  lexer = Lexer.new(query)
  lexer.run

  tokens = lexer.tokens
  final = tokens.last
  return tokens unless final && final.type == Token::ERROR

  # The bare `raise` is only reached when an error token has no message.
  raise JSONPathSyntaxError.new(final.message || raise, final)
end
|
25
|
+
|
26
|
+
# JSONPath query expression lexical scanner.
|
27
|
+
#
|
28
|
+
# @see tokenize
|
29
|
+
class Lexer # rubocop:disable Metrics/ClassLength
|
30
|
+
# An optional minus sign followed by one or more decimal digits.
RE_INT = /-?[0-9]+/

# A shorthand member name. Any non-ASCII code point up to U+10FFFF is
# accepted, so names using characters beyond U+FFFF match too.
RE_NAME = /[\u0080-\u{10FFFF}a-zA-Z_][\u0080-\u{10FFFF}a-zA-Z0-9_-]*/

# A run of space, line feed, carriage return or tab characters.
RE_WHITESPACE = /[ \n\r\t]+/

# Characters allowed after a backslash inside a quoted string, in addition to
# the string's own quote character.
S_ESCAPES = Set["b", "f", "n", "r", "t", "u", "/", "\\"].freeze
|
34
|
+
|
35
|
+
# @dynamic tokens
|
36
|
+
attr_reader :tokens
|
37
|
+
|
38
|
+
# Prepare a lexer for the JSONPath expression _query_.
#
# The lexer works on its own frozen copy of _query_ instead of freezing the
# caller's string in place, so the caller's object is left as it was.
#
# @param query [String] the JSONPath expression to scan.
def initialize(query)
  @filter_depth = 0 # bumped when a filter selector opens, dropped when it closes
  @paren_stack = [] # one open-parenthesis counter per function call in progress
  @tokens = []
  @start = 0 # character index where the token being scanned begins
  @query = query.frozen? ? query : query.dup.freeze
  @scanner = StringScanner.new(@query)
end
|
46
|
+
|
47
|
+
# Drive the state machine until it halts.
#
# Scanning starts in the :lex_root state. Each state is a method on this
# lexer, looked up by name; its return value names the next state, and nil
# stops the loop.
def run
  state = :lex_root

  until state.nil?
    handler = method(state)
    state = handler.call
  end
end
|
51
|
+
|
52
|
+
protected
|
53
|
+
|
54
|
+
# Append a new token of the given type and start the next token at the
# scanner's current character position.
# @param token_type [Symbol] one of the constants defined on the _Token_ class.
# @param value [String | nil] the token's value, if it is known. Otherwise the
#   value is sliced from @query, between the token start and the scanner's
#   current character position. Passing it is a performance optimization.
def emit(token_type, value = nil)
  stop = @scanner.charpos
  text = value || @query[@start...stop]
  @tokens << Token.new(token_type, text, @start, @query)
  @start = stop
end
|
62
|
+
|
63
|
+
# Consume one character from the scanner and return it.
# @return [String] the character read, or an empty string when the scanner
#   has nothing left to read.
def next
  char = @scanner.getch
  char.nil? ? "" : char
end
|
66
|
+
|
67
|
+
# Drop whatever has been scanned since the last token by moving the token
# start up to the scanner's current character position.
def ignore
  position = @scanner.charpos
  @start = position
end
|
70
|
+
|
71
|
+
# Step the scanner back over the character that precedes its position.
#
# The scanner position is a byte offset, so the step is the byte size of the
# preceding character rather than a fixed 1. That keeps the scanner on a
# character boundary when the character just read was multi-byte.
# Does nothing when the scanner is at the start of its input.
def backup
  return if @scanner.pos.zero?

  previous = @scanner.string[@scanner.charpos - 1]
  @scanner.pos -= previous.bytesize
end
|
75
|
+
|
76
|
+
# Return the next character without consuming it.
#
# The whole character is returned, however many bytes it occupies, so the
# result is never a fragment of a multi-byte character.
# @return [String] the next character, or an empty string when the scanner
#   has nothing left to read.
def peek
  # /m lets the dot match a line feed as well.
  @scanner.check(/./m) || ""
end
|
80
|
+
|
81
|
+
# Try to match _pattern_ at the scanner's current position. On a match the
# scanner moves past the matched text; otherwise it stays where it is.
# @param pattern [Regexp, String] what to match.
# @return [Boolean] true if _pattern_ matched.
def accept?(pattern)
  @scanner.scan(pattern) ? true : false
end
|
85
|
+
|
86
|
+
# Consume a run of digits, optionally preceded by a minus sign, if one starts
# at the scanner's current position. Exponents are not handled here.
# @return [Boolean] true if an integer was consumed.
def accept_int?
  digits = @scanner.scan(RE_INT)
  digits ? true : false
end
|
91
|
+
|
92
|
+
# Skip a run of whitespace at the scanner's current position, if there is
# one, and leave it out of the token being scanned.
# @return [Boolean] true if any whitespace was skipped.
def ignore_whitespace?
  return false unless @scanner.scan(RE_WHITESPACE)

  ignore
  true
end
|
100
|
+
|
101
|
+
# Append an error token carrying _message_. Its value is the text scanned
# since the current token start, or an empty string if that slice is nil.
# @param message [String] a description of the problem.
def error(message)
  text = @query[@start...@scanner.charpos] || ""
  @tokens << Token.new(Token::ERROR, text, @start, @query, message: message)
end
|
106
|
+
|
107
|
+
# Initial state: the query must open with the root identifier "$".
# @return [Symbol, nil] :lex_segment after emitting the root token, or nil
#   after recording an error.
def lex_root
  first = self.next

  if first == "$"
    emit(Token::ROOT, "$")
    :lex_segment
  else
    error "expected '$', found '#{first}'"
    nil
  end
end
|
118
|
+
|
119
|
+
# Scan the start of a segment: ".", ".." or "[", or the end of the query.
#
# Inside a filter expression, any other character is put back and left for
# the filter state to deal with.
# @return [Symbol, nil] the next state, or nil at end of input or after
#   recording an error.
def lex_segment # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
  trailing_space = accept?(RE_WHITESPACE) && peek.empty?
  if trailing_space
    error "unexpected trailing whitespace"
    return nil
  end

  ignore
  char = self.next

  case char
  when ""
    emit(Token::EOI, "")
    nil
  when "["
    emit(Token::LBRACKET, "[")
    :lex_inside_bracketed_segment
  when "."
    if peek == "."
      self.next
      emit(Token::DOUBLE_DOT, "..")
      :lex_descendant_segment
    else
      :lex_shorthand_selector
    end
  else
    unless @filter_depth.positive?
      error "expected '.', '..' or a bracketed selection, found '#{char}'"
      return nil
    end

    backup
    :lex_inside_filter
  end
end
|
151
|
+
|
152
|
+
# Scan what follows "..": a wildcard, an opening bracket or a shorthand name.
#
# The next character is inspected without being consumed, so nothing has to
# be put back before a name is matched. That keeps the scanner on a character
# boundary when the name begins with a multi-byte character.
# @return [Symbol, nil] the next state, or nil after recording an error.
def lex_descendant_segment # rubocop:disable Metrics/MethodLength
  case peek
  when ""
    error "bald descendant segment"
    nil
  when "*"
    self.next
    emit(Token::WILD, "*")
    :lex_segment
  when "["
    self.next
    emit(Token::LBRACKET, "[")
    :lex_inside_bracketed_segment
  else
    if accept?(RE_NAME)
      emit(Token::NAME)
      :lex_segment
    else
      c = self.next
      error "unexpected descendant selection token '#{c}'"
      nil
    end
  end
end
|
175
|
+
|
176
|
+
# Scan what follows a single ".": a wildcard or a shorthand name.
#
# A dot at the very end of the query, or a dot followed by whitespace, is
# recorded as an error.
# @return [Symbol, nil] :lex_segment after emitting a token, or nil after
#   recording an error.
def lex_shorthand_selector # rubocop:disable Metrics/MethodLength
  if peek.empty?
    error "unexpected trailing dot"
    return nil
  end

  ignore # drop the dot

  if accept?(RE_WHITESPACE)
    error "unexpected whitespace after dot"
    return nil
  end

  if peek == "*"
    self.next
    emit(Token::WILD, "*")
  elsif accept?(RE_NAME)
    emit(Token::NAME)
  else
    unexpected = self.next
    error "unexpected shorthand selector '#{unexpected}'"
    return nil
  end

  :lex_segment
end
|
204
|
+
|
205
|
+
# Scan selectors inside "[...]" until the bracket closes or another state has
# to take over (a filter expression or a quoted string).
#
# Whitespace between tokens is skipped. Wildcards, commas, colons and
# integers are emitted here and scanning carries on in this state.
# @return [Symbol, nil] the next state, or nil after recording an error.
def lex_inside_bracketed_segment # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
  loop do # rubocop:disable Metrics/BlockLength
    ignore_whitespace?
    char = self.next

    case char
    when ""
      error "unclosed bracketed selection"
      return nil
    when "]"
      emit(Token::RBRACKET, "]")
      # A bracket closed inside a filter hands control back to the filter.
      return :lex_inside_filter if @filter_depth.positive?

      return :lex_segment
    when "?"
      emit(Token::FILTER, "?")
      @filter_depth += 1
      return :lex_inside_filter
    when "'"
      return :lex_single_quoted_string_inside_bracketed_segment
    when '"'
      return :lex_double_quoted_string_inside_bracketed_segment
    when "*"
      emit(Token::WILD, "*")
    when ","
      emit(Token::COMMA, ",")
    when ":"
      emit(Token::COLON, ":")
    else
      backup
      unless accept_int?
        error "unexpected token '#{char}' in bracketed selection"
        return nil
      end

      # Index selector or part of a slice selector.
      emit Token::INDEX
    end
  end
end
|
243
|
+
|
244
|
+
# Scan tokens inside a filter expression until another state has to take
# over: a query ("$", "@" or "."), a quoted string, or the end of the filter.
#
# Whitespace between tokens is skipped. Operators, parentheses, numbers,
# keywords and function names are emitted here and scanning carries on in
# this state. The keywords true, false and null are only recognized when they
# are not immediately followed by another function-name character, so a
# function whose name merely starts with a keyword is scanned as one name.
# @return [Symbol, nil] the next state, or nil after recording an error.
def lex_inside_filter # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  loop do # rubocop:disable Metrics/BlockLength
    ignore_whitespace?
    c = self.next

    case c
    when ""
      error "unclosed bracketed selection"
      return nil
    when "]"
      @filter_depth -= 1
      # NOTE(review): this fires only when exactly one function call is open.
      # With two or more open calls the "]" is handed back without an error,
      # and a nested filter closing inside one function argument also arrives
      # here with a stack of length 1. Confirm that is intended.
      if @paren_stack.length == 1
        error "unbalanced parentheses"
        return nil
      end

      # Put "]" back so the bracketed-segment state emits it.
      backup
      return :lex_inside_bracketed_segment
    when ","
      emit(Token::COMMA, ",")
      # If we have unbalanced parens, we are inside a function call and a
      # comma separates arguments. Otherwise a comma separates selectors.
      next if @paren_stack.length.positive?

      @filter_depth -= 1
      return :lex_inside_bracketed_segment
    when "'"
      return :lex_single_quoted_string_inside_filter_expression
    when '"'
      return :lex_double_quoted_string_inside_filter_expression
    when "("
      emit(Token::LPAREN, "(")
      # Are we in a function call? If so, a function argument contains parens.
      @paren_stack[-1] += 1 if @paren_stack.length.positive?
    when ")"
      emit(Token::RPAREN, ")")
      # Are we closing a function call or a parenthesized expression?
      if @paren_stack.length.positive?
        if @paren_stack[-1] == 1
          @paren_stack.pop
        else
          @paren_stack[-1] -= 1
        end
      end
    when "$"
      emit(Token::ROOT, "$")
      return :lex_segment
    when "@"
      emit(Token::CURRENT, "@")
      return :lex_segment
    when "."
      backup
      return :lex_segment
    when "!"
      if peek == "="
        self.next
        emit(Token::NE, "!=")
      else
        emit(Token::NOT, "!")
      end
    when "="
      if peek == "="
        self.next
        emit(Token::EQ, "==")
      else
        backup
        error "unexpected filter selector token '#{c}'"
        return nil
      end
    when "<"
      if peek == "="
        self.next
        emit(Token::LE, "<=")
      else
        emit(Token::LT, "<")
      end
    when ">"
      if peek == "="
        self.next
        emit(Token::GE, ">=")
      else
        emit(Token::GT, ">")
      end
    else
      backup
      if accept_int?
        if peek == "."
          # A float
          self.next
          unless accept_int? # rubocop:disable Metrics/BlockNesting
            error "a fractional digit is required after a decimal point"
            return nil
          end

          accept?(/[eE][+-]?[0-9]+/)
          emit Token::FLOAT
        # An int, or float if exponent is negative
        elsif accept?(/[eE]-[0-9]+/)
          emit Token::FLOAT
        else
          accept?(/[eE][+-]?[0-9]+/)
          emit Token::INT
        end
      elsif accept?("&&")
        emit(Token::AND, "&&")
      elsif accept?("||")
        emit(Token::OR, "||")
      # The lookahead stops a keyword from matching the front of a longer name.
      elsif accept?(/true(?![a-z_0-9])/)
        emit(Token::TRUE, "true")
      elsif accept?(/false(?![a-z_0-9])/)
        emit(Token::FALSE, "false")
      elsif accept?(/null(?![a-z_0-9])/)
        emit(Token::NULL, "null")
      elsif accept?(/[a-z][a-z_0-9]*/)
        unless peek == "("
          error "unexpected filter selector token"
          return nil
        end
        # Function name
        # Keep track of parentheses for this function call.
        @paren_stack << 1
        emit Token::FUNCTION
        self.next
        ignore # move past LPAREN
      else
        error "unexpected filter selector token '#{c}'"
        return nil
      end
    end
  end
end
|
374
|
+
|
375
|
+
class << self
  # Build the body of a state method that scans a quoted string.
  #
  # The returned proc is meant to be installed with define_method. When run
  # just after the opening quote has been consumed, it reads up to the
  # matching closing quote, emits everything between the quotes as a single
  # token and moves past the closing quote. A backslash must be followed by
  # one of S_ESCAPES or by _quote_ itself.
  #
  # @param quote [String] the quote character that closes the string.
  # @param state [Symbol] the state to return to once the string is closed.
  # @param token [Symbol] the token type to emit for the string.
  # @return [Proc] a state body that returns _state_, or nil after recording
  #   an error.
  def lex_string_factory(quote, state, token) # rubocop:disable Metrics/MethodLength
    proc do
      # @type self: Lexer
      ignore # move past opening quote

      loop do
        char = self.next
        upcoming = peek

        if char.empty?
          error "unclosed string starting at index #{@start}"
          return nil
        elsif char == "\\"
          unless S_ESCAPES.member?(upcoming) || upcoming == quote
            error "invalid escape"
            return nil
          end

          # Skip the escaped character so an escaped quote does not close the string.
          self.next
        elsif char == quote
          # Step back so the emitted token excludes the closing quote.
          backup
          emit(token)
          self.next
          ignore # move past closing quote
          return state
        end
      end
    end
  end
end
|
407
|
+
|
408
|
+
# State methods for quoted strings. Each one is built by lex_string_factory
# from a quote character, the state to resume afterwards and the token type
# to emit.
define_method(
  :lex_double_quoted_string_inside_bracketed_segment,
  lex_string_factory('"', :lex_inside_bracketed_segment, Token::DOUBLE_QUOTE_STRING)
)

define_method(
  :lex_single_quoted_string_inside_bracketed_segment,
  lex_string_factory("'", :lex_inside_bracketed_segment, Token::SINGLE_QUOTE_STRING)
)

define_method(
  :lex_double_quoted_string_inside_filter_expression,
  lex_string_factory('"', :lex_inside_filter, Token::DOUBLE_QUOTE_STRING)
)

define_method(
  :lex_single_quoted_string_inside_filter_expression,
  lex_string_factory("'", :lex_inside_filter, Token::SINGLE_QUOTE_STRING)
)
|
419
|
+
end
|
420
|
+
end
|
data/lib/json_p3/node.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JSONP3
|
4
|
+
# A JSON-like value and its location.
class JSONPathNode
  # Two-character escapes used for names in a normalized path.
  NAME_ESCAPES = {
    "\b" => "\\b",
    "\f" => "\\f",
    "\n" => "\\n",
    "\r" => "\\r",
    "\t" => "\\t",
    "'" => "\\'",
    "\\" => "\\\\"
  }.freeze

  # Characters that cannot appear as-is inside a single-quoted name:
  # control characters, the single quote and the backslash.
  NAME_SPECIALS = /[\x00-\x1F'\\]/

  # @dynamic value, location, root
  attr_reader :value, :location, :root

  # @param value [JSON-like] the value at this node.
  # @param location [Array<String | Integer | Array<String | Integer>>] the sequence of
  #   names and/or indices leading to _value_ in _root_.
  # @param root [JSON-like] the root value containing _value_ at _location_.
  def initialize(value, location, root)
    @value = value
    @location = location
    @root = root
  end

  # Return the normalized path to this node.
  #
  # String keys are written as single-quoted names, with quotes, backslashes
  # and control characters escaped. Every other key is written as an index.
  # @return [String] the normalized path.
  def path
    segments = @location.flatten.map do |key|
      key.is_a?(String) ? "['#{escape_name(key)}']" : "[#{key}]"
    end
    "$#{segments.join}"
  end

  # Return a new node that is a child of this node.
  # @param value the JSON-like value at the new node.
  # @param key [Integer, String] the array index or hash key associated with _value_.
  def new_child(value, key)
    # The parent's location is nested rather than copied; #path flattens it.
    JSONPathNode.new(value, [@location, key], @root)
  end

  def to_s
    "JSONPathNode(#{value} at #{path})"
  end

  private

  # Escape _name_ for use between single quotes in a normalized path.
  # Characters without a two-character escape become \u00xx in lowercase hex.
  # @param name [String]
  # @return [String]
  def escape_name(name)
    name.gsub(NAME_SPECIALS) do |char|
      NAME_ESCAPES.fetch(char) { format("\\u%04x", char.ord) }
    end
  end
end
|
37
|
+
|
38
|
+
# A list of JSONPathNode instances.
#
# It adds nothing to Array. The separate class exists so that an array of
# nodes can be told apart from an array of plain data values, which matters
# when calling filter functions that expect nodes as arguments.
class JSONPathNodeList < Array
end
|
42
|
+
end
|