json_p3 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/.rubocop.yml +14 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +7 -0
- data/LICENCE +21 -0
- data/README.md +353 -0
- data/Rakefile +23 -0
- data/Steepfile +27 -0
- data/lib/json_p3/cache.rb +40 -0
- data/lib/json_p3/environment.rb +76 -0
- data/lib/json_p3/errors.rb +49 -0
- data/lib/json_p3/filter.rb +426 -0
- data/lib/json_p3/function.rb +16 -0
- data/lib/json_p3/function_extensions/count.rb +15 -0
- data/lib/json_p3/function_extensions/length.rb +17 -0
- data/lib/json_p3/function_extensions/match.rb +62 -0
- data/lib/json_p3/function_extensions/pattern.rb +39 -0
- data/lib/json_p3/function_extensions/search.rb +44 -0
- data/lib/json_p3/function_extensions/value.rb +15 -0
- data/lib/json_p3/lexer.rb +420 -0
- data/lib/json_p3/node.rb +42 -0
- data/lib/json_p3/parser.rb +553 -0
- data/lib/json_p3/path.rb +42 -0
- data/lib/json_p3/segment.rb +102 -0
- data/lib/json_p3/selector.rb +285 -0
- data/lib/json_p3/token.rb +74 -0
- data/lib/json_p3/unescape.rb +112 -0
- data/lib/json_p3/version.rb +5 -0
- data/lib/json_p3.rb +17 -0
- data/performance/benchmark.rb +33 -0
- data/performance/benchmark_ips.rb +29 -0
- data/performance/benchmark_small_citylots.rb +18 -0
- data/performance/memory_profile.rb +19 -0
- data/performance/memory_profile_small_citylots.rb +14 -0
- data/performance/profile.rb +30 -0
- data/sig/json_p3.rbs +1058 -0
- data.tar.gz.sig +1 -0
- metadata +110 -0
- metadata.gz.sig +0 -0
@@ -0,0 +1,420 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "set"
|
4
|
+
require "strscan"
|
5
|
+
require_relative "errors"
|
6
|
+
require_relative "token"
|
7
|
+
|
8
|
+
module JSONP3 # rubocop:disable Style/Documentation
|
9
|
+
# Tokenize the JSONPath expression _query_.
#
# A new Lexer scans the whole query. If the last token it produced is an
# error token, that token's message is raised as a JSONPathSyntaxError.
#
# @param query [String] the JSONPath expression to tokenize.
# @return [Array<Token>] every token produced by the lexer.
# @raise [JSONPathSyntaxError] if scanning stopped on an error token.
def self.tokenize(query)
  lexer = Lexer.new(query)
  lexer.run
  scanned = lexer.tokens
  final = scanned.last

  return scanned if final.nil? || final.type != Token::ERROR

  # `|| raise` covers an error token that carries no message.
  raise JSONPathSyntaxError.new(final.message || raise, final)
end
|
25
|
+
|
26
|
+
# JSONPath query expression lexical scanner.
|
27
|
+
#
|
28
|
+
# @see tokenize
|
29
|
+
class Lexer # rubocop:disable Metrics/ClassLength
|
30
|
+
# A run of decimal digits, optionally preceded by a minus sign.
RE_INT = /-?[0-9]+/

# A member name written in shorthand notation. Any non-ASCII character is
# accepted, including those beyond the Basic Multilingual Plane
# (U+10000..U+10FFFF), which RFC 9535 allows in `name-first`.
RE_NAME = /[\u0080-\u{10FFFF}a-zA-Z_][\u0080-\u{10FFFF}a-zA-Z0-9_-]*/

# A run of blank space: space, line feed, carriage return or tab.
RE_WHITESPACE = /[ \n\r\t]+/

# Characters that may follow a backslash inside a quoted string, not
# counting the string's own quote character.
S_ESCAPES = Set["b", "f", "n", "r", "t", "u", "/", "\\"].freeze
|
34
|
+
|
35
|
+
# @dynamic tokens
|
36
|
+
attr_reader :tokens
|
37
|
+
|
38
|
+
# Prepare a lexer for the JSONPath expression _query_.
#
# The lexer holds a frozen version of the query. A query that is not
# already frozen is copied first, so the caller's string is never frozen
# as a side effect of lexing it.
#
# @param query [String] the JSONPath expression to scan.
def initialize(query)
  @filter_depth = 0 # incremented when a filter selector opens, decremented when it closes
  @paren_stack = [] # one parenthesis counter per function call being scanned
  @tokens = []
  @start = 0 # character index at which the pending token begins
  @query = query.frozen? ? query : query.dup.freeze
  @scanner = StringScanner.new(@query)
end
|
46
|
+
|
47
|
+
# Drive the lexer's state machine until it stops.
#
# Scanning starts in the :lex_root state. Each state is the name of a
# method on this lexer; calling it returns the name of the next state, or
# nil when there is nothing more to do.
def run
  current = :lex_root
  current = send(current) until current.nil?
end
|
51
|
+
|
52
|
+
protected
|
53
|
+
|
54
|
+
# Append a token of the given type, then start the next token at the
# scanner's current character position.
#
# @param token_type [Symbol] one of the constants defined on the _Token_ class.
# @param value [String | nil] the token's value, if it is already known.
#   Otherwise the value is sliced from @query, between the start of the
#   pending token and the scanner's position. Passing a known value skips
#   that slice, which is a performance optimization.
def emit(token_type, value = nil)
  stop = @scanner.charpos
  text = value || @query[@start...stop]
  @tokens << Token.new(token_type, text, @start, @query)
  @start = stop
end
|
62
|
+
|
63
|
+
# Consume one character from the query.
#
# @return [String] the character read, or an empty string when the
#   scanner has reached the end of the query.
def next
  char = @scanner.getch
  char.nil? ? "" : char
end
|
66
|
+
|
67
|
+
# Discard whatever has been scanned since the last token by moving the
# start of the pending token up to the scanner's current character position.
def ignore
  @start = @scanner.charpos
end
|
70
|
+
|
71
|
+
# Step the scanner back by one character.
#
# StringScanner positions are byte offsets. After moving back one byte we
# keep moving back while the position is on a UTF-8 continuation byte
# (0b10xxxxxx), so the scanner ends up on the first byte of the previous
# character whether that character is one byte wide or several.
# Assumes the query is UTF-8 encoded.
#
# @return [Integer] the new byte position of the scanner.
def backup
  source = @scanner.string
  position = @scanner.pos - 1
  position -= 1 while position.positive? && (source.getbyte(position) & 0xC0) == 0x80
  @scanner.pos = position
end
|
75
|
+
|
76
|
+
# Look at the next byte of the query without consuming it.
#
# Only one byte is read, so the result is a whole character only when the
# next character is a single byte wide.
#
# @return [String] the next byte, or an empty string at the end of the query.
def peek
  @scanner.peek(1)
end
|
80
|
+
|
81
|
+
# Try to match _pattern_ at the scanner's current position, consuming the
# matched text on success.
#
# @param pattern [Regexp | String] what to match at the current position.
# @return [Boolean] true if the pattern matched and the scanner advanced,
#   false if it did not match.
def accept?(pattern)
  matched = @scanner.scan(pattern)
  matched ? true : false
end
|
85
|
+
|
86
|
+
# Consume a run of digits, optionally preceded by a minus sign, if one
# starts at the scanner's current position. Exponents are not handled here.
#
# @return [Boolean] true if an integer was consumed.
def accept_int?
  consumed = @scanner.skip(RE_INT)
  !consumed.nil?
end
|
91
|
+
|
92
|
+
# Skip over a run of whitespace at the scanner's current position, if
# there is one, and exclude it from the pending token.
#
# @return [Boolean] true if any whitespace was skipped.
def ignore_whitespace?
  return false unless @scanner.scan(RE_WHITESPACE)

  ignore
  true
end
|
100
|
+
|
101
|
+
# Record an error token covering the text scanned since the last token.
#
# The token's value is an empty string when there is no text to slice
# from the query at the pending token's start.
#
# @param message [String] a description of what went wrong.
def error(message)
  text = @query[@start...@scanner.charpos] || ""
  @tokens << Token.new(Token::ERROR, text, @start, @query, message: message)
end
|
106
|
+
|
107
|
+
# Initial state: the query must begin with the root identifier, `$`.
#
# @return [Symbol, nil] :lex_segment after emitting the root token, or nil
#   after recording an error.
def lex_root
  first = self.next

  if first == "$"
    emit(Token::ROOT, "$")
    return :lex_segment
  end

  error "expected '$', found '#{first}'"
  nil
end
|
118
|
+
|
119
|
+
# Scan the start of a segment: a dot, a double dot or an opening bracket.
#
# At the end of the query an EOI token is emitted and scanning stops.
# Inside a filter expression, any other character hands control back to
# the filter state; outside a filter it is an error.
#
# @return [Symbol, nil] the next state, or nil when scanning is finished
#   or an error was recorded.
def lex_segment # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
  if accept?(RE_WHITESPACE) && peek.empty?
    error "unexpected trailing whitespace"
    return nil
  end

  ignore
  char = self.next

  case char
  when ""
    emit(Token::EOI, "")
    return nil
  when "["
    emit(Token::LBRACKET, "[")
    return :lex_inside_bracketed_segment
  when "."
    # A single dot is left for the shorthand selector state to ignore.
    return :lex_shorthand_selector unless peek == "."

    self.next
    emit(Token::DOUBLE_DOT, "..")
    return :lex_descendant_segment
  end

  unless @filter_depth.positive?
    error "expected '.', '..' or a bracketed selection, found '#{char}'"
    return nil
  end

  # Not part of a segment: let the filter state read the character again.
  backup
  :lex_inside_filter
end
|
151
|
+
|
152
|
+
# Scan what follows a double dot: a wildcard, an opening bracket or a
# shorthand name.
#
# @return [Symbol, nil] the next state, or nil after recording an error.
def lex_descendant_segment # rubocop:disable Metrics/MethodLength
  char = self.next

  if char == ""
    error "bald descendant segment"
    return nil
  end

  if char == "*"
    emit(Token::WILD, "*")
    return :lex_segment
  end

  if char == "["
    emit(Token::LBRACKET, "[")
    return :lex_inside_bracketed_segment
  end

  # Anything else has to be a shorthand name, so read it again as a whole.
  backup
  if accept?(RE_NAME)
    emit(Token::NAME)
    return :lex_segment
  end

  unexpected = self.next
  error "unexpected descendant selection token '#{unexpected}'"
  nil
end
|
175
|
+
|
176
|
+
# Scan the selector that follows a single dot: a wildcard or a name.
#
# The dot has been consumed but is still part of the pending token when
# this state begins, so it is ignored first.
#
# @return [Symbol, nil] :lex_segment after emitting a token, or nil after
#   recording an error.
def lex_shorthand_selector # rubocop:disable Metrics/MethodLength
  if peek.empty?
    error "unexpected trailing dot"
    return nil
  end

  ignore # ignore dot

  if accept?(RE_WHITESPACE)
    error "unexpected whitespace after dot"
    return nil
  end

  if accept?("*")
    emit(Token::WILD, "*")
    :lex_segment
  elsif accept?(RE_NAME)
    emit(Token::NAME)
    :lex_segment
  else
    unexpected = self.next
    error "unexpected shorthand selector '#{unexpected}'"
    nil
  end
end
|
204
|
+
|
205
|
+
# Scan the contents of a bracketed segment, up to and including its
# closing bracket.
#
# Wildcards, commas, colons and integers are emitted in place. Quoted
# strings and filter selectors are handed off to their own states.
#
# @return [Symbol, nil] the next state, or nil after recording an error.
def lex_inside_bracketed_segment # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
  loop do # rubocop:disable Metrics/BlockLength
    ignore_whitespace?
    char = self.next

    case char
    when ""
      error "unclosed bracketed selection"
      return nil
    when "]"
      emit(Token::RBRACKET, "]")
      # A segment nested in a filter expression returns to that filter.
      return :lex_inside_filter if @filter_depth.positive?

      return :lex_segment
    when "'"
      return :lex_single_quoted_string_inside_bracketed_segment
    when '"'
      return :lex_double_quoted_string_inside_bracketed_segment
    when "?"
      emit(Token::FILTER, "?")
      @filter_depth += 1
      return :lex_inside_filter
    when "*"
      emit(Token::WILD, "*")
    when ","
      emit(Token::COMMA, ",")
    when ":"
      emit(Token::COLON, ":")
    else
      backup
      unless accept_int?
        error "unexpected token '#{char}' in bracketed selection"
        return nil
      end

      # Index selector or part of a slice selector.
      emit Token::INDEX
    end
  end
end
|
243
|
+
|
244
|
+
# Scan tokens inside a filter expression.
#
# Operators, literals, parentheses and function names are emitted here.
# Embedded queries (starting with `$`, `@` or `.`) are handed to
# lex_segment, quoted strings to their own states, and the end of the
# filter (a closing bracket, or a comma outside any function call) back to
# lex_inside_bracketed_segment.
#
# @return [Symbol, nil] the next state, or nil after recording an error.
def lex_inside_filter # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  loop do # rubocop:disable Metrics/BlockLength
    ignore_whitespace?
    c = self.next

    case c
    when ""
      error "unclosed bracketed selection"
      return nil
    when "]"
      @filter_depth -= 1
      # NOTE(review): only a stack holding exactly one open function call is
      # reported here; confirm whether deeper stacks should be reported too.
      if @paren_stack.length == 1
        error "unbalanced parentheses"
        return nil
      end

      # Leave the bracket for the bracketed segment state to emit.
      backup
      return :lex_inside_bracketed_segment
    when ","
      emit(Token::COMMA, ",")
      # If we have unbalanced parens, we are inside a function call and a
      # comma separates arguments. Otherwise a comma separates selectors.
      next if @paren_stack.length.positive?

      @filter_depth -= 1
      return :lex_inside_bracketed_segment
    when "'"
      return :lex_single_quoted_string_inside_filter_expression
    when '"'
      return :lex_double_quoted_string_inside_filter_expression
    when "("
      emit(Token::LPAREN, "(")
      # Are we in a function call? If so, a function argument contains parens.
      @paren_stack[-1] += 1 if @paren_stack.length.positive?
    when ")"
      emit(Token::RPAREN, ")")
      # Are we closing a function call or a parenthesized expression?
      if @paren_stack.length.positive?
        if @paren_stack[-1] == 1
          @paren_stack.pop
        else
          @paren_stack[-1] -= 1
        end
      end
    when "$"
      emit(Token::ROOT, "$")
      return :lex_segment
    when "@"
      emit(Token::CURRENT, "@")
      return :lex_segment
    when "."
      # Leave the dot for lex_segment to read.
      backup
      return :lex_segment
    when "!"
      if peek == "="
        self.next
        emit(Token::NE, "!=")
      else
        emit(Token::NOT, "!")
      end
    when "="
      if peek == "="
        self.next
        emit(Token::EQ, "==")
      else
        backup
        error "unexpected filter selector token '#{c}'"
        return nil
      end
    when "<"
      if peek == "="
        self.next
        emit(Token::LE, "<=")
      else
        emit(Token::LT, "<")
      end
    when ">"
      if peek == "="
        self.next
        emit(Token::GE, ">=")
      else
        emit(Token::GT, ">")
      end
    else
      backup
      if accept_int?
        if peek == "."
          # A float
          self.next
          # The fraction is digits only. accept_int? would also take a minus
          # sign here and let text such as "1.-5" through as a float.
          unless accept?(/[0-9]+/) # rubocop:disable Metrics/BlockNesting
            error "a fractional digit is required after a decimal point"
            return nil
          end

          accept?(/[eE][+-]?[0-9]+/)
          emit Token::FLOAT
        # An int, or float if exponent is negative
        elsif accept?(/[eE]-[0-9]+/)
          emit Token::FLOAT
        else
          accept?(/[eE][+-]?[0-9]+/)
          emit Token::INT
        end
      elsif accept?("&&")
        emit(Token::AND, "&&")
      elsif accept?("||")
        emit(Token::OR, "||")
      elsif accept?("true")
        emit(Token::TRUE, "true")
      elsif accept?("false")
        emit(Token::FALSE, "false")
      elsif accept?("null")
        emit(Token::NULL, "null")
      elsif accept?(/[a-z][a-z_0-9]*/)
        unless peek == "("
          error "unexpected filter selector token"
          return nil
        end
        # Function name
        # Keep track of parentheses for this function call.
        @paren_stack << 1
        emit Token::FUNCTION
        self.next
        ignore # move past LPAREN
      else
        error "unexpected filter selector token '#{c}'"
        return nil
      end
    end
  end
end
|
374
|
+
|
375
|
+
class << self
  # Build the body of a lexer state that scans a quoted string.
  #
  # The returned proc is written to be installed as an instance method
  # with define_method: it calls the lexer's helper methods on self and
  # uses `return` to hand back the next state.
  #
  # @param quote [String] the quote character that delimits the string.
  # @param state [Symbol] the state to continue with after the closing quote.
  # @param token [Symbol] the type of token to emit for the string's content.
  # @return [Proc]
  def lex_string_factory(quote, state, token) # rubocop:disable Metrics/MethodLength
    proc {
      # @type self: Lexer
      ignore # move past opening quote

      loop do
        char = self.next

        if char == ""
          error "unclosed string starting at index #{@start}"
          return nil
        elsif char == "\\"
          # A backslash must be followed by a known escape character or by
          # this string's own quote character.
          following = peek
          unless S_ESCAPES.member?(following) || following == quote
            error "invalid escape"
            return nil
          end

          self.next
        elsif char == quote
          # Emit the content without its closing quote.
          backup
          emit(token)
          self.next
          ignore # move past closing quote
          return state
        end
      end
    }
  end
end
|
407
|
+
|
408
|
+
# String-scanning states: one for each quote character in each of the two
# places a string can appear. Each returns to the state it was entered from.

define_method(
  :lex_double_quoted_string_inside_bracketed_segment,
  lex_string_factory('"', :lex_inside_bracketed_segment, Token::DOUBLE_QUOTE_STRING)
)

define_method(
  :lex_single_quoted_string_inside_bracketed_segment,
  lex_string_factory("'", :lex_inside_bracketed_segment, Token::SINGLE_QUOTE_STRING)
)

define_method(
  :lex_double_quoted_string_inside_filter_expression,
  lex_string_factory('"', :lex_inside_filter, Token::DOUBLE_QUOTE_STRING)
)

define_method(
  :lex_single_quoted_string_inside_filter_expression,
  lex_string_factory("'", :lex_inside_filter, Token::SINGLE_QUOTE_STRING)
)
|
419
|
+
end
|
420
|
+
end
|
data/lib/json_p3/node.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JSONP3
|
4
|
+
# A JSON-like value and its location.
class JSONPathNode
  # Short escape sequences used for names in a normalized path.
  NAME_ESCAPES = {
    "\b" => "\\b",
    "\f" => "\\f",
    "\n" => "\\n",
    "\r" => "\\r",
    "\t" => "\\t",
    "'" => "\\'",
    "\\" => "\\\\"
  }.freeze

  # Characters that must be escaped inside a single-quoted normalized
  # name: control characters, the single quote and the backslash.
  RE_NAME_ESCAPABLE = /[\x00-\x1f'\\]/

  private_constant :NAME_ESCAPES, :RE_NAME_ESCAPABLE

  # @dynamic value, location, root
  attr_reader :value, :location, :root

  # @param value [JSON-like] the value at this node.
  # @param location [Array<String | Integer | Array<String | Integer>>] the sequence of
  #   names and/or indices leading to _value_ in _root_.
  # @param root [JSON-like] the root value containing _value_ at _location_.
  def initialize(value, location, root)
    @value = value
    @location = location
    @root = root
  end

  # Return the normalized path to this node.
  #
  # Names are written in single quotes, with single quotes, backslashes
  # and control characters escaped. Indices are written as they are.
  #
  # @return [String] the normalized path.
  def path
    segments = @location.flatten.map do |part|
      part.is_a?(String) ? "['#{escape_name(part)}']" : "[#{part}]"
    end
    "$#{segments.join}"
  end

  # Return a new node that is a child of this node.
  #
  # The child's location nests this node's location instead of copying
  # it; the nesting is flattened when the path is built.
  #
  # @param value the JSON-like value at the new node.
  # @param key [Integer, String] the array index or hash key associated with _value_.
  def new_child(value, key)
    JSONPathNode.new(value, [@location, key], @root)
  end

  def to_s
    "JSONPathNode(#{value} at #{path})"
  end

  private

  # Escape _name_ for use between single quotes in a normalized path.
  # Control characters without a short escape become \u followed by four
  # lowercase hexadecimal digits.
  def escape_name(name)
    name.gsub(RE_NAME_ESCAPABLE) do |char|
      NAME_ESCAPES.fetch(char) { format("\\u%04x", char.ord) }
    end
  end
end
|
37
|
+
|
38
|
+
# A list of JSONPathNode instances. It adds nothing to Array. It exists so
# that an array of nodes can be told apart from an array of data values,
# which is required when calling filter functions that expect nodes as
# arguments.
class JSONPathNodeList < Array
end
|
42
|
+
end
|