json_p3 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,420 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require "strscan"
5
+ require_relative "errors"
6
+ require_relative "token"
7
+
8
+ module JSONP3 # rubocop:disable Style/Documentation
# Scan the JSONPath expression _query_ and return its tokens.
#
# @param query [String] the JSONPath expression to tokenize.
# @return [Array<Token>] the tokens produced by the lexer.
# @raise [JSONPathSyntaxError] when the last token produced is an error token.
def self.tokenize(query)
  scanner = Lexer.new(query)
  scanner.run

  result = scanner.tokens
  final = result.last
  return result unless final && final.type == Token::ERROR

  # The bare `raise` is only reached if the error token carries no message.
  raise JSONPathSyntaxError.new(final.message || raise, final)
end
25
+
# JSONPath query expression lexical scanner.
#
# The lexer is a state machine. Each +lex_*+ method consumes part of the query,
# appends zero or more tokens to +tokens+ and returns the name of the next
# state method as a Symbol, or nil when scanning stops (end of input or error).
#
# @see tokenize
class Lexer # rubocop:disable Metrics/ClassLength
  RE_INT = /-?[0-9]+/
  RE_DIGITS = /[0-9]+/
  RE_NAME = /[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*/
  RE_WHITESPACE = /[ \n\r\t]+/
  S_ESCAPES = Set["b", "f", "n", "r", "t", "u", "/", "\\"].freeze

  # @dynamic tokens
  attr_reader :tokens

  # @param query [String] the JSONPath expression to scan. Note that the
  #   string is frozen by this constructor.
  def initialize(query)
    @filter_depth = 0
    @paren_stack = []
    @tokens = []
    @start = 0
    @last_width = 0
    @query = query.freeze
    @scanner = StringScanner.new(query)
  end

  # Run the state machine, starting at the root identifier, until a state
  # method returns nil.
  def run
    state = :lex_root
    state = method(state).call until state.nil?
  end

  protected

  # Generate a new token with the given type.
  # @param token_type [Symbol] one of the constants defined on the _Token_ class.
  # @param value [String | nil] the token's value, if it is known, otherwise the
  #   value will be sliced from @query. This is a performance optimization.
  def emit(token_type, value = nil)
    @tokens << Token.new(token_type, value || @query[@start...@scanner.charpos], @start, @query)
    @start = @scanner.charpos
  end

  # Consume and return the next character, or an empty string at end of input.
  def next
    char = @scanner.getch
    # Remember the byte width of what we just read so #backup can undo exactly
    # this read, even when the character is multibyte.
    @last_width = char ? char.bytesize : 0
    char || ""
  end

  # Discard everything scanned since the last emitted token.
  def ignore
    @start = @scanner.charpos
  end

  # Undo the most recent call to #next.
  #
  # The scanner position is a byte offset, so we rewind by the byte width of
  # the character that was read rather than by a fixed single byte. Calling
  # this without a preceding #next (or after #next hit end of input) is a no-op.
  def backup
    @scanner.pos -= @last_width
    @last_width = 0
  end

  # Return the next byte as a string without consuming it, or an empty string
  # at end of input.
  def peek
    # Assumes we're peeking single byte characters.
    @scanner.peek(1)
  end

  # Advance the lexer if the upcoming input matches _pattern_.
  # @param pattern [Regexp | String] the pattern to match at the current position.
  # @return [Boolean] true if the pattern matched and input was consumed.
  def accept?(pattern)
    !@scanner.scan(pattern).nil?
  end

  # Accept a run of digits, possibly preceded by a negative sign.
  # Does not handle exponents.
  def accept_int?
    !@scanner.scan(RE_INT).nil?
  end

  # Skip a run of whitespace, if there is one.
  # @return [Boolean] true if any whitespace was skipped.
  def ignore_whitespace?
    if @scanner.scan(RE_WHITESPACE).nil?
      false
    else
      ignore
      true
    end
  end

  # Append an error token carrying _message_. The token's value is whatever
  # has been scanned since the last emitted token.
  def error(message)
    @tokens << Token.new(
      Token::ERROR, @query[@start...@scanner.charpos] || "", @start, @query, message: message
    )
  end

  # Expect the root identifier `$` at the start of the query.
  def lex_root
    c = self.next

    unless c == "$"
      error "expected '$', found '#{c}'"
      return nil
    end

    emit(Token::ROOT, "$")
    :lex_segment
  end

  # Scan the start of a segment: `.`, `..` or `[`, or the end of input.
  def lex_segment # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
    if accept?(RE_WHITESPACE) && peek.empty?
      error "unexpected trailing whitespace"
      return nil
    end

    ignore
    c = self.next

    case c
    when ""
      emit(Token::EOI, "")
      nil
    when "."
      return :lex_shorthand_selector unless peek == "."

      self.next
      emit(Token::DOUBLE_DOT, "..")
      :lex_descendant_segment
    when "["
      emit(Token::LBRACKET, "[")
      :lex_inside_bracketed_segment
    else
      if @filter_depth.positive?
        # Inside a filter, anything else ends the embedded query. Hand the
        # character back to the filter state.
        backup
        :lex_inside_filter
      else
        error "expected '.', '..' or a bracketed selection, found '#{c}'"
        nil
      end
    end
  end

  # Scan what follows `..`: a wildcard, a bracketed selection or a name.
  def lex_descendant_segment # rubocop:disable Metrics/MethodLength
    case self.next
    when ""
      error "bald descendant segment"
      nil
    when "*"
      emit(Token::WILD, "*")
      :lex_segment
    when "["
      emit(Token::LBRACKET, "[")
      :lex_inside_bracketed_segment
    else
      backup
      if accept?(RE_NAME)
        emit(Token::NAME)
        :lex_segment
      else
        c = self.next
        error "unexpected descendant selection token '#{c}'"
        nil
      end
    end
  end

  # Scan what follows a single `.`: a wildcard or a name.
  def lex_shorthand_selector # rubocop:disable Metrics/MethodLength
    if peek == ""
      error "unexpected trailing dot"
      return nil
    end

    ignore # ignore dot

    if accept?(RE_WHITESPACE)
      error "unexpected whitespace after dot"
      return nil
    end

    if peek == "*"
      self.next
      emit(Token::WILD, "*")
      return :lex_segment
    end

    if accept?(RE_NAME)
      emit(Token::NAME)
      return :lex_segment
    end

    c = self.next
    error "unexpected shorthand selector '#{c}'"
    nil
  end

  # Scan selectors between `[` and `]`.
  def lex_inside_bracketed_segment # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
    loop do # rubocop:disable Metrics/BlockLength
      ignore_whitespace?
      c = self.next

      case c
      when "]"
        emit(Token::RBRACKET, "]")
        return @filter_depth.positive? ? :lex_inside_filter : :lex_segment
      when ""
        error "unclosed bracketed selection"
        return nil
      when "*"
        emit(Token::WILD, "*")
      when "?"
        emit(Token::FILTER, "?")
        @filter_depth += 1
        return :lex_inside_filter
      when ","
        emit(Token::COMMA, ",")
      when ":"
        emit(Token::COLON, ":")
      when "'"
        return :lex_single_quoted_string_inside_bracketed_segment
      when '"'
        return :lex_double_quoted_string_inside_bracketed_segment
      else
        backup
        if accept_int?
          # Index selector or part of a slice selector.
          emit Token::INDEX
        else
          error "unexpected token '#{c}' in bracketed selection"
          return nil
        end
      end
    end
  end

  # Scan a filter expression, i.e. whatever follows `?` in a bracketed segment.
  def lex_inside_filter # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
    loop do # rubocop:disable Metrics/BlockLength
      ignore_whitespace?
      c = self.next

      case c
      when ""
        error "unclosed bracketed selection"
        return nil
      when "]"
        @filter_depth -= 1
        if @paren_stack.length == 1
          error "unbalanced parentheses"
          return nil
        end
        # Let the bracketed segment state emit the closing bracket.
        backup
        return :lex_inside_bracketed_segment
      when ","
        emit(Token::COMMA, ",")
        # If we have unbalanced parens, we are inside a function call and a
        # comma separates arguments. Otherwise a comma separates selectors.
        next if @paren_stack.length.positive?

        @filter_depth -= 1
        return :lex_inside_bracketed_segment
      when "'"
        return :lex_single_quoted_string_inside_filter_expression
      when '"'
        return :lex_double_quoted_string_inside_filter_expression
      when "("
        emit(Token::LPAREN, "(")
        # Are we in a function call? If so, a function argument contains parens.
        @paren_stack[-1] += 1 if @paren_stack.length.positive?
      when ")"
        emit(Token::RPAREN, ")")
        # Are we closing a function call or a parenthesized expression?
        if @paren_stack.length.positive?
          if @paren_stack[-1] == 1
            @paren_stack.pop
          else
            @paren_stack[-1] -= 1
          end
        end
      when "$"
        emit(Token::ROOT, "$")
        return :lex_segment
      when "@"
        emit(Token::CURRENT, "@")
        return :lex_segment
      when "."
        backup
        return :lex_segment
      when "!"
        if peek == "="
          self.next
          emit(Token::NE, "!=")
        else
          emit(Token::NOT, "!")
        end
      when "="
        if peek == "="
          self.next
          emit(Token::EQ, "==")
        else
          backup
          error "unexpected filter selector token '#{c}'"
          return nil
        end
      when "<"
        if peek == "="
          self.next
          emit(Token::LE, "<=")
        else
          emit(Token::LT, "<")
        end
      when ">"
        if peek == "="
          self.next
          emit(Token::GE, ">=")
        else
          emit(Token::GT, ">")
        end
      else
        backup
        if accept_int?
          if peek == "."
            # A float
            self.next
            # The fractional part is digits only. A sign is not allowed here,
            # so we don't reuse accept_int?.
            unless accept?(RE_DIGITS) # rubocop:disable Metrics/BlockNesting
              error "a fractional digit is required after a decimal point"
              return nil
            end

            accept?(/[eE][+-]?[0-9]+/)
            emit Token::FLOAT
          # An int, or float if exponent is negative
          elsif accept?(/[eE]-[0-9]+/)
            emit Token::FLOAT
          else
            accept?(/[eE][+-]?[0-9]+/)
            emit Token::INT
          end
        elsif accept?("&&")
          emit(Token::AND, "&&")
        elsif accept?("||")
          emit(Token::OR, "||")
        elsif accept?("true")
          emit(Token::TRUE, "true")
        elsif accept?("false")
          emit(Token::FALSE, "false")
        elsif accept?("null")
          emit(Token::NULL, "null")
        elsif accept?(/[a-z][a-z_0-9]*/)
          unless peek == "("
            error "unexpected filter selector token"
            return nil
          end
          # Function name
          # Keep track of parentheses for this function call.
          @paren_stack << 1
          emit Token::FUNCTION
          self.next
          ignore # move past LPAREN
        else
          error "unexpected filter selector token '#{c}'"
          return nil
        end
      end
    end
  end

  class << self
    # Build the body of a string-scanning state method.
    #
    # @param quote [String] the quote character delimiting the string.
    # @param state [Symbol] the state to return to once the string is closed.
    # @param token [Symbol] the token type to emit for the string's contents.
    # @return [Proc] a body suitable for `define_method`. It is only ever used
    #   that way, which is what makes the `return` statements inside it return
    #   from the defined method.
    def lex_string_factory(quote, state, token) # rubocop:disable Metrics/MethodLength
      proc do
        # @type self: Lexer
        ignore # move past opening quote

        loop do
          c = self.next
          peeked = peek

          case c
          when ""
            error "unclosed string starting at index #{@start}"
            return nil
          when "\\"
            if S_ESCAPES.member?(peeked) || peeked == quote
              self.next
            else
              error "invalid escape"
              return nil
            end
          when quote
            # Emit the string contents without the closing quote.
            backup
            emit(token)
            self.next
            ignore # move past closing quote
            return state
          end
        end
      end
    end
  end

  define_method(:lex_double_quoted_string_inside_bracketed_segment,
                lex_string_factory('"', :lex_inside_bracketed_segment, Token::DOUBLE_QUOTE_STRING))

  define_method(:lex_single_quoted_string_inside_bracketed_segment,
                lex_string_factory("'", :lex_inside_bracketed_segment, Token::SINGLE_QUOTE_STRING))

  define_method(:lex_double_quoted_string_inside_filter_expression,
                lex_string_factory('"', :lex_inside_filter, Token::DOUBLE_QUOTE_STRING))

  define_method(:lex_single_quoted_string_inside_filter_expression,
                lex_string_factory("'", :lex_inside_filter, Token::SINGLE_QUOTE_STRING))
end
420
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JSONP3
# A JSON-like value and its location.
class JSONPathNode
  # Characters that must be escaped inside a single-quoted name in a
  # normalized path: control characters, the apostrophe and the backslash.
  RE_NORMALIZED_ESCAPABLE = /[\x00-\x1F'\\]/

  # Escapes with a dedicated short form. Any other control character is
  # written as a lowercase \u00xx sequence.
  NORMALIZED_ESCAPES = {
    "\b" => "\\b",
    "\f" => "\\f",
    "\n" => "\\n",
    "\r" => "\\r",
    "\t" => "\\t",
    "'" => "\\'",
    "\\" => "\\\\"
  }.freeze

  # @dynamic value, location, root
  attr_reader :value, :location, :root

  # @param value [JSON-like] the value at this node.
  # @param location [Array<String | Integer | Array<String | Integer>>] the sequence of
  #   names and/or indices leading to _value_ in _root_.
  # @param root [JSON-like] the root value containing _value_ at _location_.
  def initialize(value, location, root)
    @value = value
    @location = location
    @root = root
  end

  # Return the normalized path to this node.
  #
  # String keys are written as single-quoted names with special characters
  # escaped; everything else is written as a bare index.
  # @return [String] the normalized path.
  def path
    segments = @location.flatten.map do |key|
      key.is_a?(String) ? "['#{escape_name(key)}']" : "[#{key}]"
    end
    "$#{segments.join}"
  end

  # Return a new node that is a child of this node.
  # @param value the JSON-like value at the new node.
  # @param key [Integer, String] the array index or hash key associated with _value_.
  # @return [JSONPathNode] a node sharing this node's root.
  def new_child(value, key)
    # Nest rather than copy the parent location; #path flattens it on demand.
    JSONPathNode.new(value, [@location, key], @root)
  end

  def to_s
    "JSONPathNode(#{value} at #{path})"
  end

  private

  # Escape _name_ for use between single quotes in a normalized path.
  # @param name [String]
  # @return [String]
  def escape_name(name)
    name.gsub(RE_NORMALIZED_ESCAPABLE) do |char|
      NORMALIZED_ESCAPES.fetch(char) { format("\\u%04x", char.ord) }
    end
  end
end
37
+
# A list of JSONPathNode instances. It adds nothing to Array; the distinct
# class exists so that internal code can tell a list of nodes apart from an
# array of plain data values, which matters when calling filter functions
# that expect nodes as arguments.
class JSONPathNodeList < Array
end
42
+ end