json_p3 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,420 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require "strscan"
5
+ require_relative "errors"
6
+ require_relative "token"
7
+
8
+ module JSONP3 # rubocop:disable Style/Documentation
9
# Scan the JSONPath expression _query_ and return its tokens.
#
# A Lexer is run over the whole query. If the final token it produced is an
# error token, that token's message is raised as a syntax error instead of
# returning the token list.
#
# @param query [String] the JSONPath expression to tokenize.
# @return [Array<Token>]
# @raise [JSONPathSyntaxError] if the last token produced is an error token.
def self.tokenize(query)
  scanner = Lexer.new(query)
  scanner.run

  result = scanner.tokens
  final = result.last

  if final&.type == Token::ERROR
    # `|| raise` guards against an error token without a message.
    raise JSONPathSyntaxError.new(final.message || raise, final)
  end

  result
end
25
+
26
# JSONPath query expression lexical scanner.
#
# The lexer is a state machine. Each `lex_*` method consumes some input,
# appends zero or more tokens to {#tokens}, and returns the name of the next
# state as a Symbol, or `nil` to stop. {#run} starts at `lex_root` and keeps
# dispatching until a state returns `nil`, which happens at end of input or
# right after an error token has been appended.
#
# @see tokenize
class Lexer # rubocop:disable Metrics/ClassLength
  RE_INT = /-?[0-9]+/
  # NOTE(review): the upper bound \uFFFF excludes code points above the BMP and
  # `-` is accepted inside names; RFC 9535's member-name-shorthand grammar looks
  # different on both points - confirm whether this is intentional.
  RE_NAME = /[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*/
  RE_WHITESPACE = /[ \n\r\t]+/
  # Characters allowed to follow a backslash inside a string literal, in
  # addition to the quote character delimiting that literal.
  S_ESCAPES = Set["b", "f", "n", "r", "t", "u", "/", "\\"].freeze

  # @dynamic tokens
  attr_reader :tokens

  # @param query [String] the JSONPath expression to scan.
  def initialize(query)
    @filter_depth = 0 # number of filter selectors we are currently nested in
    @paren_stack = [] # one open-paren counter per function call being lexed
    @tokens = []
    @start = 0 # character index where the pending token starts
    @last_width = 0 # byte width of the character most recently returned by #next
    # Work on a frozen string, but never freeze the caller's own object.
    @query = query.frozen? ? query : query.dup.freeze
    @scanner = StringScanner.new(@query)
  end

  # Run the state machine until a state returns `nil`.
  def run
    state = :lex_root
    state = method(state).call until state.nil?
  end

  protected

  # Generate a new token with the given type.
  # @param token_type [Symbol] one of the constants defined on the _Token_ class.
  # @param value [String | nil] the token's value, if it is known, otherwise the
  #   value will be sliced from @query. This is a performance optimization.
  def emit(token_type, value = nil)
    @tokens << Token.new(token_type, value || @query[@start...@scanner.charpos], @start, @query)
    @start = @scanner.charpos
  end

  # Consume and return the next character, or an empty string at end of input.
  # The character's byte width is remembered so that #backup can undo exactly
  # this read, even for multi-byte characters.
  # @return [String]
  def next
    c = @scanner.getch
    @last_width = c ? c.bytesize : 0
    c || ""
  end

  # Discard pending input by moving the token start to the current position.
  def ignore
    @start = @scanner.charpos
  end

  # Step back over the character most recently returned by #next.
  # Must be called at most once per #next, with no other scanning in between.
  def backup
    @scanner.pos -= @last_width
  end

  # Return the next byte without consuming it, or an empty string at end of
  # input. Only meaningful for comparisons against single-byte characters: for
  # a multi-byte character this is just its first byte.
  # @return [String]
  def peek
    @scanner.peek(1)
  end

  # Advance the lexer if the input at the current position matches _pattern_.
  # @param pattern [Regexp | String]
  # @return [Boolean] true if the pattern matched and input was consumed.
  def accept?(pattern)
    !@scanner.scan(pattern).nil?
  end

  # Accept a run of digits, possibly preceded by a negative sign.
  # Does not handle exponents.
  # @return [Boolean]
  def accept_int?
    !@scanner.scan(RE_INT).nil?
  end

  # Skip over a run of whitespace, if there is one.
  # @return [Boolean] true if any whitespace was skipped.
  def ignore_whitespace?
    if @scanner.scan(RE_WHITESPACE).nil?
      false
    else
      ignore
      true
    end
  end

  # Append an error token covering the pending input. Unlike #emit, the token
  # start is not advanced; callers stop lexing by returning `nil` afterwards.
  # @param message [String] a description of the problem.
  def error(message)
    @tokens << Token.new(
      Token::ERROR, @query[@start...@scanner.charpos] || "", @start, @query, message: message
    )
  end

  # Initial state: a query must start with the root identifier `$`.
  def lex_root
    c = self.next

    unless c == "$"
      error "expected '$', found '#{c}'"
      return nil
    end

    emit(Token::ROOT, "$")
    :lex_segment
  end

  # Lex the start of a segment (`.`, `..` or `[`), or the end of the query.
  def lex_segment # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
    if accept?(RE_WHITESPACE) && peek.empty?
      error "unexpected trailing whitespace"
      return nil
    end

    ignore
    c = self.next

    case c
    when ""
      emit(Token::EOI, "")
      nil
    when "."
      return :lex_shorthand_selector unless peek == "."

      self.next
      emit(Token::DOUBLE_DOT, "..")
      :lex_descendant_segment
    when "["
      emit(Token::LBRACKET, "[")
      :lex_inside_bracketed_segment
    else
      if @filter_depth.positive?
        # Not a segment. Inside a filter this ends the embedded query, so hand
        # the character back for the filter state to deal with.
        backup
        :lex_inside_filter
      else
        error "expected '.', '..' or a bracketed selection, found '#{c}'"
        nil
      end
    end
  end

  # Lex what follows `..`: a wildcard, a bracketed selection or a name.
  def lex_descendant_segment # rubocop:disable Metrics/MethodLength
    case self.next
    when ""
      error "bald descendant segment"
      nil
    when "*"
      emit(Token::WILD, "*")
      :lex_segment
    when "["
      emit(Token::LBRACKET, "[")
      :lex_inside_bracketed_segment
    else
      # Possibly the first character of a name, which may be multi-byte.
      backup
      if accept?(RE_NAME)
        emit(Token::NAME)
        :lex_segment
      else
        c = self.next
        error "unexpected descendant selection token '#{c}'"
        nil
      end
    end
  end

  # Lex what follows a single `.`: a wildcard or a member name.
  def lex_shorthand_selector # rubocop:disable Metrics/MethodLength
    if peek == ""
      error "unexpected trailing dot"
      return nil
    end

    ignore # ignore dot

    if accept?(RE_WHITESPACE)
      error "unexpected whitespace after dot"
      return nil
    end

    if peek == "*"
      self.next
      emit(Token::WILD, "*")
      return :lex_segment
    end

    if accept?(RE_NAME)
      emit(Token::NAME)
      return :lex_segment
    end

    c = self.next
    error "unexpected shorthand selector '#{c}'"
    nil
  end

  # Lex selectors between `[` and `]`, handing over to the string or filter
  # states when a quote or `?` is found.
  def lex_inside_bracketed_segment # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
    loop do # rubocop:disable Metrics/BlockLength
      ignore_whitespace?
      c = self.next

      case c
      when "]"
        emit(Token::RBRACKET, "]")
        return @filter_depth.positive? ? :lex_inside_filter : :lex_segment
      when ""
        error "unclosed bracketed selection"
        return nil
      when "*"
        emit(Token::WILD, "*")
      when "?"
        emit(Token::FILTER, "?")
        @filter_depth += 1
        return :lex_inside_filter
      when ","
        emit(Token::COMMA, ",")
      when ":"
        emit(Token::COLON, ":")
      when "'"
        return :lex_single_quoted_string_inside_bracketed_segment
      when '"'
        return :lex_double_quoted_string_inside_bracketed_segment
      else
        backup
        if accept_int?
          # Index selector or part of a slice selector.
          emit Token::INDEX
        else
          error "unexpected token '#{c}' in bracketed selection"
          return nil
        end
      end
    end
  end

  # Lex a filter expression: literals, operators, function calls and the
  # start of embedded queries (`$` / `@`).
  def lex_inside_filter # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
    loop do # rubocop:disable Metrics/BlockLength
      ignore_whitespace?
      c = self.next

      case c
      when ""
        error "unclosed bracketed selection"
        return nil
      when "]"
        @filter_depth -= 1
        if @paren_stack.length == 1
          error "unbalanced parentheses"
          return nil
        end
        # Let the bracketed segment state emit the closing bracket.
        backup
        return :lex_inside_bracketed_segment
      when ","
        emit(Token::COMMA, ",")
        # If we have unbalanced parens, we are inside a function call and a
        # comma separates arguments. Otherwise a comma separates selectors.
        next if @paren_stack.length.positive?

        @filter_depth -= 1
        return :lex_inside_bracketed_segment
      when "'"
        return :lex_single_quoted_string_inside_filter_expression
      when '"'
        return :lex_double_quoted_string_inside_filter_expression
      when "("
        emit(Token::LPAREN, "(")
        # Are we in a function call? If so, a function argument contains parens.
        @paren_stack[-1] += 1 if @paren_stack.length.positive?
      when ")"
        emit(Token::RPAREN, ")")
        # Are we closing a function call or a parenthesized expression?
        if @paren_stack.length.positive?
          if @paren_stack[-1] == 1
            @paren_stack.pop
          else
            @paren_stack[-1] -= 1
          end
        end
      when "$"
        emit(Token::ROOT, "$")
        return :lex_segment
      when "@"
        emit(Token::CURRENT, "@")
        return :lex_segment
      when "."
        backup
        return :lex_segment
      when "!"
        if peek == "="
          self.next
          emit(Token::NE, "!=")
        else
          emit(Token::NOT, "!")
        end
      when "="
        if peek == "="
          self.next
          emit(Token::EQ, "==")
        else
          backup
          error "unexpected filter selector token '#{c}'"
          return nil
        end
      when "<"
        if peek == "="
          self.next
          emit(Token::LE, "<=")
        else
          emit(Token::LT, "<")
        end
      when ">"
        if peek == "="
          self.next
          emit(Token::GE, ">=")
        else
          emit(Token::GT, ">")
        end
      else
        backup
        if accept_int?
          if peek == "."
            # A float
            self.next
            # Fractional digits only: a sign is not allowed after the point.
            unless accept?(/[0-9]+/) # rubocop:disable Metrics/BlockNesting
              error "a fractional digit is required after a decimal point"
              return nil
            end

            accept?(/[eE][+-]?[0-9]+/)
            emit Token::FLOAT
          # An int, or float if exponent is negative
          elsif accept?(/[eE]-[0-9]+/)
            emit Token::FLOAT
          else
            accept?(/[eE][+-]?[0-9]+/)
            emit Token::INT
          end
        elsif accept?("&&")
          emit(Token::AND, "&&")
        elsif accept?("||")
          emit(Token::OR, "||")
        elsif accept?("true")
          emit(Token::TRUE, "true")
        elsif accept?("false")
          emit(Token::FALSE, "false")
        elsif accept?("null")
          emit(Token::NULL, "null")
        elsif accept?(/[a-z][a-z_0-9]*/)
          unless peek == "("
            error "unexpected filter selector token"
            return nil
          end
          # Function name
          # Keep track of parentheses for this function call.
          @paren_stack << 1
          emit Token::FUNCTION
          self.next
          ignore # move past LPAREN
        else
          error "unexpected filter selector token '#{c}'"
          return nil
        end
      end
    end
  end

  class << self
    # Build the body of a string-literal state.
    #
    # The returned proc is meant to be installed with `define_method`, so it
    # runs with a Lexer instance as `self` and its `return` statements return
    # from the defined method.
    #
    # @param quote [String] the quote character delimiting the string.
    # @param state [Symbol] the state to continue with after the closing quote.
    # @param token [Symbol] the token type to emit for the string's content.
    # @return [Proc]
    def lex_string_factory(quote, state, token) # rubocop:disable Metrics/MethodLength
      proc {
        # @type self: Lexer
        ignore # move past opening quote

        loop do
          c = self.next
          peeked = peek

          case c
          when ""
            error "unclosed string starting at index #{@start}"
            return nil
          when "\\"
            if S_ESCAPES.member?(peeked) || peeked == quote
              # Skip the escaped character so an escaped quote does not end
              # the string.
              self.next
            else
              error "invalid escape"
              return nil
            end
          when quote
            # Emit the content between the quotes, without the quotes.
            backup
            emit(token)
            self.next
            ignore # move past closing quote
            return state
          end
        end
      }
    end
  end

  define_method(:lex_double_quoted_string_inside_bracketed_segment,
                lex_string_factory('"', :lex_inside_bracketed_segment, Token::DOUBLE_QUOTE_STRING))

  define_method(:lex_single_quoted_string_inside_bracketed_segment,
                lex_string_factory("'", :lex_inside_bracketed_segment, Token::SINGLE_QUOTE_STRING))

  define_method(:lex_double_quoted_string_inside_filter_expression,
                lex_string_factory('"', :lex_inside_filter, Token::DOUBLE_QUOTE_STRING))

  define_method(:lex_single_quoted_string_inside_filter_expression,
                lex_string_factory("'", :lex_inside_filter, Token::SINGLE_QUOTE_STRING))
end
420
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JSONP3
4
# A JSON-like value and its location.
class JSONPathNode
  # Two-character escape sequences used for member names in normalized paths.
  # Control characters not listed here are written as lower-case \u00xx.
  NAME_ESCAPES = {
    "\b" => "\\b",
    "\f" => "\\f",
    "\n" => "\\n",
    "\r" => "\\r",
    "\t" => "\\t",
    "'" => "\\'",
    "\\" => "\\\\"
  }.freeze
  private_constant :NAME_ESCAPES

  # @dynamic value, location, root
  attr_reader :value, :location, :root

  # @param value [JSON-like] the value at this node.
  # @param location [Array<String | Integer | Array<String | Integer>>] the sequence of
  #   names and/or indices leading to _value_ in _root_.
  # @param root [JSON-like] the root value containing _value_ at _location_.
  def initialize(value, location, root)
    @value = value
    @location = location
    @root = root
  end

  # Return the normalized path to this node.
  #
  # String location parts are written as single-quoted name selectors, with
  # single quotes, backslashes and control characters escaped so that the
  # result is itself a well-formed path. All other parts are written as index
  # selectors.
  # @return [String] the normalized path.
  def path
    # Locations are nested by #new_child, so flatten before rendering.
    segments = @location.flatten.map do |part|
      part.is_a?(String) ? "['#{escape_name(part)}']" : "[#{part}]"
    end
    "$#{segments.join}"
  end

  # Return a new node that is a child of this node.
  # The child's location wraps this node's location rather than copying it.
  # @param value the JSON-like value at the new node.
  # @param key [Integer, String] the array index or hash key associated with _value_.
  # @return [JSONPathNode]
  def new_child(value, key)
    JSONPathNode.new(value, [@location, key], @root)
  end

  # @return [String] a human readable description of this node.
  def to_s
    "JSONPathNode(#{value} at #{path})"
  end

  private

  # Escape _name_ for use between single quotes in a normalized path.
  # @param name [String]
  # @return [String]
  def escape_name(name)
    name.gsub(/[\x00-\x1f'\\]/) do |char|
      NAME_ESCAPES.fetch(char) { format("\\u%04x", char.ord) }
    end
  end
end
37
+
38
# A list of JSONPathNode instances.
#
# This is a plain Array subclass that adds no behavior. It exists so that a
# list of nodes can be told apart from an array of data values, which matters
# when calling filter functions that expect nodes as arguments.
class JSONPathNodeList < Array
end
42
+ end