janeway-jsonpath 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'functions'
4
+ require_relative 'lexer'
4
5
 
5
6
  module Janeway
6
7
  # Transform a list of tokens into an Abstract Syntax Tree
7
8
  class Parser
8
9
  class Error < Janeway::Error; end
9
10
 
10
- attr_accessor :tokens, :ast
11
+ attr_accessor :tokens
11
12
 
12
13
  include Functions
13
14
 
@@ -33,27 +34,36 @@ module Janeway
33
34
  # @param query [String] jsonpath query to be lexed and parsed
34
35
  #
35
36
  # @return [AST]
36
- def self.parse(query)
37
- raise ArgumentError, "expect string, got #{query.inspect}" unless query.is_a?(String)
37
+ def self.parse(jsonpath)
38
+ raise ArgumentError, "expect jsonpath string, got #{jsonpath.inspect}" unless jsonpath.is_a?(String)
38
39
 
39
- tokens = Janeway::Lexer.lex(query)
40
- new(tokens).parse
40
+ tokens = Janeway::Lexer.lex(jsonpath)
41
+ new(tokens, jsonpath).parse
41
42
  end
42
43
 
43
- def initialize(tokens)
44
+ # @param tokens [Array<Token>]
45
+ # @param jsonpath [String] original jsonpath query string
46
+ def initialize(tokens, jsonpath)
44
47
  @tokens = tokens
45
- @ast = AST::Query.new
46
48
  @next_p = 0
49
+ @jsonpath = jsonpath
47
50
  end
48
51
 
52
+ # Parse the token list and create an Abstract Syntax Tree
53
+ # @return [AST::Query]
49
54
  def parse
50
-
51
55
  consume
52
- @ast.root = parse_expr_recursively
56
+ raise err('JsonPath queries must start with root identifier "$"') unless current.type == :root
57
+
58
+ root_node = parse_expr_recursively
53
59
  consume
54
- raise "unparsed tokens" unless current.type == :eof
60
+ unless current.type == :eof
61
+ remaining = tokens[@next_p..].map(&:lexeme).join
62
+ raise err("Unrecognized expressions after query: #{remaining}")
63
+ end
55
64
 
56
- @ast
65
+ # Freeze so this can be used in ractors
66
+ AST::Query.new(root_node, @jsonpath).freeze
57
67
  end
58
68
 
59
69
  private
@@ -123,10 +133,12 @@ module Janeway
123
133
 
124
134
  def unexpected_token_error(expected = nil)
125
135
  if expected
126
- raise Error, "Unexpected token #{current.lexeme.inspect} (expected #{expected.inspect}) (next is #{next_token.inspect})"
127
- else
128
- raise Error, "Unexpected token #{current.lexeme.inspect} (next is #{next_token.inspect})"
136
+ raise err(
137
+ "Unexpected token #{current.lexeme.inspect} " \
138
+ "(expected #{expected.inspect}, got #{next_token.lexeme.inspect} )"
139
+ )
129
140
  end
141
+ raise err("Unexpected token #{current.lexeme.inspect} (next is #{next_token.inspect})")
130
142
  end
131
143
 
132
144
  def check_syntax_compliance(ast_node)
@@ -156,7 +168,7 @@ module Janeway
156
168
  elsif current.type == :null # null
157
169
  :parse_null
158
170
  else
159
- raise "Don't know how to parse #{current}"
171
+ raise err("Don't know how to parse #{current}")
160
172
  end
161
173
  end
162
174
 
@@ -184,15 +196,16 @@ module Janeway
184
196
  # Consume minus operator and apply it to the (expected) number token following it.
185
197
  # Don't consume the number token.
186
198
  def parse_minus_operator
187
- raise "Expect token '-', got #{current.lexeme.inspect}" unless current.type == :minus
199
+ raise err("Expect token '-', got #{current.lexeme.inspect}") unless current.type == :minus
188
200
 
189
- # RFC: negative 0 is allowed within a filter selector comparison, but is NOT allowed within an index selector or array slice selector.
201
+ # RFC: negative 0 is allowed within a filter selector comparison, but is NOT allowed
202
+ # within an index selector or array slice selector.
190
203
  # Detect that condition here
191
- if next_token.type == :number && next_token.literal == 0
192
- if [previous.type, lookahead(2).type].any? { _1 == :array_slice_separator}
193
- raise Error, 'Negative zero is not allowed in an array slice selector'
204
+ if next_token.type == :number && next_token.literal.zero?
205
+ if [previous.type, lookahead(2).type].any? { _1 == :array_slice_separator }
206
+ raise err('Negative zero is not allowed in an array slice selector')
194
207
  elsif %i[union child_start].include?(previous.type)
195
- raise Error, 'Negative zero is not allowed in an index selector'
208
+ raise err('Negative zero is not allowed in an index selector')
196
209
  end
197
210
  end
198
211
 
@@ -200,6 +213,10 @@ module Janeway
200
213
  # Parse number and apply - sign to its literal value
201
214
  consume
202
215
  parse_number
216
+ unless current.literal.is_a?(Numeric)
217
+ raise err("Minus operator \"-\" must be followed by number, got #{current.lexeme.inspect}")
218
+ end
219
+
203
220
  current.literal *= -1
204
221
  current
205
222
  end
@@ -209,6 +226,7 @@ module Janeway
209
226
  AST::Null.new
210
227
  end
211
228
 
229
+ # @return [AST::Boolean]
212
230
  def parse_boolean
213
231
  AST::Boolean.new(current.literal == 'true')
214
232
  end
@@ -236,33 +254,24 @@ module Janeway
236
254
  def parse_descendant_segment
237
255
  consume # '..'
238
256
 
239
- # DescendantSegment must be followed by a selector S which it applies to all descendants.
240
- #
241
- # Normally the parser makes the selector after S be a child of S.
242
- # However that is not the desired behavior for DescendantSelector.
243
- # Consider '$.a..b[1]'. The first element must be taken from the set of all 'b' keys.
244
- # If the ChildSegment was a child of the `b` NameSelector, then it would be taking
245
- # index 1 from every 'b' found rather than from the set of all 'b's.
246
- #
247
- # To get around this, the Parser must embed a Selector object that
248
- # doesn't include the following selector as a child. Then the following
249
- # selector must be made a child of the DescendantSegment.
250
257
  selector =
251
258
  case next_token.type
252
- when :wildcard then parse_wildcard_selector(and_child: false)
253
- when :child_start then parse_child_segment(and_child: false)
254
- when :string, :identifier then parse_name_selector(and_child: false)
259
+ when :wildcard then parse_wildcard_selector
260
+ when :child_start then parse_child_segment
261
+ when :string, :identifier then parse_name_selector
255
262
  else
256
- raise "Invalid query: descendant segment must have selector, got ..#{next_token.type}"
263
+ msg = 'Descendant segment ".." must be followed by selector'
264
+ msg += ", got ..#{next_token.type}" unless next_token.type == :eof
265
+ raise err(msg)
257
266
  end
258
267
 
259
- AST::DescendantSegment.new(selector).tap do |ds|
268
+ AST::DescendantSegment.new.tap do |ds|
260
269
  # If there is another selector after this one, make it a child
261
- ds.child = parse_next_selector
270
+ ds.next = selector
262
271
  end
263
272
  end
264
273
 
265
- # Dot notation reprsents a name selector, and is an alternative to bracket notation.
274
+ # Dot notation represents a name selector, and is an alternative to bracket notation.
266
275
  # These examples are equivalent:
267
276
  # $.store
268
277
  # $[store]
@@ -272,16 +281,26 @@ module Janeway
272
281
  # * member name (with only certain chars useable. For example, names containing dots are not allowed here.)
273
282
  def parse_dot_notation
274
283
  consume # "."
275
- raise "#parse_dot_notation expects to consume :dot, got #{current}" unless current.type == :dot
276
-
284
+ unless current.type == :dot
285
+ # Parse error, determine most useful error message
286
+ msg =
287
+ if current.type == :number
288
+ "Decimal point must be preceded by number, got \".#{current.lexeme}\""
289
+ else
290
+ 'Dot "." begins a name selector, and must be followed by an ' \
291
+ "object member name, #{next_token.lexeme.inspect} is invalid here"
292
+ end
293
+ raise err(msg)
294
+ end
277
295
 
278
296
  case next_token.type
279
- # FIXME: implement a different name lexer which is limited to only the chars allowed under dot notation
280
- # @see https://www.rfc-editor.org/rfc/rfc9535.html#section-2.5.1.1
281
297
  when :identifier then parse_name_selector
282
298
  when :wildcard then parse_wildcard_selector
283
299
  else
284
- raise "cannot parse #{current.type}"
300
+ raise err(
301
+ 'Dot "." begins a name selector, and must be followed by an ' \
302
+ "object member name, #{next_token.lexeme.inspect} is invalid here"
303
+ )
285
304
  end
286
305
  end
287
306
 
@@ -300,7 +319,6 @@ module Janeway
300
319
  end
301
320
 
302
321
  def parse_root
303
-
304
322
  # detect optional following selector
305
323
  selector =
306
324
  case next_token.type
@@ -314,7 +332,6 @@ module Janeway
314
332
 
315
333
  # Parse the current node operator "@", and optionally a selector which is applied to it
316
334
  def parse_current_node
317
-
318
335
  # detect optional following selector
319
336
  selector =
320
337
  case next_token.type
@@ -347,17 +364,16 @@ module Janeway
347
364
  # This is not just a speed optimization. Serial selectors that feed into
348
365
  # each other have different behaviour than serial child segments.
349
366
  #
350
- # @param and_child [Boolean] make following token a child of this selector list
351
367
  # @return [AST::ChildSegment]
352
- def parse_child_segment(and_child: true)
368
+ def parse_child_segment
353
369
  consume
354
- raise "Expect token [, got #{current.lexeme.inspect}" unless current.type == :child_start
370
+ raise err("Expect token [, got #{current.lexeme.inspect}") unless current.type == :child_start
355
371
 
356
372
  consume # "["
357
373
 
358
374
  child_segment = AST::ChildSegment.new
359
375
  loop do
360
- selector = parse_selector
376
+ selector = parse_current_selector
361
377
  child_segment << selector if selector # nil selector means empty brackets
362
378
 
363
379
  break unless current.type == :union # no more selectors in these parentheses
@@ -366,33 +382,26 @@ module Janeway
366
382
  consume # ","
367
383
 
368
384
  # not allowed to have comma with nothing after it
369
- if current.type == :child_end
370
- raise Error.new("Comma must be followed by another expression in filter selector")
371
- end
385
+ raise err('Comma must be followed by another expression in filter selector') if current.type == :child_end
372
386
  end
373
387
 
374
- # Do not consume the final ']', the top-level parsing loop will eat that
375
- unless current.type == :child_end
376
- # developer error, check the parsing function
377
- raise "expect current token to be ], got #{current.type.inspect}"
378
- end
388
+ # Expect ']' after the selector definitions
389
+ raise err("Unexpected character #{current.lexeme.inspect} within brackets") unless current.type == :child_end
379
390
 
380
391
  # if the child_segment contains just one selector, then return the selector instead.
381
392
  # This way a series of selectors feed results to each other without
382
393
  # combining results in a node list.
383
- node =
394
+ expr =
384
395
  case child_segment.size
385
- when 0 then raise Error.new('Empty child segment')
396
+ when 0 then raise err('Empty child segment')
386
397
  when 1 then child_segment.first
387
398
  else child_segment
388
399
  end
389
400
 
390
- if and_child
391
- # Parse any subsequent expression which consumes this child segment
392
- node.child = parse_next_selector
393
- end
401
+ # Parse any subsequent expression which consumes this child segment
402
+ expr.next = parse_next_selector
394
403
 
395
- node
404
+ expr
396
405
  end
397
406
 
398
407
  # Parse a selector and return it.
@@ -401,6 +410,9 @@ module Janeway
401
410
  case next_token.type
402
411
  when :child_start then parse_child_segment
403
412
  when :dot then parse_dot_notation
413
+ when :descendants then parse_descendant_segment
414
+ when :wildcard then parse_wildcard_selector
415
+ when :eof, :child_end then nil
404
416
  end
405
417
  end
406
418
 
@@ -415,7 +427,7 @@ module Janeway
415
427
  end
416
428
 
417
429
  # Parse a selector which is inside brackets
418
- def parse_selector
430
+ def parse_current_selector
419
431
  case current.type
420
432
  when :array_slice_separator then parse_array_slice_selector
421
433
  when :filter then parse_filter_selector
@@ -423,7 +435,7 @@ module Janeway
423
435
  when :minus
424
436
  # apply the - sign to the following number and retry
425
437
  parse_minus_operator
426
- parse_selector
438
+ parse_current_selector
427
439
  when :number
428
440
  if lookahead.type == :array_slice_separator
429
441
  parse_array_slice_selector
@@ -434,16 +446,17 @@ module Janeway
434
446
  AST::NameSelector.new(current_literal_and_consume)
435
447
  when :child_end then nil # empty brackets, do nothing.
436
448
  else
437
- raise "Unhandled selector: #{current}"
449
+ raise err("Expect selector, got #{current.lexeme.inspect}")
438
450
  end
439
451
  end
440
452
 
441
453
  # Parse wildcard selector and any following selector
442
- # @param and_child [Boolean] make following token a child of this selector
443
- def parse_wildcard_selector(and_child: true)
454
+ def parse_wildcard_selector
444
455
  selector = AST::WildcardSelector.new
445
456
  consume
446
- selector.child = parse_next_selector if and_child
457
+ return selector if %i[child_end union].include?(current.type)
458
+
459
+ selector.next = parse_next_selector
447
460
  selector
448
461
  end
449
462
 
@@ -461,9 +474,7 @@ module Janeway
461
474
  # @return [AST::ArraySliceSelector]
462
475
  def parse_array_slice_selector
463
476
  start, end_, step = Array.new(3) { parse_array_slice_component }.map { _1&.literal }
464
-
465
-
466
- raise "After array slice, expect ], got #{current.lexeme}" unless current.type == :child_end # ]
477
+ raise err("After array slice selector, expect ], got #{current.lexeme}") unless current.type == :child_end # ]
467
478
 
468
479
  AST::ArraySliceSelector.new(start, end_, step)
469
480
  end
@@ -480,7 +491,7 @@ module Janeway
480
491
  parse_minus_operator
481
492
  parse_array_slice_component
482
493
  when :number then current
483
- else raise "Unexpected token in array slice selector: #{current}"
494
+ else raise err("Unexpected token in array slice selector: #{current.lexeme.inspect}")
484
495
  end
485
496
  consume if current.type == :number
486
497
  consume if current.type == :array_slice_separator
@@ -491,25 +502,21 @@ module Janeway
491
502
  # The name selector may have been in dot notation or bracket notation; that part is already parsed.
492
503
  # Next token is just the name.
493
504
  #
494
- # @param and_child [Boolean] make following token a child of this selector
495
505
  # @return [AST::NameSelector]
496
- def parse_name_selector(and_child: true)
506
+ def parse_name_selector
497
507
  consume
498
508
  selector = AST::NameSelector.new(current.lexeme)
499
- if and_child
500
- # If there is a following expression, parse that too
501
- case next_token.type
502
- when :dot then selector.child = parse_dot_notation
503
- when :child_start then selector.child = parse_child_segment
504
- when :descendants then selector.child = parse_descendant_segment
505
- end
509
+ # If there is a following expression, parse that too
510
+ case next_token.type
511
+ when :dot then selector.next = parse_dot_notation
512
+ when :child_start then selector.next = parse_child_segment
513
+ when :descendants then selector.next = parse_descendant_segment
506
514
  end
507
515
  selector
508
516
  end
509
517
 
510
518
  # Feed tokens to the FilterSelector until hitting a terminator
511
519
  def parse_filter_selector
512
-
513
520
  selector = AST::FilterSelector.new
514
521
  terminator_types = %I[child_end union eof]
515
522
  while next_token && !terminator_types.include?(next_token.type)
@@ -526,9 +533,8 @@ module Janeway
526
533
  end
527
534
 
528
535
  # Check for literal, they are not allowed to be a complete condition in a filter selector
529
- if selector.value.literal?
530
- raise Error, "Literal #{selector.value} must be used within a comparison"
531
- end
536
+ # This includes jsonpath functions that return a numeric value.
537
+ raise err("Literal value #{selector.value} must be used within a comparison") if selector.value.literal?
532
538
 
533
539
  consume
534
540
 
@@ -543,7 +549,7 @@ module Janeway
543
549
  parse_minus_operator
544
550
  parse_number
545
551
  else
546
- raise "unknown unary operator: #{current.inspect}"
552
+ raise err("Unknown unary operator: #{current.lexeme.inspect}")
547
553
  end
548
554
  end
549
555
 
@@ -567,29 +573,26 @@ module Janeway
567
573
  # Parse a JSONPath function call
568
574
  def parse_function
569
575
  parsing_function = "parse_function_#{current.literal}"
570
- result = send(parsing_function)
571
- result
576
+ send(parsing_function)
572
577
  end
573
578
 
574
579
  # Parse an expression
575
580
  def parse_expr
576
581
  parsing_function = determine_parsing_function
577
- raise Error, "Unrecognized token: #{current.lexeme.inspect}" unless parsing_function
582
+ raise err("Unrecognized token: #{current.lexeme.inspect}") unless parsing_function
578
583
 
579
584
  send(parsing_function)
580
585
  end
581
586
 
582
587
  def parse_expr_recursively(precedence = LOWEST_PRECEDENCE)
583
588
  parsing_function = determine_parsing_function
584
- raise Error, "Unrecognized token: #{current.lexeme.inspect}" unless parsing_function
589
+ raise err("Unrecognized token: #{current.lexeme.inspect}") unless parsing_function
585
590
 
586
- tk = current
591
+ current
587
592
  expr = send(parsing_function)
588
593
  return unless expr # When expr is nil, it means we have reached a \n or a eof.
589
594
 
590
595
  # Note that here we are checking the NEXT token.
591
- if next_not_terminator? && precedence < next_precedence
592
- end
593
596
  while next_not_terminator? && precedence < next_precedence
594
597
  infix_parsing_function = determine_infix_function(next_token)
595
598
 
@@ -602,6 +605,14 @@ module Janeway
602
605
  expr
603
606
  end
604
607
 
608
+ # Return a Parser::Error with the specified message, including the query.
609
+ #
610
+ # @param msg [String] error message
611
+ # @return [Parser::Error]
612
+ def err(msg)
613
+ Error.new(msg, @jsonpath)
614
+ end
615
+
605
616
  alias parse_true parse_boolean
606
617
  alias parse_false parse_boolean
607
618
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Janeway
4
- VERSION = '0.1.0'
4
+ VERSION = '0.2.0'
5
5
  end
data/lib/janeway.rb CHANGED
@@ -6,6 +6,9 @@ require 'English'
6
6
  module Janeway
7
7
  # Abstract Syntax Tree
8
8
  module AST
9
+ # These are the limits of the integers that JavaScript's Number type can represent exactly
10
+ INTEGER_MIN = -9_007_199_254_740_991
11
+ INTEGER_MAX = 9_007_199_254_740_991
9
12
  end
10
13
 
11
14
  # Apply a JsonPath query to the input, and return the result.
@@ -14,8 +17,8 @@ module Janeway
14
17
  # @param input [Object] ruby object to be searched
15
18
  # @return [Array] all matched objects
16
19
  def self.find_all(query, input)
17
- query = compile(query)
18
- Janeway::Interpreter.new(input).interpret(query)
20
+ ast = compile(query)
21
+ Janeway::Interpreter.new(ast).interpret(input)
19
22
  end
20
23
 
21
24
  # Compile a JsonPath query into an Abstract Syntax Tree.
@@ -44,8 +47,5 @@ def require_libs(dir)
44
47
  end
45
48
  end
46
49
 
47
- # These are dependencies of the other AST source files, and must come first
48
- require_relative 'janeway/ast/expression'
49
-
50
50
  require_libs('janeway/ast')
51
51
  require_libs('janeway')
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: janeway-jsonpath
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fraser Hanson
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-01-09 00:00:00.000000000 Z
10
+ date: 2025-01-13 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: |+
13
13
  JSONPath is a query language for selecting and extracting values from a JSON text.