janeway-jsonpath 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. checksums.yaml +4 -4
  2. data/README.md +84 -6
  3. data/bin/janeway +77 -28
  4. data/lib/janeway/ast/child_segment.rb +9 -7
  5. data/lib/janeway/ast/expression.rb +2 -1
  6. data/lib/janeway/ast/filter_selector.rb +0 -2
  7. data/lib/janeway/ast/function.rb +2 -0
  8. data/lib/janeway/ast/selector.rb +17 -18
  9. data/lib/janeway/ast.rb +31 -0
  10. data/lib/janeway/enumerator.rb +161 -5
  11. data/lib/janeway/interpreter.rb +11 -5
  12. data/lib/janeway/interpreters/array_slice_selector_delete_if.rb +57 -0
  13. data/lib/janeway/interpreters/array_slice_selector_deleter.rb +1 -1
  14. data/lib/janeway/interpreters/child_segment_delete_if.rb +20 -0
  15. data/lib/janeway/interpreters/child_segment_deleter.rb +1 -1
  16. data/lib/janeway/interpreters/descendant_segment_interpreter.rb +1 -1
  17. data/lib/janeway/interpreters/filter_selector_delete_if.rb +73 -0
  18. data/lib/janeway/interpreters/index_selector_delete_if.rb +42 -0
  19. data/lib/janeway/interpreters/index_selector_interpreter.rb +3 -1
  20. data/lib/janeway/interpreters/iteration_helper.rb +45 -0
  21. data/lib/janeway/interpreters/name_selector_delete_if.rb +42 -0
  22. data/lib/janeway/interpreters/root_node_delete_if.rb +34 -0
  23. data/lib/janeway/interpreters/tree_constructor.rb +31 -1
  24. data/lib/janeway/interpreters/wildcard_selector_delete_if.rb +61 -0
  25. data/lib/janeway/interpreters/yielder.rb +7 -33
  26. data/lib/janeway/lexer.rb +46 -40
  27. data/lib/janeway/normalized_path.rb +12 -5
  28. data/lib/janeway/parser.rb +2 -0
  29. data/lib/janeway/query.rb +38 -0
  30. data/lib/janeway/version.rb +2 -1
  31. data/lib/janeway.rb +61 -35
  32. metadata +12 -3
data/lib/janeway/lexer.rb CHANGED
@@ -5,50 +5,50 @@ require_relative 'token'
5
5
  require_relative 'error'
6
6
 
7
7
  module Janeway
8
- OPERATORS = {
9
- and: '&&',
10
- array_slice_separator: ':',
11
- child_end: ']',
12
- child_start: '[',
13
- current_node: '@',
14
- descendants: '..',
15
- dot: '.',
16
- equal: '==',
17
- filter: '?',
18
- greater_than: '>',
19
- greater_than_or_equal: '>=',
20
- group_end: ')',
21
- group_start: '(',
22
- less_than: '<',
23
- less_than_or_equal: '<=',
24
- minus: '-',
25
- not: '!',
26
- not_equal: '!=',
27
- or: '||',
28
- root: '$',
29
- union: ',',
30
- wildcard: '*',
31
- }.freeze
32
- ONE_CHAR_LEX = OPERATORS.values.select { |lexeme| lexeme.size == 1 }.freeze
33
- TWO_CHAR_LEX = OPERATORS.values.select { |lexeme| lexeme.size == 2 }.freeze
34
- TWO_CHAR_LEX_FIRST = TWO_CHAR_LEX.map { |lexeme| lexeme[0] }.freeze
35
- ONE_OR_TWO_CHAR_LEX = ONE_CHAR_LEX & TWO_CHAR_LEX.map { |str| str[0] }.freeze
36
-
37
- WHITESPACE = " \t\n\r"
38
- KEYWORD = %w[true false null].freeze
39
- FUNCTIONS = %w[length count match search value].freeze
40
-
41
- # faster to check membership in a string than an array of char (benchmarked ruby 3.1.2)
42
- ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
43
- DIGITS = '0123456789'
44
-
45
- # chars that may be used as the first letter of member-name-shorthand
46
- NAME_FIRST = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
47
-
48
8
  # Transforms source code into tokens
49
9
  class Lexer
50
10
  class Error < Janeway::Error; end
51
11
 
12
+ OPERATORS = {
13
+ and: '&&',
14
+ array_slice_separator: ':',
15
+ child_end: ']',
16
+ child_start: '[',
17
+ current_node: '@',
18
+ descendants: '..',
19
+ dot: '.',
20
+ equal: '==',
21
+ filter: '?',
22
+ greater_than: '>',
23
+ greater_than_or_equal: '>=',
24
+ group_end: ')',
25
+ group_start: '(',
26
+ less_than: '<',
27
+ less_than_or_equal: '<=',
28
+ minus: '-',
29
+ not: '!',
30
+ not_equal: '!=',
31
+ or: '||',
32
+ root: '$',
33
+ union: ',',
34
+ wildcard: '*',
35
+ }.freeze
36
+ ONE_CHAR_LEX = OPERATORS.values.select { |lexeme| lexeme.size == 1 }.freeze
37
+ TWO_CHAR_LEX = OPERATORS.values.select { |lexeme| lexeme.size == 2 }.freeze
38
+ TWO_CHAR_LEX_FIRST = TWO_CHAR_LEX.map { |lexeme| lexeme[0] }.freeze
39
+ ONE_OR_TWO_CHAR_LEX = ONE_CHAR_LEX & TWO_CHAR_LEX.map { |str| str[0] }.freeze
40
+
41
+ WHITESPACE = " \t\n\r"
42
+ KEYWORD = %w[true false null].freeze
43
+ FUNCTIONS = %w[length count match search value].freeze
44
+
45
+ # faster to check membership in a string than an array of char (benchmarked ruby 3.1.2)
46
+ ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
47
+ DIGITS = '0123456789'
48
+
49
+ # chars that may be used as the first letter of member-name-shorthand
50
+ NAME_FIRST = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
51
+
52
52
  attr_reader :source, :tokens
53
53
  attr_accessor :next_p, :lexeme_start_p
54
54
 
@@ -72,6 +72,7 @@ module Janeway
72
72
  end
73
73
 
74
74
  def start_tokenization
75
+ raise err('JSONPath query is empty') if @source.empty?
75
76
  if WHITESPACE.include?(@source[0]) || WHITESPACE.include?(@source[-1])
76
77
  raise err('JSONPath query may not start or end with whitespace')
77
78
  end
@@ -470,6 +471,7 @@ module Janeway
470
471
  end
471
472
 
472
473
  # Lex a member name that is found within dot notation.
474
+ # This name is not delimited and allows a subset of the characters that can appear in a delimited string.
473
475
  #
474
476
  # Recognize keywords and give them the correct type.
475
477
  # @see https://www.rfc-editor.org/rfc/rfc9535.html#section-2.5.1.1-3
@@ -477,6 +479,10 @@ module Janeway
477
479
  # @param ignore_keywords [Boolean]
478
480
  # @return [Token]
479
481
  def lex_member_name_shorthand(ignore_keywords: false)
482
+ # Abort if name is preceded by child_start. Catches non-delimited identifiers in brackets,
483
+ # eg. $["key"] is allowed, but $[key] is not
484
+ raise err('Identifier within brackets must be surrounded by quotes') if @tokens.last&.type == :child_start
485
+
480
486
  consume while name_char?(lookahead)
481
487
  identifier = source[lexeme_start_p..(next_p - 1)]
482
488
  type =
@@ -46,11 +46,18 @@ module Janeway
46
46
  def self.escape_char(char)
47
47
  # Character ranges defined by https://www.rfc-editor.org/rfc/rfc9535.html#section-2.7-8
48
48
  case char.ord
49
- when 0x20..0x26, 0x28..0x5B, 0x5D..0xD7FF, 0xE000..0x10FFFF # normal-unescaped range
50
- char # unescaped
51
- when 0x62, 0x66, 0x6E, 0x72, 0x74, 0x27, 0x5C # normal-escapable range
52
- # backspace, form feed, line feed, carriage return, horizontal tab, apostrophe, backslash
53
- "\\#{char}" # escaped
49
+ # normal-unescaped range
50
+ when 0x20..0x26, 0x28..0x5B, 0x5D..0xD7FF, 0xE000..0x10FFFF then char # unescaped
51
+
52
+ # normal-escapable range
53
+ when 0x08 then '\\b' # backspace
54
+ when 0x0C then '\\f' # form feed
55
+ when 0x0A then '\\n' # line feed / newline
56
+ when 0x0D then '\\r' # carriage return
57
+ when 0x09 then '\\t' # horizontal tab
58
+ when 0x27 then '\\\'' # apostrophe
59
+ when 0x5C then '\\\\' # backslash
60
+
54
61
  else # normal-hexchar range
55
62
  hex_encode_char(char)
56
63
  end
@@ -1,8 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'ast'
3
4
  require_relative 'error'
4
5
  require_relative 'functions'
5
6
  require_relative 'lexer'
7
+ require_relative 'query'
6
8
 
7
9
  module Janeway
8
10
  # Transform a list of tokens into an Abstract Syntax Tree
data/lib/janeway/query.rb CHANGED
@@ -30,10 +30,19 @@ module Janeway
30
30
  Janeway::Enumerator.new(self, input)
31
31
  end
32
32
 
33
+ # @return [String]
33
34
  def to_s
34
35
  @root.to_s
35
36
  end
36
37
 
38
+ # Return true if this query can only possibly match 0 or 1 elements in any input.
39
+ # Such a query must be composed exclusively of the root identifier followed by
40
+ # name selectors and / or index selectors.
41
+ # @return [Boolean]
42
+ def singular_query?
43
+ @root.singular_query?
44
+ end
45
+
37
46
  # Return a list of the nodes in the AST.
38
47
  # The AST of a jsonpath query is a straight line, so this is expressible as an array.
39
48
  # The only part of the AST with branches is inside a filter selector, but that doesn't show up here.
@@ -66,5 +75,34 @@ module Janeway
66
75
 
67
76
  result.flatten.join("\n")
68
77
  end
78
+
79
+ # Deep copy the query
80
+ # @return [Query]
81
+ def dup
82
+ Parser.parse(to_s)
83
+ end
84
+
85
+ # Delete the last element from the chain of selectors.
86
+ # For a singular query, this makes the query point to the match's parent instead of the match itself.
87
+ #
88
+ # Don't do this for a non-singular query, those may contain child segments and
89
+ # descendant segments which would lead to different results.
90
+ #
91
+ # @return [AST::Selector]
92
+ def pop
93
+ raise Janeway::Error.new('not allowed to pop from a non-singular query', to_s) unless singular_query?
94
+
95
+ # Sever the link to the last selector
96
+ nodes = node_list
97
+ if nodes.size == 1
98
+ # Special case: cannot pop from the query "$" even though it is a singular query
99
+ raise Janeway::Error.new('cannot pop from single-element query', to_s)
100
+ end
101
+
102
+ nodes[-2].next = nil
103
+
104
+ # Return the last selector
105
+ nodes.last
106
+ end
69
107
  end
70
108
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Janeway
4
- VERSION = '0.5.0'
4
+ # Version for janeway-jsonpath gem
5
+ VERSION = '1.0.0'
5
6
  end
data/lib/janeway.rb CHANGED
@@ -1,59 +1,85 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'English'
4
+ require_relative 'janeway/enumerator'
5
+ require_relative 'janeway/parser'
4
6
 
5
- # Janeway JSONPath parsing library
7
+ # Janeway JSONPath query library
8
+ #
9
+ # https://github.com/gongfarmer/janeway
6
10
  module Janeway
7
- # Abstract Syntax Tree
8
- module AST
9
- # These are the limits of what javascript's Number type can represent
10
- INTEGER_MIN = -9_007_199_254_740_991
11
- INTEGER_MAX = 9_007_199_254_740_991
12
- end
13
-
14
- # Pair a jsonpath query with data to make an enumerator.
15
- # This can be used to apply the query to the data using Enumerator module
16
- # methods such as #each and #map.
11
+ # Parse a jsonpath string and combine it with data to make an Enumerator.
12
+ #
13
+ # The Enumerator can be used to apply the query to the data using Enumerator
14
+ # module methods such as #each and #map.
15
+ #
16
+ # @example Apply query to data and search to get array of results
17
+ # results = Janeway.enum_for('$.store.books[? length(@.title) > 20]', data).search
18
+ #
19
+ # @example Apply query to data and iterate over results
20
+ # enum = Janeway.enum_for('$.store.books[? length(@.title) > 20]', data)
21
+ # enum.each do |book|
22
+ # results << book
23
+ # end
24
+ #
25
+ # @see Janeway::Enumerator docs for more ways to use the Enumerator
17
26
  #
18
27
  # @param jsonpath [String] jsonpath query
19
28
  # @param data [Array, Hash] input data
20
29
  # @return [Janeway::Enumerator]
21
30
  def self.enum_for(jsonpath, data)
22
- query = compile(jsonpath)
31
+ query = parse(jsonpath)
23
32
  Janeway::Enumerator.new(query, data)
24
33
  end
25
34
 
26
- # Compile a JSONPath query into an Abstract Syntax Tree.
35
+ # Parse a JSONPath string into a Janeway::Query object.
36
+ #
37
+ # This object can be combined with data to create Enumerators that apply the query to the data.
38
+ #
39
+ # Use this method if you want to parse the query once and re-use it for multiple data sets.
27
40
  #
28
- # This can be combined with inputs (using #enum_for) to create Enumerators.
29
- # @example
30
- # query = Janeway.compile('$.store.books[? length(@.title) > 20]')
31
- # long_title_books = query.enum_for(some_data).search
32
- # query.enum_for(other_data).each do |book|
33
- # long_title_books << book
34
- # end
41
+ # Otherwise, use Janeway.enum_for to parse the query and pair it with data in a single step.
42
+ #
43
+ # @example Use a query to search several JSON files
44
+ # results = []
45
+ # query = Janeway.parse('$.store.books[? length(@.title) > 20]')
46
+ # data_files.each do |path|
47
+ # data = JSON.parse File.read(path)
48
+ # results.concat query.enum_for(data).search
49
+ # end
35
50
  #
36
51
  # @param query [String] jsonpath query
37
52
  # @return [Janeway::Query]
38
- def self.compile(query)
53
+ def self.parse(query)
39
54
  Janeway::Parser.parse(query)
40
55
  end
41
- end
42
56
 
43
- # Require ruby source files in the given dir. Do not recurse to subdirs.
44
- # @param dir [String] dir path relative to __dir__
45
- # @return [void]
46
- def require_libs(dir)
47
- absolute_path = File.join(__dir__, dir)
48
- raise "No such dir: #{dir.inspect}" unless File.directory?(absolute_path)
57
+ # Transform a jsonpath singular query into an array of hash keys and/or array
58
+ # indexes suitable for providing to Hash#dig or Array#dig.
59
+ #
60
+ # Only singular queries are allowed, meaning queries that contain only name
61
+ # selectors (ie. hash keys) and index selectors (array indexes.)
62
+ # The paths that are yielded to Enumerator#each are all suitable for this.
63
+ #
64
+ # @example convert normalized jsonpath to array of hash keys / array indices
65
+ # Janeway.path_to_diggable('$["a"].b.c[0]') => ["a", "b", "c", 0]
66
+ #
67
+ # @param jsonpath [String] jsonpath query
68
+ # @return [Array<String, Integer>]
69
+ def self.path_to_diggable(jsonpath)
70
+ raise Janeway::Error.new('Query has nothing to dig', jsonpath) if jsonpath == '$'
71
+
72
+ # Parse query and determine whether it can be converted
73
+ query = parse(jsonpath)
74
+ unless query.singular_query?
75
+ raise Janeway::Error.new('Only a singular query can be converted to dig parameters', jsonpath)
76
+ end
49
77
 
50
- Dir.children(absolute_path).sort.each do |filename|
51
- next if File.directory?(File.join(absolute_path, filename))
78
+ # Convert query to a list of name and index selectors
79
+ nodes = query.node_list
80
+ nodes.shift # discard the root identifier
52
81
 
53
- rel_path = File.join(dir, filename)
54
- require_relative(rel_path[0..-4]) # omits ".rb" extension
82
+ # Extract values from selectors
83
+ nodes.map(&:value)
55
84
  end
56
85
  end
57
-
58
- require_libs('janeway/ast')
59
- require_libs('janeway')
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: janeway-jsonpath
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fraser Hanson
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-01-30 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: |+
13
13
  JSONPath is a query language for selecting and extracting values from a JSON text.
@@ -31,6 +31,7 @@ files:
31
31
  - README.md
32
32
  - bin/janeway
33
33
  - lib/janeway.rb
34
+ - lib/janeway/ast.rb
34
35
  - lib/janeway/ast/array_slice_selector.rb
35
36
  - lib/janeway/ast/binary_operator.rb
36
37
  - lib/janeway/ast/boolean.rb
@@ -60,25 +61,33 @@ files:
60
61
  - lib/janeway/functions/search.rb
61
62
  - lib/janeway/functions/value.rb
62
63
  - lib/janeway/interpreter.rb
64
+ - lib/janeway/interpreters/array_slice_selector_delete_if.rb
63
65
  - lib/janeway/interpreters/array_slice_selector_deleter.rb
64
66
  - lib/janeway/interpreters/array_slice_selector_interpreter.rb
65
67
  - lib/janeway/interpreters/base.rb
66
68
  - lib/janeway/interpreters/binary_operator_interpreter.rb
69
+ - lib/janeway/interpreters/child_segment_delete_if.rb
67
70
  - lib/janeway/interpreters/child_segment_deleter.rb
68
71
  - lib/janeway/interpreters/child_segment_interpreter.rb
69
72
  - lib/janeway/interpreters/current_node_interpreter.rb
70
73
  - lib/janeway/interpreters/descendant_segment_interpreter.rb
74
+ - lib/janeway/interpreters/filter_selector_delete_if.rb
71
75
  - lib/janeway/interpreters/filter_selector_deleter.rb
72
76
  - lib/janeway/interpreters/filter_selector_interpreter.rb
73
77
  - lib/janeway/interpreters/function_interpreter.rb
78
+ - lib/janeway/interpreters/index_selector_delete_if.rb
74
79
  - lib/janeway/interpreters/index_selector_deleter.rb
75
80
  - lib/janeway/interpreters/index_selector_interpreter.rb
81
+ - lib/janeway/interpreters/iteration_helper.rb
82
+ - lib/janeway/interpreters/name_selector_delete_if.rb
76
83
  - lib/janeway/interpreters/name_selector_deleter.rb
77
84
  - lib/janeway/interpreters/name_selector_interpreter.rb
85
+ - lib/janeway/interpreters/root_node_delete_if.rb
78
86
  - lib/janeway/interpreters/root_node_deleter.rb
79
87
  - lib/janeway/interpreters/root_node_interpreter.rb
80
88
  - lib/janeway/interpreters/tree_constructor.rb
81
89
  - lib/janeway/interpreters/unary_operator_interpreter.rb
90
+ - lib/janeway/interpreters/wildcard_selector_delete_if.rb
82
91
  - lib/janeway/interpreters/wildcard_selector_deleter.rb
83
92
  - lib/janeway/interpreters/wildcard_selector_interpreter.rb
84
93
  - lib/janeway/interpreters/yielder.rb
@@ -107,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
107
116
  - !ruby/object:Gem::Version
108
117
  version: '0'
109
118
  requirements: []
110
- rubygems_version: 3.6.2
119
+ rubygems_version: 3.6.9
111
120
  specification_version: 4
112
121
  summary: jsonpath parser which implements the finalized IETF standard of Goessner
113
122
  JSONPath