janeway-jsonpath 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +84 -6
- data/bin/janeway +77 -28
- data/lib/janeway/ast/child_segment.rb +9 -7
- data/lib/janeway/ast/expression.rb +2 -1
- data/lib/janeway/ast/filter_selector.rb +0 -2
- data/lib/janeway/ast/function.rb +2 -0
- data/lib/janeway/ast/selector.rb +17 -18
- data/lib/janeway/ast.rb +31 -0
- data/lib/janeway/enumerator.rb +161 -5
- data/lib/janeway/interpreter.rb +11 -5
- data/lib/janeway/interpreters/array_slice_selector_delete_if.rb +57 -0
- data/lib/janeway/interpreters/array_slice_selector_deleter.rb +1 -1
- data/lib/janeway/interpreters/child_segment_delete_if.rb +20 -0
- data/lib/janeway/interpreters/child_segment_deleter.rb +1 -1
- data/lib/janeway/interpreters/descendant_segment_interpreter.rb +1 -1
- data/lib/janeway/interpreters/filter_selector_delete_if.rb +73 -0
- data/lib/janeway/interpreters/index_selector_delete_if.rb +42 -0
- data/lib/janeway/interpreters/index_selector_interpreter.rb +3 -1
- data/lib/janeway/interpreters/iteration_helper.rb +45 -0
- data/lib/janeway/interpreters/name_selector_delete_if.rb +42 -0
- data/lib/janeway/interpreters/root_node_delete_if.rb +34 -0
- data/lib/janeway/interpreters/tree_constructor.rb +31 -1
- data/lib/janeway/interpreters/wildcard_selector_delete_if.rb +61 -0
- data/lib/janeway/interpreters/yielder.rb +7 -33
- data/lib/janeway/lexer.rb +46 -40
- data/lib/janeway/normalized_path.rb +12 -5
- data/lib/janeway/parser.rb +2 -0
- data/lib/janeway/query.rb +38 -0
- data/lib/janeway/version.rb +2 -1
- data/lib/janeway.rb +61 -35
- metadata +12 -3
data/lib/janeway/lexer.rb
CHANGED
|
@@ -5,50 +5,50 @@ require_relative 'token'
|
|
|
5
5
|
require_relative 'error'
|
|
6
6
|
|
|
7
7
|
module Janeway
|
|
8
|
-
OPERATORS = {
|
|
9
|
-
and: '&&',
|
|
10
|
-
array_slice_separator: ':',
|
|
11
|
-
child_end: ']',
|
|
12
|
-
child_start: '[',
|
|
13
|
-
current_node: '@',
|
|
14
|
-
descendants: '..',
|
|
15
|
-
dot: '.',
|
|
16
|
-
equal: '==',
|
|
17
|
-
filter: '?',
|
|
18
|
-
greater_than: '>',
|
|
19
|
-
greater_than_or_equal: '>=',
|
|
20
|
-
group_end: ')',
|
|
21
|
-
group_start: '(',
|
|
22
|
-
less_than: '<',
|
|
23
|
-
less_than_or_equal: '<=',
|
|
24
|
-
minus: '-',
|
|
25
|
-
not: '!',
|
|
26
|
-
not_equal: '!=',
|
|
27
|
-
or: '||',
|
|
28
|
-
root: '$',
|
|
29
|
-
union: ',',
|
|
30
|
-
wildcard: '*',
|
|
31
|
-
}.freeze
|
|
32
|
-
ONE_CHAR_LEX = OPERATORS.values.select { |lexeme| lexeme.size == 1 }.freeze
|
|
33
|
-
TWO_CHAR_LEX = OPERATORS.values.select { |lexeme| lexeme.size == 2 }.freeze
|
|
34
|
-
TWO_CHAR_LEX_FIRST = TWO_CHAR_LEX.map { |lexeme| lexeme[0] }.freeze
|
|
35
|
-
ONE_OR_TWO_CHAR_LEX = ONE_CHAR_LEX & TWO_CHAR_LEX.map { |str| str[0] }.freeze
|
|
36
|
-
|
|
37
|
-
WHITESPACE = " \t\n\r"
|
|
38
|
-
KEYWORD = %w[true false null].freeze
|
|
39
|
-
FUNCTIONS = %w[length count match search value].freeze
|
|
40
|
-
|
|
41
|
-
# faster to check membership in a string than an array of char (benchmarked ruby 3.1.2)
|
|
42
|
-
ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
|
43
|
-
DIGITS = '0123456789'
|
|
44
|
-
|
|
45
|
-
# chars that may be used as the first letter of member-name-shorthand
|
|
46
|
-
NAME_FIRST = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
|
|
47
|
-
|
|
48
8
|
# Transforms source code into tokens
|
|
49
9
|
class Lexer
|
|
50
10
|
class Error < Janeway::Error; end
|
|
51
11
|
|
|
12
|
+
OPERATORS = {
|
|
13
|
+
and: '&&',
|
|
14
|
+
array_slice_separator: ':',
|
|
15
|
+
child_end: ']',
|
|
16
|
+
child_start: '[',
|
|
17
|
+
current_node: '@',
|
|
18
|
+
descendants: '..',
|
|
19
|
+
dot: '.',
|
|
20
|
+
equal: '==',
|
|
21
|
+
filter: '?',
|
|
22
|
+
greater_than: '>',
|
|
23
|
+
greater_than_or_equal: '>=',
|
|
24
|
+
group_end: ')',
|
|
25
|
+
group_start: '(',
|
|
26
|
+
less_than: '<',
|
|
27
|
+
less_than_or_equal: '<=',
|
|
28
|
+
minus: '-',
|
|
29
|
+
not: '!',
|
|
30
|
+
not_equal: '!=',
|
|
31
|
+
or: '||',
|
|
32
|
+
root: '$',
|
|
33
|
+
union: ',',
|
|
34
|
+
wildcard: '*',
|
|
35
|
+
}.freeze
|
|
36
|
+
ONE_CHAR_LEX = OPERATORS.values.select { |lexeme| lexeme.size == 1 }.freeze
|
|
37
|
+
TWO_CHAR_LEX = OPERATORS.values.select { |lexeme| lexeme.size == 2 }.freeze
|
|
38
|
+
TWO_CHAR_LEX_FIRST = TWO_CHAR_LEX.map { |lexeme| lexeme[0] }.freeze
|
|
39
|
+
ONE_OR_TWO_CHAR_LEX = ONE_CHAR_LEX & TWO_CHAR_LEX.map { |str| str[0] }.freeze
|
|
40
|
+
|
|
41
|
+
WHITESPACE = " \t\n\r"
|
|
42
|
+
KEYWORD = %w[true false null].freeze
|
|
43
|
+
FUNCTIONS = %w[length count match search value].freeze
|
|
44
|
+
|
|
45
|
+
# faster to check membership in a string than an array of char (benchmarked ruby 3.1.2)
|
|
46
|
+
ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
|
47
|
+
DIGITS = '0123456789'
|
|
48
|
+
|
|
49
|
+
# chars that may be used as the first letter of member-name-shorthand
|
|
50
|
+
NAME_FIRST = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
|
|
51
|
+
|
|
52
52
|
attr_reader :source, :tokens
|
|
53
53
|
attr_accessor :next_p, :lexeme_start_p
|
|
54
54
|
|
|
@@ -72,6 +72,7 @@ module Janeway
|
|
|
72
72
|
end
|
|
73
73
|
|
|
74
74
|
def start_tokenization
|
|
75
|
+
raise err('JSONPath query is empty') if @source.empty?
|
|
75
76
|
if WHITESPACE.include?(@source[0]) || WHITESPACE.include?(@source[-1])
|
|
76
77
|
raise err('JSONPath query may not start or end with whitespace')
|
|
77
78
|
end
|
|
@@ -470,6 +471,7 @@ module Janeway
|
|
|
470
471
|
end
|
|
471
472
|
|
|
472
473
|
# Lex a member name that is found within dot notation.
|
|
474
|
+
# This name is not delimited and allows a subset of the characters that can appear in a delimited string.
|
|
473
475
|
#
|
|
474
476
|
# Recognize keywords and give them the correct type.
|
|
475
477
|
# @see https://www.rfc-editor.org/rfc/rfc9535.html#section-2.5.1.1-3
|
|
@@ -477,6 +479,10 @@ module Janeway
|
|
|
477
479
|
# @param ignore_keywords [Boolean]
|
|
478
480
|
# @return [Token]
|
|
479
481
|
def lex_member_name_shorthand(ignore_keywords: false)
|
|
482
|
+
# Abort if name is preceded by child_start. Catches non-delimited identifiers in brackets,
|
|
483
|
+
# eg. $["key"] is allowed, but $[key] is not
|
|
484
|
+
raise err('Identifier within brackets must be surrounded by quotes') if @tokens.last&.type == :child_start
|
|
485
|
+
|
|
480
486
|
consume while name_char?(lookahead)
|
|
481
487
|
identifier = source[lexeme_start_p..(next_p - 1)]
|
|
482
488
|
type =
|
|
@@ -46,11 +46,18 @@ module Janeway
|
|
|
46
46
|
def self.escape_char(char)
|
|
47
47
|
# Character ranges defined by https://www.rfc-editor.org/rfc/rfc9535.html#section-2.7-8
|
|
48
48
|
case char.ord
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
# normal-unescaped range
|
|
50
|
+
when 0x20..0x26, 0x28..0x5B, 0x5D..0xD7FF, 0xE000..0x10FFFF then char # unescaped
|
|
51
|
+
|
|
52
|
+
# normal-escapable range
|
|
53
|
+
when 0x08 then '\\b' # backspace
|
|
54
|
+
when 0x0C then '\\f' # form feed
|
|
55
|
+
when 0x0A then '\\n' # line feed / newline
|
|
56
|
+
when 0x0D then '\\r' # carriage return
|
|
57
|
+
when 0x09 then '\\t' # horizontal tab
|
|
58
|
+
when 0x27 then '\\\'' # apostrophe
|
|
59
|
+
when 0x5C then '\\\\' # backslash
|
|
60
|
+
|
|
54
61
|
else # normal-hexchar range
|
|
55
62
|
hex_encode_char(char)
|
|
56
63
|
end
|
data/lib/janeway/parser.rb
CHANGED
data/lib/janeway/query.rb
CHANGED
|
@@ -30,10 +30,19 @@ module Janeway
|
|
|
30
30
|
Janeway::Enumerator.new(self, input)
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
+
# @return [String]
|
|
33
34
|
def to_s
|
|
34
35
|
@root.to_s
|
|
35
36
|
end
|
|
36
37
|
|
|
38
|
+
# Return true if this query can only possibly match 0 or 1 elements in any input.
|
|
39
|
+
# Such a query must be composed exclusively of the root identifier followed by
|
|
40
|
+
# name selectors and / or index selectors.
|
|
41
|
+
# @return [Boolean]
|
|
42
|
+
def singular_query?
|
|
43
|
+
@root.singular_query?
|
|
44
|
+
end
|
|
45
|
+
|
|
37
46
|
# Return a list of the nodes in the AST.
|
|
38
47
|
# The AST of a jsonpath query is a straight line, so this is expressible as an array.
|
|
39
48
|
# The only part of the AST with branches is inside a filter selector, but that doesn't show up here.
|
|
@@ -66,5 +75,34 @@ module Janeway
|
|
|
66
75
|
|
|
67
76
|
result.flatten.join("\n")
|
|
68
77
|
end
|
|
78
|
+
|
|
79
|
+
# Deep copy the query
|
|
80
|
+
# @return [Query]
|
|
81
|
+
def dup
|
|
82
|
+
Parser.parse(to_s)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Delete the last element from the chain of selectors.
|
|
86
|
+
# For a singular query, this makes the query point to the match's parent instead of the match itself.
|
|
87
|
+
#
|
|
88
|
+
# Don't do this for a non-singular query, those may contain child segments and
|
|
89
|
+
# descendant segments which would lead to different results.
|
|
90
|
+
#
|
|
91
|
+
# @return [AST::Selector]
|
|
92
|
+
def pop
|
|
93
|
+
raise Janeway::Error.new('not allowed to pop from a non-singular query', to_s) unless singular_query?
|
|
94
|
+
|
|
95
|
+
# Sever the link to the last selector
|
|
96
|
+
nodes = node_list
|
|
97
|
+
if nodes.size == 1
|
|
98
|
+
# Special case: cannot pop from the query "$" even though it is a singular query
|
|
99
|
+
raise Janeway::Error.new('cannot pop from single-element query', to_s)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
nodes[-2].next = nil
|
|
103
|
+
|
|
104
|
+
# Return the last selector
|
|
105
|
+
nodes.last
|
|
106
|
+
end
|
|
69
107
|
end
|
|
70
108
|
end
|
data/lib/janeway/version.rb
CHANGED
data/lib/janeway.rb
CHANGED
|
@@ -1,59 +1,85 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'English'
|
|
4
|
+
require_relative 'janeway/enumerator'
|
|
5
|
+
require_relative 'janeway/parser'
|
|
4
6
|
|
|
5
|
-
# Janeway JSONPath
|
|
7
|
+
# Janeway JSONPath query library
|
|
8
|
+
#
|
|
9
|
+
# https://github.com/gongfarmer/janeway
|
|
6
10
|
module Janeway
|
|
7
|
-
#
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
11
|
+
# Parse a jsonpath string and combine it with data to make an Enumerator.
|
|
12
|
+
#
|
|
13
|
+
# The Enumerator can be used to apply the query to the data using Enumerator
|
|
14
|
+
# module methods such as #each and #map.
|
|
15
|
+
#
|
|
16
|
+
# @example Apply query to data and search to get array of results
|
|
17
|
+
# results = Janeway.enum_for('$.store.books[? length(@.title) > 20]', data).search
|
|
18
|
+
#
|
|
19
|
+
# @example Apply query to data and iterate over results
|
|
20
|
+
# enum = Janeway.enum_for('$.store.books[? length(@.title) > 20]', data)
|
|
21
|
+
# enum.each do |book|
|
|
22
|
+
# results << book
|
|
23
|
+
# end
|
|
24
|
+
#
|
|
25
|
+
# @see Janeway::Enumerator docs for more ways to use the Enumerator
|
|
17
26
|
#
|
|
18
27
|
# @param jsonpath [String] jsonpath query
|
|
19
28
|
# @param data [Array, Hash] input data
|
|
20
29
|
# @return [Janeway::Enumerator]
|
|
21
30
|
def self.enum_for(jsonpath, data)
|
|
22
|
-
query =
|
|
31
|
+
query = parse(jsonpath)
|
|
23
32
|
Janeway::Enumerator.new(query, data)
|
|
24
33
|
end
|
|
25
34
|
|
|
26
|
-
#
|
|
35
|
+
# Parse a JSONPath string into a Janeway::Query object.
|
|
36
|
+
#
|
|
37
|
+
# This object can be combined with data to create Enumerators that apply the query to the data.
|
|
38
|
+
#
|
|
39
|
+
# Use this method if you want to parse the query once and re-use it for multiple data sets.
|
|
27
40
|
#
|
|
28
|
-
#
|
|
29
|
-
#
|
|
30
|
-
#
|
|
31
|
-
#
|
|
32
|
-
#
|
|
33
|
-
#
|
|
34
|
-
#
|
|
41
|
+
# Otherwise, use Janeway.enum_for to parse the query and pair it with data in a single step.
|
|
42
|
+
#
|
|
43
|
+
# @example Use a query to search several JSON files
|
|
44
|
+
# results = []
|
|
45
|
+
# query = Janeway.parse('$.store.books[? length(@.title) > 20]')
|
|
46
|
+
# data_files.each do |path|
|
|
47
|
+
# data = JSON.parse File.read(path)
|
|
48
|
+
# results.concat query.enum_for(data).search
|
|
49
|
+
# end
|
|
35
50
|
#
|
|
36
51
|
# @param query [String] jsonpath query
|
|
37
52
|
# @return [Janeway::Query]
|
|
38
|
-
def self.
|
|
53
|
+
def self.parse(query)
|
|
39
54
|
Janeway::Parser.parse(query)
|
|
40
55
|
end
|
|
41
|
-
end
|
|
42
56
|
|
|
43
|
-
#
|
|
44
|
-
#
|
|
45
|
-
#
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
57
|
+
# Transform a jsonpath singular query into an array of hash keys and/or array
|
|
58
|
+
# indexes suitable for providing to Hash#dig or Array#dig.
|
|
59
|
+
#
|
|
60
|
+
# Only singular queries are allowed, meaning queries that contain only name
|
|
61
|
+
# selectors (ie. hash keys) and index selectors (array indexes.)
|
|
62
|
+
# The paths that are yielded to Enumerator#each are all suitable for this.
|
|
63
|
+
#
|
|
64
|
+
# @example convert normalized jsonpath to array of hash keys / array indices
|
|
65
|
+
# Janeway.path_to_diggable('$["a"].b.c[0]') => ["a", "b", "c", 0]
|
|
66
|
+
#
|
|
67
|
+
# @param jsonpath [String] jsonpath query
|
|
68
|
+
# @return [Array<String, Integer>]
|
|
69
|
+
def self.path_to_diggable(jsonpath)
|
|
70
|
+
raise Janeway::Error.new('Query has nothing to dig', jsonpath) if jsonpath == '$'
|
|
71
|
+
|
|
72
|
+
# Parse query and determine whether it can be converted
|
|
73
|
+
query = parse(jsonpath)
|
|
74
|
+
unless query.singular_query?
|
|
75
|
+
raise Janeway::Error.new('Only a singular query can be converted to dig parameters', jsonpath)
|
|
76
|
+
end
|
|
49
77
|
|
|
50
|
-
|
|
51
|
-
|
|
78
|
+
# Convert query to a list of name and index selectors
|
|
79
|
+
nodes = query.node_list
|
|
80
|
+
nodes.shift # discard the root identifier
|
|
52
81
|
|
|
53
|
-
|
|
54
|
-
|
|
82
|
+
# Extract values from selectors
|
|
83
|
+
nodes.map(&:value)
|
|
55
84
|
end
|
|
56
85
|
end
|
|
57
|
-
|
|
58
|
-
require_libs('janeway/ast')
|
|
59
|
-
require_libs('janeway')
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: janeway-jsonpath
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.0
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Fraser Hanson
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies: []
|
|
12
12
|
description: |+
|
|
13
13
|
JSONPath is a query language for selecting and extracting values from a JSON text.
|
|
@@ -31,6 +31,7 @@ files:
|
|
|
31
31
|
- README.md
|
|
32
32
|
- bin/janeway
|
|
33
33
|
- lib/janeway.rb
|
|
34
|
+
- lib/janeway/ast.rb
|
|
34
35
|
- lib/janeway/ast/array_slice_selector.rb
|
|
35
36
|
- lib/janeway/ast/binary_operator.rb
|
|
36
37
|
- lib/janeway/ast/boolean.rb
|
|
@@ -60,25 +61,33 @@ files:
|
|
|
60
61
|
- lib/janeway/functions/search.rb
|
|
61
62
|
- lib/janeway/functions/value.rb
|
|
62
63
|
- lib/janeway/interpreter.rb
|
|
64
|
+
- lib/janeway/interpreters/array_slice_selector_delete_if.rb
|
|
63
65
|
- lib/janeway/interpreters/array_slice_selector_deleter.rb
|
|
64
66
|
- lib/janeway/interpreters/array_slice_selector_interpreter.rb
|
|
65
67
|
- lib/janeway/interpreters/base.rb
|
|
66
68
|
- lib/janeway/interpreters/binary_operator_interpreter.rb
|
|
69
|
+
- lib/janeway/interpreters/child_segment_delete_if.rb
|
|
67
70
|
- lib/janeway/interpreters/child_segment_deleter.rb
|
|
68
71
|
- lib/janeway/interpreters/child_segment_interpreter.rb
|
|
69
72
|
- lib/janeway/interpreters/current_node_interpreter.rb
|
|
70
73
|
- lib/janeway/interpreters/descendant_segment_interpreter.rb
|
|
74
|
+
- lib/janeway/interpreters/filter_selector_delete_if.rb
|
|
71
75
|
- lib/janeway/interpreters/filter_selector_deleter.rb
|
|
72
76
|
- lib/janeway/interpreters/filter_selector_interpreter.rb
|
|
73
77
|
- lib/janeway/interpreters/function_interpreter.rb
|
|
78
|
+
- lib/janeway/interpreters/index_selector_delete_if.rb
|
|
74
79
|
- lib/janeway/interpreters/index_selector_deleter.rb
|
|
75
80
|
- lib/janeway/interpreters/index_selector_interpreter.rb
|
|
81
|
+
- lib/janeway/interpreters/iteration_helper.rb
|
|
82
|
+
- lib/janeway/interpreters/name_selector_delete_if.rb
|
|
76
83
|
- lib/janeway/interpreters/name_selector_deleter.rb
|
|
77
84
|
- lib/janeway/interpreters/name_selector_interpreter.rb
|
|
85
|
+
- lib/janeway/interpreters/root_node_delete_if.rb
|
|
78
86
|
- lib/janeway/interpreters/root_node_deleter.rb
|
|
79
87
|
- lib/janeway/interpreters/root_node_interpreter.rb
|
|
80
88
|
- lib/janeway/interpreters/tree_constructor.rb
|
|
81
89
|
- lib/janeway/interpreters/unary_operator_interpreter.rb
|
|
90
|
+
- lib/janeway/interpreters/wildcard_selector_delete_if.rb
|
|
82
91
|
- lib/janeway/interpreters/wildcard_selector_deleter.rb
|
|
83
92
|
- lib/janeway/interpreters/wildcard_selector_interpreter.rb
|
|
84
93
|
- lib/janeway/interpreters/yielder.rb
|
|
@@ -107,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
107
116
|
- !ruby/object:Gem::Version
|
|
108
117
|
version: '0'
|
|
109
118
|
requirements: []
|
|
110
|
-
rubygems_version: 3.6.
|
|
119
|
+
rubygems_version: 3.6.9
|
|
111
120
|
specification_version: 4
|
|
112
121
|
summary: jsonpath parser which implements the finalized IETF standard of Goessner
|
|
113
122
|
JSONPath
|