json_p3 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,285 @@
1
+ # frozen_string_literal: true
2
+
3
module JSONP3
  # Base class for all JSONPath selectors.
  #
  # Stores the environment and token it was constructed with. Subclasses
  # override #resolve; the base implementation always raises.
  class Selector
    # @dynamic token
    attr_reader :token

    # @param env the environment object; kept in @env for subclasses to use.
    # @param token the token this selector was built from.
    def initialize(env, token)
      @env = env
      @token = token
    end

    # Apply this selector to _node_.
    # @return [Array<JSONPathNode>]
    # @raise [RuntimeError] always, unless overridden by a subclass.
    def resolve(_node)
      raise "selectors must implement resolve(node)"
    end

    # Return true if this selector is a singular selector.
    def singular?
      false
    end
  end

  # The name selector selects a value from a hash given a key.
  class NameSelector < Selector
    # @dynamic name
    attr_reader :name

    def initialize(env, token, name)
      super(env, token)
      @name = name
    end

    # @return [Array] one child node when node.value is a Hash containing the
    #   key, otherwise an empty array.
    def resolve(node)
      value = node.value
      return [] unless value.is_a?(Hash) && value.key?(@name)

      [node.new_child(value[@name], @name)]
    end

    def singular?
      true
    end

    def to_s
      @name.inspect
    end

    def ==(other)
      self.class == other.class &&
        @name == other.name &&
        @token == other.token
    end

    alias eql? ==

    def hash
      [@name, @token].hash
    end
  end

  # This non-standard name selector selects values from hashes given a string or
  # symbol key. The string key is tried first; the child node is always keyed
  # by the string name, even when the symbol key matched.
  class SymbolNameSelector < NameSelector
    def initialize(env, token, name)
      super
      @sym = @name.to_sym
    end

    def resolve(node)
      value = node.value
      return [] unless value.is_a?(Hash)
      return [node.new_child(value[@name], @name)] if value.key?(@name)
      return [node.new_child(value[@sym], @name)] if value.key?(@sym)

      []
    end
  end

  # The index selector selects a value from an array given an index.
  # Negative indexes count back from the end of the array.
  class IndexSelector < Selector
    # @dynamic index
    attr_reader :index

    def initialize(env, token, index)
      super(env, token)
      @index = index
    end

    # @return [Array] one child node keyed by the normalized (non-negative)
    #   index, or an empty array when node.value is not an Array or the index
    #   is out of range.
    def resolve(node)
      values = node.value
      return [] unless values.is_a?(Array)

      norm_index = normalize(@index, values.length)
      return [] unless norm_index.between?(0, values.length - 1)

      [node.new_child(values[norm_index], norm_index)]
    end

    def singular?
      true
    end

    def to_s
      @index.to_s
    end

    def ==(other)
      self.class == other.class &&
        @index == other.index &&
        @token == other.token
    end

    alias eql? ==

    def hash
      [@index, @token].hash
    end

    private

    # Map a possibly negative index onto its offset from the array start. The
    # result is still negative when the index reaches back past the start.
    def normalize(index, length)
      index.negative? ? length + index : index
    end
  end

  # The wildcard selector selects all elements from an array or values from a hash.
  class WildcardSelector < Selector
    # @return [Array] a child node per hash member (keyed by hash key) or per
    #   array element (keyed by position); empty for any other value.
    def resolve(node)
      value = node.value
      case value
      when Hash
        value.map { |key, member| node.new_child(member, key) }
      when Array
        value.each_with_index.map { |element, position| node.new_child(element, position) }
      else
        []
      end
    end

    def to_s
      "*"
    end

    def ==(other)
      self.class == other.class && @token == other.token
    end

    alias eql? ==

    def hash
      @token.hash
    end
  end

  # The slice selector selects a range of elements from an array.
  #
  # Bounds follow the RFC 9535 array slice semantics: negative bounds count
  # from the end, and bounds are clamped differently for forward and reverse
  # iteration so that a start lying entirely outside the array selects nothing.
  class SliceSelector < Selector
    # @dynamic start, stop, step
    attr_reader :start, :stop, :step

    # @param start [Integer, nil] first index, or nil for the step's default.
    # @param stop [Integer, nil] exclusive end index, or nil for the default.
    # @param step [Integer, nil] stride; nil is treated as 1.
    def initialize(env, token, start, stop, step)
      super(env, token)
      @start = start
      @stop = stop
      @step = step || 1
    end

    # @return [Array] child nodes keyed by their array position, in iteration
    #   order; empty when node.value is not an Array, is empty, or step is 0.
    def resolve(node)
      values = node.value
      return [] unless values.is_a?(Array)

      length = values.length
      return [] if length.zero? || @step.zero?

      first = normalized_start(length)
      limit = normalized_stop(length)
      nodes = []

      if @step.positive?
        (first...limit).step(@step) { |i| nodes << node.new_child(values[i], i) }
      else
        i = first
        while i > limit
          nodes << node.new_child(values[i], i)
          i += @step
        end
      end

      nodes
    end

    def to_s
      "#{@start}:#{@stop}:#{@step}"
    end

    def ==(other)
      self.class == other.class &&
        @start == other.start &&
        @stop == other.stop &&
        @step == other.step &&
        @token == other.token
    end

    alias eql? ==

    def hash
      [@start, @stop, @step, @token].hash
    end

    private

    # First index to visit. Defaults to the last element when stepping
    # backwards and to the first element otherwise.
    def normalized_start(length)
      start = @start # local copy so the nil check narrows the type
      return @step.negative? ? length - 1 : 0 if start.nil?

      clamp_bound(start, length)
    end

    # Exclusive end of the iteration. Defaults to -1 (just before the first
    # element) when stepping backwards and to the array length otherwise.
    def normalized_stop(length)
      stop = @stop # local copy so the nil check narrows the type
      return @step.negative? ? -1 : length if stop.nil?

      clamp_bound(stop, length)
    end

    # Resolve a negative bound relative to the end of the array, then clamp it
    # to 0..length for a positive step or -1..length-1 for a negative step.
    def clamp_bound(bound, length)
      index = bound.negative? ? length + bound : bound
      @step.negative? ? index.clamp(-1, length - 1) : index.clamp(0, length)
    end
  end

  # Select array elements or hash values according to a filter expression.
  class FilterSelector < Selector
    # @dynamic expression
    attr_reader :expression

    # @param expression an object responding to evaluate(context).
    def initialize(env, token, expression)
      super(env, token)
      @expression = expression
    end

    # Evaluate the expression once per array element or hash value and keep
    # those for which it returns a truthy result.
    # @return [Array] child nodes keyed by array position or hash key; empty
    #   when node.value is neither an Array nor a Hash.
    def resolve(node)
      nodes = []

      case node.value
      when Array
        node.value.each_with_index do |element, position|
          nodes << node.new_child(element, position) if selects?(element, node)
        end
      when Hash
        node.value.each_pair do |key, member|
          nodes << node.new_child(member, key) if selects?(member, node)
        end
      end

      nodes
    end

    def to_s
      "?#{@expression}"
    end

    def ==(other)
      self.class == other.class &&
        @expression == other.expression &&
        @token == other.token
    end

    alias eql? ==

    def hash
      [@expression, @token].hash
    end

    private

    # Evaluate the filter expression with _value_ as the current value and the
    # node's root as the root value.
    def selects?(value, node)
      @expression.evaluate(FilterContext.new(@env, value, node.root))
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "errors"
4
+
5
module JSONP3
  # Tokens are produced by the lexer and consumed by the parser. Each token contains a
  # substring from a JSONPath expression, its location within the JSONPath expression and a
  # symbol indicating what type of token it is.
  class Token
    EOI = :token_eoi
    ERROR = :token_error

    SHORTHAND_NAME = :token_shorthand_name
    COLON = :token_colon
    COMMA = :token_comma
    DOT = :token_dot
    DOUBLE_DOT = :token_double_dot
    FILTER = :token_filter
    INDEX = :token_index
    LBRACKET = :token_lbracket
    NAME = :token_name
    RBRACKET = :token_rbracket
    ROOT = :token_root
    WILD = :token_wild

    AND = :token_and
    CURRENT = :token_current
    DOUBLE_QUOTE_STRING = :token_double_quote_string
    EQ = :token_eq
    FALSE = :token_false
    FLOAT = :token_float
    FUNCTION = :token_function
    GE = :token_ge
    GT = :token_gt
    INT = :token_int
    LE = :token_le
    LPAREN = :token_lparen
    LT = :token_lt
    NE = :token_ne
    NOT = :token_not
    NULL = :token_null
    OP = :token_op
    OR = :token_or
    RPAREN = :token_rparen
    SINGLE_QUOTE_STRING = :token_single_quote_string
    TRUE = :token_true

    # @dynamic type, value, start, query, message
    attr_reader :type, :value, :start, :query, :message

    # @param type [Symbol] one of the token type constants above.
    # @param value the substring this token covers.
    # @param start the token's location within _query_.
    # @param query the JSONPath expression the token came from.
    # @param message optional message; defaults to nil.
    def initialize(type, value, start, query, message: nil)
      @type = type
      @value = value
      @start = start
      @query = query
      @message = message
    end

    # Two tokens are equal when they have the same class and all five
    # attributes are equal.
    def ==(other)
      self.class == other.class &&
        @type == other.type &&
        @value == other.value &&
        @start == other.start &&
        @query == other.query &&
        @message == other.message
    end

    alias eql? ==

    # Hash over type, value and start. Including start keeps tokens that differ
    # only by position (for example, every comma in a query) from sharing a
    # hash, while equal tokens still hash alike.
    def hash
      [@type, @value, @start].hash
    end
  end
end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
module JSONP3 # rubocop:disable Style/Documentation
  # Replace escape sequences with their equivalent Unicode code point.
  # @param value [String]
  # @param quote [String] one of '"' or "'".
  # @param token [Token]
  # @return [String] A new string without escape sequences.
  # @raise [JSONPathSyntaxError] on an unknown or malformed escape sequence, or
  #   on a character (raw or escaped) at or below U+001F.
  def self.unescape_string(value, quote, token) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
    unescaped = String.new(encoding: "UTF-8")
    index = 0
    length = value.length

    while index < length
      ch = value[index] || raise

      if ch == "\\"
        index += 1 # look at the character following the backslash
        case value[index]
        when quote then unescaped << quote
        when "\\" then unescaped << "\\"
        when "/" then unescaped << "/"
        when "b" then unescaped << "\x08"
        when "f" then unescaped << "\x0C"
        when "n" then unescaped << "\n"
        when "r" then unescaped << "\r"
        when "t" then unescaped << "\t"
        when "u"
          # decode_hex_char leaves index on the last character it consumed.
          code_point, index = decode_hex_char(value, index, token)
          unescaped << code_point_to_string(code_point, token)
        else
          raise JSONPathSyntaxError.new("unknown escape sequence", token)
        end
      else
        raise JSONPathSyntaxError.new("invalid character", token) if ch.ord <= 0x1F

        unescaped << ch
      end

      index += 1
    end

    unescaped
  end

  # Decode the \uXXXX escape whose "u" is at _index_, combining a surrogate
  # pair (\uXXXX\uXXXX) into a single code point.
  # @param value [String]
  # @param index [Integer] position of the "u" in _value_.
  # @param token [Token]
  # @return [Array(Integer, Integer)] the code point and the index of the last
  #   character consumed.
  # @raise [JSONPathSyntaxError] if the escape is truncated, starts with a low
  #   surrogate, or is a high surrogate that is not followed by a low surrogate.
  def self.decode_hex_char(value, index, token) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
    length = value.length

    raise JSONPathSyntaxError.new("incomplete escape sequence", token) if index + 4 >= length

    index += 1 # move past 'u'
    code_point = parse_hex_digits(value[index...index + 4], token)

    raise JSONPathSyntaxError.new("unexpected low surrogate", token) if low_surrogate?(code_point)

    return [code_point, index + 3] unless high_surrogate?(code_point)

    # A high surrogate must be followed by a complete "\uXXXX" sequence.
    unless index + 9 < length && value[index + 4] == "\\" && value[index + 5] == "u"
      raise JSONPathSyntaxError.new("incomplete escape sequence", token)
    end

    low_surrogate = parse_hex_digits(value[index + 6...index + 10], token)

    raise JSONPathSyntaxError.new("expected a low surrogate", token) unless low_surrogate?(low_surrogate)

    code_point = 0x10000 + (
      ((code_point & 0x03FF) << 10) | (low_surrogate & 0x03FF)
    )

    [code_point, index + 9]
  end

  # Parse a string of hexadecimal digits (either case) into an integer.
  # @param digits [String]
  # @param token [Token]
  # @return [Integer]
  # @raise [JSONPathSyntaxError] if any byte is not a hexadecimal digit.
  def self.parse_hex_digits(digits, token)
    digits.each_byte.reduce(0) do |code_point, byte|
      nibble = case byte
               when 48..57 then byte - 48 # "0".."9"
               when 65..70 then byte - 65 + 10 # "A".."F"
               when 97..102 then byte - 97 + 10 # "a".."f"
               else raise JSONPathSyntaxError.new("invalid escape sequence", token)
               end
      (code_point << 4) | nibble
    end
  end

  # @return [Boolean] true if _code_point_ is in 0xD800..0xDBFF.
  def self.high_surrogate?(code_point)
    code_point.between?(0xD800, 0xDBFF)
  end

  # @return [Boolean] true if _code_point_ is in 0xDC00..0xDFFF.
  def self.low_surrogate?(code_point)
    code_point.between?(0xDC00, 0xDFFF)
  end

  # Convert a code point to a one-character UTF-8 string.
  # @raise [JSONPathSyntaxError] if _code_point_ is at or below 0x1F.
  def self.code_point_to_string(code_point, token)
    raise JSONPathSyntaxError.new("invalid character", token) if code_point <= 0x1F

    code_point.chr(Encoding::UTF_8)
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JSONP3
4
+ VERSION = "0.2.1"
5
+ end
data/lib/json_p3.rb ADDED
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "json_p3/version"
4
+ require_relative "json_p3/environment"
5
+
6
# RFC 9535 JSONPath query expressions for JSON.
module JSONP3
  # Environment instance that the module-level helpers below delegate to.
  DefaultEnvironment = JSONPathEnvironment.new

  class << self
    # Forward _path_ and _data_ to DefaultEnvironment.find and return its result.
    def find(path, data)
      DefaultEnvironment.find(path, data)
    end

    # Forward _path_ to DefaultEnvironment.compile and return its result.
    def compile(path)
      DefaultEnvironment.compile(path)
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark"
4
+ require "json"
5
+ require "json_p3"
6
+
7
# Parsed test-suite file; read once and shared by every report below.
CTS = JSON.parse(File.read("test/cts/cts.json"))
# Test cases that do not carry an "invalid_selector" key.
VALID_QUERIES = CTS["tests"].reject { |test_case| test_case.key?("invalid_selector") }
# [compiled query, document] pairs, built up front so "just find" excludes compilation.
COMPILED_QUERIES = VALID_QUERIES.map do |test_case|
  [JSONP3.compile(test_case["selector"]), test_case["document"]]
end

n = 100

puts "repeating #{VALID_QUERIES.length} queries #{n} times"

Benchmark.bmbm(18) do |bench|
  bench.report("compile and find:") do
    n.times do
      VALID_QUERIES.map { |test_case| JSONP3.find(test_case["selector"], test_case["document"]) }
    end
  end

  bench.report("just compile:") do
    n.times do
      VALID_QUERIES.map { |test_case| JSONP3.compile(test_case["selector"]) }
    end
  end

  bench.report("just find:") do
    n.times do
      COMPILED_QUERIES.map { |query, document| query.find(document) }
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark/ips"
4
+ require "json"
5
+ require "json_p3"
6
+
7
# Parsed test-suite file; read once and shared by every report below.
CTS = JSON.parse(File.read("test/cts/cts.json"))
# Test cases that do not carry an "invalid_selector" key.
VALID_QUERIES = CTS["tests"].reject { |test_case| test_case.key?("invalid_selector") }
# [compiled query, document] pairs, built up front so "just find" excludes compilation.
COMPILED_QUERIES = VALID_QUERIES.map do |test_case|
  [JSONP3.compile(test_case["selector"]), test_case["document"]]
end

puts "#{VALID_QUERIES.length} queries per iteration"

Benchmark.ips do |bench|
  # Configure the number of seconds used during
  # the warmup phase (default 2) and calculation phase (default 5)
  bench.config(warmup: 2, time: 5)

  bench.report("compile and find:") do
    VALID_QUERIES.map { |test_case| JSONP3.find(test_case["selector"], test_case["document"]) }
  end

  bench.report("just compile:") do
    VALID_QUERIES.map { |test_case| JSONP3.compile(test_case["selector"]) }
  end

  bench.report("just find:") do
    COMPILED_QUERIES.map { |query, document| query.find(document) }
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark"
4
+ require "json"
5
+ require "json_p3"
6
+
7
# TODO: include small-citylots.json as a git submodule
# Until then the fixture is read from /tmp/small-citylots.json by default; set the
# CITYLOTS_JSON environment variable to use a copy stored somewhere else.
DATA = JSON.parse(File.read(ENV.fetch("CITYLOTS_JSON", "/tmp/small-citylots.json"))).freeze

Benchmark.bm(15) do |bench|
  bench.report("deep:") do
    JSONP3.find("$.features..properties.BLOCK_NUM", DATA)
  end

  bench.report("shallow:") do
    JSONP3.find("$.features..properties", DATA)
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "memory_profiler"
4
+ require "json"
5
+ require "json_p3"
6
+
7
# Parsed test-suite file.
CTS = JSON.parse(File.read("test/cts/cts.json"))
# Test cases that do not carry an "invalid_selector" key.
VALID_QUERIES = CTS["tests"].reject { |test_case| test_case.key?("invalid_selector") }
# Not referenced by the profiled block below; building it compiles every query
# once before profiling starts.
COMPILED_QUERIES = VALID_QUERIES.map do |test_case|
  [JSONP3.compile(test_case["selector"]), test_case["document"]]
end

n = 10

# Profile only query compilation, repeated n times.
report = MemoryProfiler.report do
  n.times do
    VALID_QUERIES.map { |test_case| JSONP3.compile(test_case["selector"]) }
  end
end

report.pretty_print
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "memory_profiler"
4
+ require "json"
5
+ require "json_p3"
6
+
7
# TODO: include small-citylots.json as a git submodule
# Until then the fixture is read from /tmp/small-citylots.json by default; set the
# CITYLOTS_JSON environment variable to use a copy stored somewhere else.
DATA = JSON.parse(File.read(ENV.fetch("CITYLOTS_JSON", "/tmp/small-citylots.json"))).freeze

# Profile a single query over the whole document.
report = MemoryProfiler.report do
  JSONP3.find("$.features..properties", DATA)
end

report.pretty_print
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stackprof"
4
+
5
+ require "json"
6
+ require "json_p3"
7
+
8
# Parsed test-suite file; read once and shared by every profile run below.
CTS = JSON.parse(File.read("test/cts/cts.json"))
# Test cases that do not carry an "invalid_selector" key. This is the same
# criterion the sibling benchmark scripts use; selecting on the presence of a
# "result" key instead would drop any valid case that records its expectation
# under a different key (NOTE(review): confirm against cts.json).
VALID_QUERIES = CTS["tests"].reject { |test_case| test_case.key?("invalid_selector") }
# [compiled query, document] pairs, built up front so "just find" excludes compilation.
COMPILED_QUERIES = VALID_QUERIES.map do |test_case|
  [JSONP3.compile(test_case["selector"]), test_case["document"]]
end

n = 100

StackProf.run(mode: :cpu, raw: true, out: ".stackprof-cpu-compile-and-find.dump") do
  n.times do
    VALID_QUERIES.map { |test_case| JSONP3.find(test_case["selector"], test_case["document"]) }
  end
end

StackProf.run(mode: :cpu, raw: true, out: ".stackprof-cpu-just-compile.dump") do
  n.times do
    VALID_QUERIES.map { |test_case| JSONP3.compile(test_case["selector"]) }
  end
end

StackProf.run(mode: :cpu, raw: true, out: ".stackprof-cpu-just-find.dump") do
  n.times do
    COMPILED_QUERIES.map { |query, document| query.find(document) }
  end
end