json_p3 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,285 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JSONP3
4
# Base class for all JSONPath selectors. It keeps hold of the environment and
# the token it was built with, and leaves `resolve` for subclasses to supply.
class Selector
  # @dynamic token
  attr_reader :token

  # @param env the environment this selector was created for
  # @param token the token this selector was created from
  def initialize(env, token)
    @token = token
    @env = env
  end

  # Apply this selector to _node_. This base implementation always raises;
  # concrete selectors are expected to override it.
  # @return [Array<JSONPathNode>]
  # @raise [RuntimeError] when called on a class that has not overridden it
  def resolve(_node)
    raise "selectors must implement resolve(node)"
  end

  # Return true if this selector is a singular selector. The base class
  # answers false.
  def singular?
    false
  end
end
25
+
26
# The name selector selects a value from a hash given a key.
class NameSelector < Selector
  # @dynamic name
  attr_reader :name

  # @param name the hash key this selector looks up
  def initialize(env, token, name)
    super(env, token)
    @name = name
  end

  # @return [Array<JSONPathNode>] a single child node when the node's value
  #   is a hash that has the key, otherwise an empty array.
  def resolve(node)
    value = node.value
    return [] unless value.is_a?(Hash) && value.key?(@name)

    [node.new_child(value[@name], @name)]
  end

  def singular?
    true
  end

  # The name rendered with `inspect`.
  def to_s
    @name.inspect
  end

  # Selectors are equal when they have the same class, name and token.
  def ==(other)
    return false unless self.class == other.class

    @name == other.name && @token == other.token
  end

  alias eql? ==

  def hash
    [@name, @token].hash
  end
end
64
+
65
# This non-standard name selector selects values from hashes given a string or
# symbol key. The string key is tried first; the symbol form is the fallback.
class SymbolNameSelector < NameSelector
  def initialize(env, token, name)
    super
    @sym = @name.to_sym
  end

  # @return [Array<JSONPathNode>] a single child node, or an empty array when
  #   the node's value is not a hash or has neither key. The child is always
  #   created with the string name, even when the symbol key matched.
  def resolve(node)
    members = node.value
    return [] unless members.is_a?(Hash)

    key = [@name, @sym].find { |candidate| members.key?(candidate) }
    return [] if key.nil?

    [node.new_child(members[key], @name)]
  end
end
87
+
88
# The index selector selects a value from an array given an index. Negative
# indexes count back from the end of the array.
class IndexSelector < Selector
  # @dynamic index
  attr_reader :index

  # @param index the (possibly negative) array index to select
  def initialize(env, token, index)
    super(env, token)
    @index = index
  end

  # @return [Array<JSONPathNode>] a single child node, or an empty array when
  #   the node's value is not an array or the index is out of range. The child
  #   is created with the normalized (non-negative) index.
  def resolve(node)
    items = node.value
    return [] unless items.is_a?(Array)

    position = normalize(@index, items.length)
    return [] unless position.between?(0, items.length - 1)

    [node.new_child(items[position], position)]
  end

  def singular?
    true
  end

  def to_s
    @index.to_s
  end

  # Selectors are equal when they have the same class, index and token.
  def ==(other)
    return false unless self.class == other.class

    @index == other.index && @token == other.token
  end

  alias eql? ==

  def hash
    [@index, @token].hash
  end

  private

  # Turn a negative index into its offset from the start of the array. An
  # index that reaches back past the start is returned unchanged (still
  # negative) so the caller can reject it.
  def normalize(index, length)
    return index unless index.negative? && length >= index.abs

    length + index
  end
end
135
+
136
# The wildcard selector selects all elements from an array or all values
# from a hash. Any other kind of value selects nothing.
class WildcardSelector < Selector
  # @return [Array<JSONPathNode>] one child node per hash value (keyed by its
  #   hash key) or per array element (keyed by its index).
  def resolve(node)
    value = node.value

    case value
    when Hash
      value.map { |key, member| node.new_child(member, key) }
    when Array
      value.each_with_index.map { |element, position| node.new_child(element, position) }
    else
      []
    end
  end

  def to_s
    "*"
  end

  # Wildcards are equal when they have the same class and token.
  def ==(other)
    return false unless self.class == other.class

    @token == other.token
  end

  alias eql? ==

  def hash
    @token.hash
  end
end
162
+
163
# The slice selector selects a range of elements from an array.
class SliceSelector < Selector
  # @dynamic start, stop, step
  attr_reader :start, :stop, :step

  # @param start [Integer, nil] first index of the slice, nil for the default
  # @param stop [Integer, nil] exclusive end of the slice, nil for the default
  # @param step [Integer, nil] distance between selected indexes, nil means 1
  def initialize(env, token, start, stop, step)
    super(env, token)
    @start = start
    @stop = stop
    @step = step || 1
  end

  # Select the elements of an array that fall inside the slice.
  # @return [Array<JSONPathNode>] child nodes in iteration order; empty when
  #   the node's value is not an array, the array is empty or the step is zero.
  def resolve(node)
    values = node.value
    return [] unless values.is_a?(Array)

    length = values.length
    return [] if length.zero? || @step.zero?

    from = normalized_start(length)
    to = normalized_stop(length)
    # `to` is exclusive, so the last candidate sits one position short of it
    # in the direction of travel.
    last = @step.positive? ? to - 1 : to + 1

    nodes = []
    from.step(last, @step) { |i| nodes << node.new_child(values[i], i) }
    nodes
  end

  # Render as "start:stop:step", leaving omitted bounds empty.
  def to_s
    "#{@start}:#{@stop}:#{@step}"
  end

  # Slices are equal when class, start, stop, step and token all match.
  def ==(other)
    self.class == other.class &&
      @start == other.start &&
      @stop == other.stop &&
      @step == other.step &&
      @token == other.token
  end

  alias eql? ==

  def hash
    [@start, @stop, @step, @token].hash
  end

  private

  # First index to visit. When start is omitted this is the first element for
  # a forward slice and the last element for a backward slice.
  def normalized_start(length)
    start = @start
    return @step.negative? ? length - 1 : 0 if start.nil?

    clamp_for_step(from_front(start, length), length)
  end

  # Exclusive end of the slice. When stop is omitted this is one past the last
  # element for a forward slice and one before the first for a backward slice.
  def normalized_stop(length)
    stop = @stop
    return @step.negative? ? -1 : length if stop.nil?

    clamp_for_step(from_front(stop, length), length)
  end

  # Convert an index that counts from the end into one counting from the front.
  def from_front(index, length)
    index.negative? ? length + index : index
  end

  # Clamp a bound to the range that is meaningful for the step direction:
  # 0..length when moving forwards and -1..length-1 when moving backwards.
  # Using one fixed range for both directions makes a start beyond the end of
  # the array select the last element on a forward slice, and a start before
  # the beginning select the first element on a backward slice.
  def clamp_for_step(index, length)
    @step.negative? ? index.clamp(-1, length - 1) : index.clamp(0, length)
  end
end
240
+
241
# Select array elements or hash values according to a filter expression.
class FilterSelector < Selector
  # @dynamic expression
  attr_reader :expression

  # @param expression the filter expression; it must respond to `evaluate`
  def initialize(env, token, expression)
    super(env, token)
    @expression = expression
  end

  # Evaluate the filter expression once per array element or hash value and
  # keep those for which it is truthy.
  # @return [Array<JSONPathNode>] child nodes for the matching members; empty
  #   when the node's value is neither an array nor a hash.
  def resolve(node) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
    nodes = []

    if node.value.is_a?(Array)
      node.value.each_with_index do |e, i|
        context = FilterContext.new(@env, e, node.root)
        nodes << node.new_child(e, i) if @expression.evaluate(context)
      end
    elsif node.value.is_a?(Hash)
      node.value.each_pair do |k, v|
        context = FilterContext.new(@env, v, node.root)
        nodes << node.new_child(v, k) if @expression.evaluate(context)
      end
    end

    nodes
  end

  def to_s
    "?#{@expression}"
  end

  # Filters are equal when they have the same class, expression and token.
  def ==(other)
    self.class == other.class &&
      # Compare against the other selector's expression. Filter selectors have
      # no `start` reader, so asking for one raises NoMethodError.
      @expression == other.expression &&
      @token == other.token
  end

  alias eql? ==

  def hash
    [@expression, @token].hash
  end
end
285
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "errors"
4
+
5
+ module JSONP3
6
# Tokens are produced by the lexer and consumed by the parser. Each token holds a
# substring of a JSONPath expression, its location within that expression and a
# symbol indicating what type of token it is.
class Token
  EOI = :token_eoi
  ERROR = :token_error

  SHORTHAND_NAME = :token_shorthand_name
  COLON = :token_colon
  COMMA = :token_comma
  DOT = :token_dot
  DOUBLE_DOT = :token_double_dot
  FILTER = :token_filter
  INDEX = :token_index
  LBRACKET = :token_lbracket
  NAME = :token_name
  RBRACKET = :token_rbracket
  ROOT = :token_root
  WILD = :token_wild

  AND = :token_and
  CURRENT = :token_current
  DOUBLE_QUOTE_STRING = :token_double_quote_string
  EQ = :token_eq
  FALSE = :token_false
  FLOAT = :token_float
  FUNCTION = :token_function
  GE = :token_ge
  GT = :token_gt
  INT = :token_int
  LE = :token_le
  LPAREN = :token_lparen
  LT = :token_lt
  NE = :token_ne
  NOT = :token_not
  NULL = :token_null
  OP = :token_op
  OR = :token_or
  RPAREN = :token_rparen
  SINGLE_QUOTE_STRING = :token_single_quote_string
  TRUE = :token_true

  # @dynamic type, value, start, query, message
  attr_reader :type, :value, :start, :query, :message

  # @param type [Symbol] one of the token type constants above
  # @param value the text of the token
  # @param start where the token begins within the query
  # @param query the JSONPath expression the token came from
  # @param message optional message, nil unless given
  def initialize(type, value, start, query, message: nil)
    @type = type
    @value = value
    @start = start
    @query = query
    @message = message
  end

  # Tokens are equal when they share a class and every attribute matches.
  def ==(other)
    return false unless self.class == other.class

    @type == other.type &&
      @value == other.value &&
      @start == other.start &&
      @query == other.query &&
      @message == other.message
  end

  alias eql? ==

  # Hash on type and value only. Equal tokens still hash alike because both
  # fields take part in the equality check.
  def hash
    [@type, @value].hash
  end
end
74
+ end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
module JSONP3 # rubocop:disable Style/Documentation
  # Replace escape sequences with their equivalent Unicode code point.
  # @param value [String]
  # @param quote [String] one of '"' or "'".
  # @param token [Token]
  # @return [String] A new string without escape sequences.
  # @raise [JSONPathSyntaxError] on an unknown or malformed escape sequence, or
  #   on a character at or below U+001F.
  def self.unescape_string(value, quote, token) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
    result = String.new(encoding: "UTF-8")
    size = value.length
    pos = 0

    while pos < size
      char = value[pos] || raise

      if char != "\\"
        raise JSONPathSyntaxError.new("invalid character", token) if char.ord <= 0x1F

        result << char
        pos += 1
        next
      end

      pos += 1 # step onto the character that follows the backslash
      escaped = value[pos]

      case escaped
      when quote, "\\", "/"
        result << escaped
      when "b"
        result << "\x08"
      when "f"
        result << "\x0C"
      when "n"
        result << "\n"
      when "r"
        result << "\r"
      when "t"
        result << "\t"
      when "u"
        # decode_hex_char hands back the index of the last character it used.
        code_point, pos = JSONP3.decode_hex_char(value, pos, token)
        result << JSONP3.code_point_to_string(code_point, token)
      else
        raise JSONPathSyntaxError.new("unknown escape sequence", token)
      end

      pos += 1
    end

    result
  end

  # Decode the `\uXXXX` escape (or surrogate pair of escapes) whose "u" is at
  # _index_ in _value_.
  # @return [Array(Integer, Integer)] the code point and the index of the last
  #   character consumed.
  # @raise [JSONPathSyntaxError] when the escape is truncated, has non-hex
  #   digits, or the surrogates are not a valid high/low pair.
  def self.decode_hex_char(value, index, token) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
    size = value.length

    # Four hex digits must follow the "u".
    raise JSONPathSyntaxError.new("incomplete escape sequence", token) if index + 4 >= size

    digits_at = index + 1
    code_point = parse_hex_digits(value[digits_at, 4], token)

    raise JSONPathSyntaxError.new("unexpected low surrogate", token) if low_surrogate?(code_point)

    return [code_point, digits_at + 3] unless high_surrogate?(code_point)

    # A high surrogate must be followed immediately by a second `\uXXXX`.
    paired = digits_at + 9 < size && value[digits_at + 4] == "\\" && value[digits_at + 5] == "u"
    raise JSONPathSyntaxError.new("incomplete escape sequence", token) unless paired

    low = parse_hex_digits(value[digits_at + 6, 4], token)

    # NOTE(review): this fires when the second escape is NOT a low surrogate,
    # yet it reuses the "unexpected low surrogate" wording from above.
    raise JSONPathSyntaxError.new("unexpected low surrogate", token) unless low_surrogate?(low)

    combined = 0x10000 + (((code_point & 0x03FF) << 10) | (low & 0x03FF))
    [combined, digits_at + 9]
  end

  # Convert a string of hexadecimal digits (either case) to an integer.
  # @raise [JSONPathSyntaxError] when a byte is not an ASCII hex digit.
  def self.parse_hex_digits(digits, token) # rubocop:disable Metrics/MethodLength
    digits.each_byte.reduce(0) do |acc, byte|
      nibble =
        case byte
        when 48..57 # "0".."9"
          byte - 48
        when 65..70 # "A".."F"
          byte - 65 + 10
        when 97..102 # "a".."f"
          byte - 97 + 10
        else
          raise JSONPathSyntaxError.new("invalid escape sequence", token)
        end

      (acc << 4) | nibble
    end
  end

  # True when _code_point_ is in the range 0xD800..0xDBFF.
  def self.high_surrogate?(code_point)
    code_point.between?(0xD800, 0xDBFF)
  end

  # True when _code_point_ is in the range 0xDC00..0xDFFF.
  def self.low_surrogate?(code_point)
    code_point.between?(0xDC00, 0xDFFF)
  end

  # Encode _code_point_ as a UTF-8 string.
  # @raise [JSONPathSyntaxError] when the code point is at or below U+001F.
  def self.code_point_to_string(code_point, token)
    raise JSONPathSyntaxError.new("invalid character", token) if code_point <= 0x1F

    code_point.chr(Encoding::UTF_8)
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module JSONP3
  # Version string of this release of the gem.
  VERSION = "0.2.1"
end
data/lib/json_p3.rb ADDED
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "json_p3/version"
4
+ require_relative "json_p3/environment"
5
+
6
# RFC 9535 JSONPath query expressions for JSON.
module JSONP3
  # Environment shared by the module-level convenience methods below.
  DefaultEnvironment = JSONPathEnvironment.new

  class << self
    # Apply the JSONPath query _path_ to _data_ using the default environment.
    def find(path, data)
      DefaultEnvironment.find(path, data)
    end

    # Compile the JSONPath query _path_ using the default environment.
    def compile(path)
      DefaultEnvironment.compile(path)
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark"
4
+ require "json"
5
+ require "json_p3"
6
+
7
# Load the compliance test suite and keep only the queries expected to be valid.
CTS = JSON.parse(File.read("test/cts/cts.json"))
VALID_QUERIES = CTS["tests"].reject { |test_case| test_case.key?("invalid_selector") }
COMPILED_QUERIES = VALID_QUERIES.map do |test_case|
  [JSONP3.compile(test_case["selector"]), test_case["document"]]
end

n = 100

puts "repeating #{VALID_QUERIES.length} queries #{n} times"

Benchmark.bmbm(18) do |x|
  # Parse each query and apply it to its document.
  x.report("compile and find:") do
    n.times { VALID_QUERIES.map { |t| JSONP3.find(t["selector"], t["document"]) } }
  end

  # Parse each query without applying it.
  x.report("just compile:") do
    n.times { VALID_QUERIES.map { |t| JSONP3.compile(t["selector"]) } }
  end

  # Apply queries that were compiled ahead of time.
  x.report("just find:") do
    n.times { COMPILED_QUERIES.map { |query, document| query.find(document) } }
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark/ips"
4
+ require "json"
5
+ require "json_p3"
6
+
7
# Load the compliance test suite and keep only the queries expected to be valid.
CTS = JSON.parse(File.read("test/cts/cts.json"))
VALID_QUERIES = CTS["tests"].reject { |test_case| test_case.key?("invalid_selector") }
COMPILED_QUERIES = VALID_QUERIES.map do |test_case|
  [JSONP3.compile(test_case["selector"]), test_case["document"]]
end

puts "#{VALID_QUERIES.length} queries per iteration"

Benchmark.ips do |x|
  # Seconds spent in the warmup phase (default 2) and in the calculation
  # phase (default 5).
  x.config(warmup: 2, time: 5)

  # Parse each query and apply it to its document.
  x.report("compile and find:") do
    VALID_QUERIES.map { |t| JSONP3.find(t["selector"], t["document"]) }
  end

  # Parse each query without applying it.
  x.report("just compile:") do
    VALID_QUERIES.map { |t| JSONP3.compile(t["selector"]) }
  end

  # Apply queries that were compiled ahead of time.
  x.report("just find:") do
    COMPILED_QUERIES.map { |query, document| query.find(document) }
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark"
4
+ require "json"
5
+ require "json_p3"
6
+
7
# TODO: include small-citylots.json as a git submodule
# The document is parsed once, frozen, and shared by both reports.
DATA = JSON.parse(File.read("/tmp/small-citylots.json")).freeze

Benchmark.bm(15) do |x|
  # Descendant search followed by a further name lookup.
  x.report("deep:") { JSONP3.find("$.features..properties.BLOCK_NUM", DATA) }

  # Descendant search only.
  x.report("shallow:") { JSONP3.find("$.features..properties", DATA) }
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "memory_profiler"
4
+ require "json"
5
+ require "json_p3"
6
+
7
# Load the compliance test suite and keep only the queries expected to be valid.
CTS = JSON.parse(File.read("test/cts/cts.json"))
VALID_QUERIES = CTS["tests"].reject { |test_case| test_case.key?("invalid_selector") }
COMPILED_QUERIES = VALID_QUERIES.map do |test_case|
  [JSONP3.compile(test_case["selector"]), test_case["document"]]
end

n = 10

# Profile allocations made while compiling every valid query n times.
report = MemoryProfiler.report do
  n.times { VALID_QUERIES.map { |t| JSONP3.compile(t["selector"]) } }
end

report.pretty_print
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "memory_profiler"
4
+ require "json"
5
+ require "json_p3"
6
+
7
# TODO: include small-citylots.json as a git submodule
# The document is parsed and frozen before profiling starts, so only the
# query itself is measured.
DATA = JSON.parse(File.read("/tmp/small-citylots.json")).freeze

report = MemoryProfiler.report { JSONP3.find("$.features..properties", DATA) }

report.pretty_print
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stackprof"
4
+
5
+ require "json"
6
+ require "json_p3"
7
+
8
# Load the compliance test suite and keep only the cases that carry a "result".
CTS = JSON.parse(File.read("test/cts/cts.json"))
VALID_QUERIES = CTS["tests"].select { |test_case| test_case.key?("result") }
COMPILED_QUERIES = VALID_QUERIES.map do |test_case|
  [JSONP3.compile(test_case["selector"]), test_case["document"]]
end

n = 100

# Parse each query and apply it to its document.
StackProf.run(mode: :cpu, raw: true, out: ".stackprof-cpu-compile-and-find.dump") do
  n.times { VALID_QUERIES.map { |t| JSONP3.find(t["selector"], t["document"]) } }
end

# Parse each query without applying it.
StackProf.run(mode: :cpu, raw: true, out: ".stackprof-cpu-just-compile.dump") do
  n.times { VALID_QUERIES.map { |t| JSONP3.compile(t["selector"]) } }
end

# Apply queries that were compiled ahead of time.
StackProf.run(mode: :cpu, raw: true, out: ".stackprof-cpu-just-find.dump") do
  n.times { COMPILED_QUERIES.map { |query, document| query.find(document) } }
end