lazy-json 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/lazy-json.rb +284 -0
- metadata +72 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 294fda2ead2eb4a2b288c6849dea7fd4f9360521
|
|
4
|
+
data.tar.gz: a71bab4930cd523c96cef883478df1fd9cef3cd4
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 2b94b57110452a1ff8d05678dbcd9d57423993e93059701d2055784f7d091de0a90a85871b9ff421058d20befe200be91c825e877d67723470d20389981e15bb
|
|
7
|
+
data.tar.gz: e9d42662275f2c1e9846d146fea127479b975aa714a5182d37ad7ee1cbf4c95ecd9b225cff9a299c07ce58d402a7bba9d94fb7c5f97e246975c295697a8d1b1e
|
data/lib/lazy-json.rb
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
require 'oj'
|
|
2
|
+
|
|
3
|
+
module LazyJson
|
|
4
|
+
|
|
5
|
+
# Wrap a JSON string for lazy access.
#
# Builds a Sequence covering the given string and hands it to a new
# LazyValue, which is returned. Nothing is parsed here.
def self.attach(json)
  whole_document = Sequence.new(json)
  LazyValue.new(whole_document)
end
|
|
8
|
+
|
|
9
|
+
# A window onto a run of bytes inside a JSON string.
#
# A Sequence never copies the underlying string; it only records the string
# plus a start (inclusive) and end (exclusive) byte offset. The read_* methods
# return the part of the window that was matched, the skip_* methods return
# the part of the window that follows the match.
class Sequence

  # See http://stackoverflow.com/questions/16042274/definition-of-whitespace-in-json
  WHITESPACE = [
    0x20, # Space
    0x09, # Horizontal tab
    0x0A, # Line feed or New line
    0x0D  # Carriage return
  ].freeze

  # Note positions are in bytes, not characters, and are absolute offsets
  # into +json+ (not relative to another Sequence).
  #
  # Raises RuntimeError if +end_pos+ is before +start_pos+.
  def initialize(json, start_pos = 0, end_pos = json.bytesize)
    raise "Sequence end #{ end_pos } is before start #{ start_pos }" if end_pos < start_pos
    @json = json
    @start_pos = start_pos
    @end_pos = end_pos
  end

  attr_reader :json
  attr_reader :start_pos
  attr_reader :end_pos

  # The bytes covered by this window, as a new string.
  def to_s
    @json.byteslice(@start_pos...@end_pos)
  end

  # Byte value at offset +i+ from the start of the window, or nil when +i+
  # falls outside the window.
  def byte_at(i)
    pos = @start_pos + i
    return nil if i < 0 || pos >= @end_pos
    @json.getbyte(pos)
  end

  # First byte of the window, or nil when the window is empty.
  def first
    byte_at(0)
  end

  def empty?
    @start_pos == @end_pos
  end

  # Window from this window's start up to the absolute offset +end_pos+.
  def prefix(end_pos)
    Sequence.new(@json, @start_pos, end_pos)
  end

  # Window from the absolute offset +start_pos+ up to this window's end.
  def suffix(start_pos)
    Sequence.new(@json, start_pos, @end_pos)
  end

  # Window from the end of this one up to the end of +enclosing_seq+.
  def remainder(enclosing_seq)
    Sequence.new(@json, @end_pos, enclosing_seq.end_pos)
  end

  # Leading run of JSON whitespace.
  def read_whitespace
    prefix(skim_whitespace(@start_pos))
  end

  # Everything after the leading run of JSON whitespace.
  def skip_whitespace
    suffix(skim_whitespace(@start_pos))
  end

  # The single leading byte if it equals +byte+ (or is one of +byte+ when an
  # array is given). When it does not match: raises RuntimeError if
  # +required+, otherwise returns an empty window.
  def read_byte(byte, required = true)
    prefix(skim_byte(@start_pos, byte, required))
  end

  # Like read_byte, but returns what follows the matched byte (or the whole
  # window when nothing matched and +required+ is false).
  def skip_byte(byte, required = true)
    suffix(skim_byte(@start_pos, byte, required))
  end

  # Leading part of the window up to the first +terminator+ byte (a byte
  # value or array of byte values) found outside any nested string, array or
  # object. The terminator itself is included only if +include_terminator+.
  def read_until(terminator, include_terminator)
    prefix(skim_until(@start_pos, false, terminator, include_terminator))
  end

  # Like read_until, but returns the part of the window after the match.
  def skip_until(terminator, include_terminator)
    suffix(skim_until(@start_pos, false, terminator, include_terminator))
  end

  private

  # Absolute offset of the first non-whitespace byte at or after +start+
  # (or the end of the window).
  def skim_whitespace(start)
    i = start
    i += 1 while i < @end_pos && WHITESPACE.include?(@json.getbyte(i))
    i
  end

  # Returns +at+ + 1 when the byte at +at+ is an accepted one, +at+ when it
  # is not and the byte is optional; raises when it is required.
  def skim_byte(at, byte, required)
    accepted = byte.is_a?(::Array) ? byte : [ byte ]
    # Never look past the end of the window: whatever follows belongs to the
    # enclosing sequence, not to this one.
    actual = at < @end_pos ? @json.getbyte(at) : nil
    if accepted.include?(actual)
      at + 1
    elsif required
      raise "Expected #{ accepted } but got '#{ actual }'"
    else
      at
    end
  end

  # Scans forward from +start+ and returns the absolute offset at which the
  # scan stopped: at (or just after, if +include_terminator+) the first
  # terminator byte, or at the end of the window when none is found.
  # With +in_string+ false, nested strings, arrays and objects are skipped
  # as a unit by recursing; with +in_string+ true, backslash escapes are
  # skipped instead.
  def skim_until(start, in_string, terminator, include_terminator)
    terminators = terminator.is_a?(::Array) ? terminator : [ terminator ]
    i = start
    while i < @end_pos
      byte = @json.getbyte(i)

      # Skip unicode characters. See table at https://en.wikipedia.org/wiki/UTF-8.
      if byte & 0b11100000 == 0b11000000
        i += 2
      elsif byte & 0b11110000 == 0b11100000
        i += 3
      elsif byte & 0b11111000 == 0b11110000
        i += 4

      elsif in_string && byte == 92 # '\\'.ord
        i += escape_sequence_length(i) # String escape sequence
      elsif terminators.include?(byte)
        i += 1 if include_terminator
        break
      else
        i += 1
        next if in_string
        case byte
        when 34  then i = skim_until(i, true, 34, true)   # '"'.ord - String start
        when 91  then i = skim_until(i, false, 93, true)  # '['.ord - Array start
        when 123 then i = skim_until(i, false, 125, true) # '{'.ord - Object start
        end
      end
    end
    # A truncated multi-byte character or escape may step past the window;
    # never report a position beyond its end.
    [ i, @end_pos ].min
  end

  # Number of bytes taken by the backslash escape starting at +start+.
  def escape_sequence_length(start)
    raise 'Escape sequence must start with \\' if @json.getbyte(start) != 92 # '\\'.ord
    byte = @json.getbyte(start + 1)
    if byte.nil? # Backslash is the last byte of the input
      2
    elsif byte == 120 # 'x'.ord - \x followed by 2 hex digits
      4
    elsif byte == 117 # 'u'.ord - \u followed by 4 hex digits
      6
    elsif byte >= 48 && byte <= 57 # '0'.ord, '9'.ord - \ followed by 3 octal digits
      4
    else # \ followed by single escaped character
      2
    end
  end

end
|
|
155
|
+
|
|
156
|
+
# Base class for every JSON value wrapper: it holds the sequence of bytes
# the value was found in and can hand that text to Oj for a full parse.
class Value

  # +seq+ only needs to respond to to_s with the JSON text of the value.
  def initialize(seq)
    @seq = seq
  end

  # Fully parse the text covered by the sequence and return Oj's result.
  # Note JSON.parse fails on primitives since they're invalid as documents
  def parse
    json_text = @seq.to_s
    Oj.load(json_text)
  end

end
|
|
167
|
+
|
|
168
|
+
# A value whose kind (object, array or primitive) is only determined the
# first time it is needed; the resulting wrapper is then cached.
class LazyValue < Value

  def initialize(seq)
    super
    @parsed = false
    @value = nil
  end

  # The Object, Array or Primitive wrapper for this value, chosen from the
  # first non-whitespace byte of the sequence and built on first use.
  def value
    return @value if @parsed
    @value = wrapper_class.new(@seq)
    @parsed = true
    @value
  end

  # Forwards the lookup to the wrapped value.
  def [](key_or_index)
    value[key_or_index]
  end

  private

  # Wrapper class matching the first non-whitespace byte of the sequence.
  def wrapper_class
    case @seq.skip_whitespace.first
    when 123 then Object # '{'.ord
    when 91  then Array  # '['.ord
    else Primitive
    end
  end

end
|
|
196
|
+
|
|
197
|
+
# A JSON object whose fields are discovered on demand: the text is scanned
# only as far as needed to find a requested key, and every field met on the
# way is remembered.
class Object < Value

  def initialize(seq)
    super
    @fields = {}
    # Unscanned remainder of the object, starting just after the brace.
    @fseq = @seq.skip_whitespace.skip_byte(123) # '{'.ord
  end

  # Access a field, lazily parsing if not yet parsed.
  # Returns the field's LazyValue, or nil when the key was not found.
  def [](key)
    scan_for(key) unless @fields.key?(key) || @fseq.empty?
    @fields[key]
  end

  private

  # Read fields until +key+ has been stored or the closing brace is reached.
  def scan_for(key)
    until closing_brace_consumed?
      name, lazy_value = read_field_and_consume
      @fields[name] = lazy_value
      break if name == key
    end
  end

  # Skips whitespace; if the closing brace comes next, consumes it (and any
  # whitespace after it) and returns true, otherwise returns false.
  def closing_brace_consumed?
    @fseq = @fseq.skip_whitespace
    return false unless @fseq.first == 125 # '}'.ord
    @fseq = @fseq.skip_byte(125).skip_whitespace # '}'.ord
    true
  end

  # Reads one "key": value pair from the unscanned remainder, advancing past
  # a following comma (a closing brace is left in place). Returns the parsed
  # key and a LazyValue for the still-unparsed value text.
  def read_field_and_consume
    name_seq = @fseq.read_until(58, false) # ':'.ord
    name = Value.new(name_seq).parse
    raise "Non-string object key #{ name }" unless name.is_a?(String)
    @fseq = name_seq.remainder(@fseq)
    @fseq = @fseq.skip_byte(58) # ':'.ord
    body_seq = @fseq.read_until([ 44, 125 ], false) # ','.ord, '}'.ord
    @fseq = body_seq.remainder(@fseq)
    separator = @fseq.read_byte([ 44, 125 ]) # ','.ord, '}'.ord
    @fseq = separator.remainder(@fseq) if separator.first == 44 # ','.ord - Consume , but not }
    [ name, LazyValue.new(body_seq) ]
  end

end
|
|
238
|
+
|
|
239
|
+
# A JSON array whose elements are discovered on demand: the text is scanned
# only as far as needed to reach a requested index, and every element met on
# the way is remembered.
class Array < Value

  def initialize(seq)
    super
    @elements = []
    # Unscanned remainder of the array, starting just after the bracket.
    @eseq = @seq.skip_whitespace.skip_byte(91) # '['.ord
  end

  # Access an element, lazily parsing if not yet parsed.
  #
  # A negative +i+ counts from the end of the array, so the remaining
  # elements are all read before indexing (otherwise the answer would depend
  # on how much happened to have been read by earlier calls).
  # Returns the element's LazyValue, or nil when the index is out of range.
  def [](i)
    read_through(i) unless @eseq.empty?
    @elements[i]
  end

  private

  # Read elements until index +i+ is available or the closing bracket is
  # reached; for a negative +i+ always read up to the closing bracket.
  def read_through(i)
    until (i >= 0 && @elements.size > i) || closing_bracket_consumed?
      @elements << read_value_and_consume
    end
  end

  # Skips whitespace; if the closing bracket comes next, consumes it (and
  # any whitespace after it) and returns true, otherwise returns false.
  def closing_bracket_consumed?
    @eseq = @eseq.skip_whitespace
    return false unless @eseq.first == 93 # ']'.ord
    @eseq = @eseq.skip_byte(93).skip_whitespace # ']'.ord
    true
  end

  # Reads one element from the unscanned remainder, advancing past a
  # following comma (a closing bracket is left in place). Returns a
  # LazyValue for the still-unparsed element text.
  def read_value_and_consume
    value_seq = @eseq.read_until([ 44, 93 ], false) # ','.ord, ']'.ord
    @eseq = value_seq.remainder(@eseq)
    sep_seq = @eseq.read_byte([ 44, 93 ]) # ','.ord, ']'.ord
    @eseq = sep_seq.remainder(@eseq) if sep_seq.first == 44 # ','.ord - Consume , but not ]
    LazyValue.new(value_seq)
  end

end
|
|
275
|
+
|
|
276
|
+
# Wrapper used for any value that does not start with '{' or '['.
# It adds nothing to Value; it exists so such values have their own class.
class Primitive < Value

  def initialize(seq)
    super
  end

end
|
|
283
|
+
|
|
284
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: lazy-json
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Paul McReynolds
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2015-09-03 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: oj
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '2'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '2'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rspec
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '3'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '3'
|
|
41
|
+
description: Lazy JSON skimmer-parser
|
|
42
|
+
email: paul@conspire.com
|
|
43
|
+
executables: []
|
|
44
|
+
extensions: []
|
|
45
|
+
extra_rdoc_files: []
|
|
46
|
+
files:
|
|
47
|
+
- lib/lazy-json.rb
|
|
48
|
+
homepage: https://github.com/conspire-org/lazy-json
|
|
49
|
+
licenses:
|
|
50
|
+
- MIT
|
|
51
|
+
metadata: {}
|
|
52
|
+
post_install_message:
|
|
53
|
+
rdoc_options: []
|
|
54
|
+
require_paths:
|
|
55
|
+
- lib
|
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '0'
|
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
62
|
+
requirements:
|
|
63
|
+
- - ">="
|
|
64
|
+
- !ruby/object:Gem::Version
|
|
65
|
+
version: '0'
|
|
66
|
+
requirements: []
|
|
67
|
+
rubyforge_project:
|
|
68
|
+
rubygems_version: 2.4.5.1
|
|
69
|
+
signing_key:
|
|
70
|
+
specification_version: 4
|
|
71
|
+
summary: Lazy JSON
|
|
72
|
+
test_files: []
|