lazy-json 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/lazy-json.rb +284 -0
  3. metadata +72 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 294fda2ead2eb4a2b288c6849dea7fd4f9360521
4
+ data.tar.gz: a71bab4930cd523c96cef883478df1fd9cef3cd4
5
+ SHA512:
6
+ metadata.gz: 2b94b57110452a1ff8d05678dbcd9d57423993e93059701d2055784f7d091de0a90a85871b9ff421058d20befe200be91c825e877d67723470d20389981e15bb
7
+ data.tar.gz: e9d42662275f2c1e9846d146fea127479b975aa714a5182d37ad7ee1cbf4c95ecd9b225cff9a299c07ce58d402a7bba9d94fb7c5f97e246975c295697a8d1b1e
@@ -0,0 +1,284 @@
1
+ require 'oj'
2
+
3
+ module LazyJson
4
+
5
# Entry point: wraps a JSON string for lazy access.
#
# Builds a Sequence spanning the whole string and hands it to a LazyValue.
# Neither constructor inspects the JSON content; that happens on first access.
#
# @param json [String] the JSON document text
# @return [LazyValue] a lazy wrapper around the whole document
def self.attach(json)
  whole_document = Sequence.new(json)
  LazyValue.new(whole_document)
end
8
+
9
# A sequence of JSON bytes: a window [start_pos, end_pos) onto a backing string.
#
# Sequences never copy the backing string; every read_*/skip_* method returns a
# new Sequence that shares it. The read_* variants return the part that was
# consumed (a prefix), the skip_* variants return what is left (a suffix).
class Sequence

  # Byte values JSON treats as insignificant whitespace.
  # See http://stackoverflow.com/questions/16042274/definition-of-whitespace-in-json
  WHITESPACE = [
    0x20, # Space
    0x09, # Horizontal tab
    0x0A, # Line feed or New line
    0x0D  # Carriage return
  ].freeze

  attr_reader :json
  attr_reader :start_pos
  attr_reader :end_pos

  # Note positions are in bytes, not characters
  #
  # @param json [String] backing JSON text
  # @param start_pos [Integer] absolute byte offset of the first byte in the window
  # @param end_pos [Integer] absolute byte offset one past the last byte in the window
  # @raise [RuntimeError] if end_pos is before start_pos
  def initialize(json, start_pos = 0, end_pos = json.bytesize)
    raise "Sequence end #{ end_pos } is before start #{ start_pos }" if end_pos < start_pos
    @json = json
    @start_pos = start_pos
    @end_pos = end_pos
  end

  # @return [String] the bytes covered by this sequence
  def to_s
    @json.byteslice(@start_pos...@end_pos)
  end

  # Byte at offset i relative to start_pos. The offset is not checked against
  # end_pos, so this can return a byte beyond the window (or nil past the
  # end of the backing string).
  def byte_at(i)
    @json.getbyte(@start_pos + i)
  end

  # First byte of the sequence (see byte_at for the bounds caveat).
  def first
    byte_at(0)
  end

  # @return [Boolean] true when the window covers no bytes
  def empty?
    @start_pos == @end_pos
  end

  # Sequence from this start up to the absolute position end_pos.
  def prefix(end_pos)
    Sequence.new(@json, @start_pos, end_pos)
  end

  # Sequence from the absolute position start_pos up to this end.
  def suffix(start_pos)
    Sequence.new(@json, start_pos, @end_pos)
  end

  # Sequence running from the end of this one to the end of enclosing_seq.
  def remainder(enclosing_seq)
    Sequence.new(@json, @end_pos, enclosing_seq.end_pos)
  end

  # Leading whitespace of this sequence.
  def read_whitespace
    prefix(skim_whitespace(@start_pos))
  end

  # This sequence with leading whitespace removed.
  def skip_whitespace
    suffix(skim_whitespace(@start_pos))
  end

  # The single leading byte if it is `byte` (an Integer or an ::Array of
  # Integers). Raises when it is not and `required` is true; otherwise
  # returns an empty prefix.
  def read_byte(byte, required = true)
    prefix(skim_byte(@start_pos, byte, required))
  end

  # This sequence with the leading `byte` removed (same matching rules as read_byte).
  def skip_byte(byte, required = true)
    suffix(skim_byte(@start_pos, byte, required))
  end

  # Everything up to the first top-level occurrence of `terminator` (an
  # Integer or an ::Array of Integers). Nested strings, arrays and objects
  # are stepped over. The terminator byte itself is included only when
  # include_terminator is true.
  def read_until(terminator, include_terminator)
    prefix(skim_until(@start_pos, false, terminator, include_terminator))
  end

  # Everything after the point read_until would stop at.
  def skip_until(terminator, include_terminator)
    suffix(skim_until(@start_pos, false, terminator, include_terminator))
  end

  private

  # Absolute position of the first non-whitespace byte at or after `start`.
  def skim_whitespace(start)
    i = start
    while i < @end_pos && WHITESPACE.include?(@json.getbyte(i))
      i += 1
    end
    i
  end

  # Position after the byte at `at` when it matches; `at` itself when it does
  # not match and is not required.
  def skim_byte(at, byte, required)
    byte = [ byte ] unless byte.is_a?(::Array)
    # Only look at bytes inside this window; past the end there is nothing to match
    current = at < @end_pos ? @json.getbyte(at) : nil
    if byte.include?(current)
      at + 1
    elsif required
      raise "Expected #{ byte } but got '#{ current }'"
    else
      at
    end
  end

  # Scans forward from `start` until a terminator byte is found at the current
  # nesting level, returning the absolute stop position (never beyond end_pos).
  # With in_string set, backslash escapes are stepped over and no nesting is
  # tracked; otherwise strings, arrays and objects are skipped recursively.
  def skim_until(start, in_string, terminator, include_terminator)
    terminator = [ terminator ] unless terminator.is_a?(::Array)
    i = start
    while i < @end_pos
      byte = @json.getbyte(i)

      # Skip unicode characters. See table at https://en.wikipedia.org/wiki/UTF-8.
      if byte & 0b11100000 == 0b11000000
        i += 2
      elsif byte & 0b11110000 == 0b11100000
        i += 3
      elsif byte & 0b11111000 == 0b11110000
        i += 4

      elsif in_string && byte == 92 # '\\'.ord
        i += escape_sequence_length(i) # String escape sequence
      elsif terminator.include?(byte)
        i += 1 if include_terminator
        break
      else
        i += 1
        if ! in_string
          if byte == 34 # '"'.ord
            i = skim_until(i, true, 34, true) # '"'.ord - String start
          elsif byte == 91 # '['.ord
            i = skim_until(i, false, 93, true) # ']'.ord - Array start
          elsif byte == 123 # '{'.ord
            i = skim_until(i, false, 125, true) # '}'.ord - Object start
          end
        end
      end
    end
    # A truncated multi-byte character or escape can step past the end; clamp
    # so callers never receive a position outside this sequence
    [ i, @end_pos ].min
  end

  # Number of bytes taken by the escape sequence starting at `start`.
  def escape_sequence_length(start)
    raise 'Escape sequence must start with \\' if @json.getbyte(start) != 92 # '\\'.ord
    byte = @json.getbyte(start + 1)
    if byte.nil? # Dangling backslash at the very end of the input
      1
    elsif byte == 120 # 'x'.ord - \x followed by 2 hex digits
      4
    elsif byte == 117 # 'u'.ord - \u followed by 4 hex digits
      6
    elsif byte >= 48 && byte <= 57 # '0'.ord, '9'.ord - \ followed by 3 octal digits
      4
    else # \ followed by single escaped character
      2
    end
  end

end
155
+
156
# Base class for every JSON value: holds the Sequence of bytes that make up
# the value and can parse that text eagerly on request.
class Value

  # @param seq [Sequence] the bytes of this value
  def initialize(seq)
    @seq = seq
  end

  # Fully parses the text of this value.
  #
  # NOTE(review): Oj.load is called without an explicit mode, so whatever
  # Oj's default options are will apply - confirm that mode is appropriate
  # when the JSON comes from an untrusted source.
  #
  # @return the result of Oj.load on this value's text
  def parse
    text = @seq.to_s
    Oj.load(text) # Note JSON.parse fails on primitives since they're invalid as documents
  end

end
167
+
168
# A value whose concrete kind (object, array or primitive) is decided only
# when it is first accessed, by looking at its first non-whitespace byte.
class LazyValue < Value

  def initialize(seq)
    super(seq)
    @parsed = false
    @value = nil
  end

  # The typed wrapper for this value, built on first call and reused afterwards.
  #
  # @return [Object, Array, Primitive] wrapper chosen from the leading byte
  def value
    return @value if @parsed

    lead = @seq.skip_whitespace.first
    wrapper = case lead
              when 123 then Object    # '{'.ord
              when 91  then Array     # '['.ord
              else          Primitive
              end
    @value = wrapper.new(@seq)
    @parsed = true
    @value
  end

  # Delegates field / element access to the typed wrapper.
  def [](key_or_index)
    value[key_or_index]
  end

end
196
+
197
# A JSON object whose fields are scanned on demand. Fields are read in
# document order and cached; scanning stops as soon as the requested key is
# found, so later fields remain untouched until they are needed.
class Object < Value

  def initialize(seq)
    super(seq)
    @fields = {}
    # Unscanned remainder of the object body, starting just after '{'
    @fseq = @seq.skip_whitespace.skip_byte(123) # '{'.ord
  end

  # Access a field, lazily parsing if not yet parsed
  #
  # @param key [String] field name
  # @return [LazyValue, nil] the field's value, or nil when no such field is found
  def [](key)
    return @fields[key] if @fields.key?(key) || @fseq.empty?

    scan_until_found(key)
    @fields[key]
  end

  private

  # Reads fields into the cache until `key` turns up or the closing brace is consumed.
  def scan_until_found(key)
    closing_brace = 125 # '}'.ord
    found = false
    until found
      @fseq = @fseq.skip_whitespace
      if @fseq.first == closing_brace
        @fseq = @fseq.skip_byte(closing_brace).skip_whitespace
        break
      end
      name, lazy_value = read_field_and_consume
      @fields[name] = lazy_value
      found = (name == key)
    end
  end

  # Reads one `key: value` pair from the front of the unscanned remainder and
  # advances past it. A trailing ',' is consumed; a trailing '}' is left in
  # place for the caller to detect.
  #
  # @return [::Array] two elements: the key String and a LazyValue for the value
  # @raise [RuntimeError] if the key does not parse to a String
  def read_field_and_consume
    colon = 58  # ':'.ord
    comma = 44  # ','.ord
    brace = 125 # '}'.ord

    key_seq = @fseq.read_until(colon, false)
    key = Value.new(key_seq).parse
    raise "Non-string object key #{ key }" unless key.is_a?(String)

    @fseq = key_seq.remainder(@fseq)
    @fseq = @fseq.skip_byte(colon)

    value_seq = @fseq.read_until([ comma, brace ], false)
    @fseq = value_seq.remainder(@fseq)

    separator = @fseq.read_byte([ comma, brace ])
    @fseq = separator.remainder(@fseq) if separator.first == comma # Consume , but not }

    [ key, LazyValue.new(value_seq) ]
  end

end
238
+
239
# A JSON array whose elements are scanned on demand. Elements are read in
# document order and cached; scanning stops as soon as the requested index
# has been reached.
class Array < Value

  def initialize(seq)
    super(seq)
    @elements = []
    # Unscanned remainder of the array body, starting just after '['
    @eseq = @seq.skip_whitespace.skip_byte(91) # '['.ord
  end

  # Access an element, lazily parsing if not yet parsed
  #
  # A negative index counts back from the last element. Since the length is
  # only known once the closing bracket has been reached, a negative index
  # forces the rest of the array to be scanned first; previously it was
  # answered from whatever happened to be cached already.
  #
  # @param i [Integer] element index
  # @return [LazyValue, nil] the element, or nil when the index is out of range
  def [](i)
    scan_all = i < 0
    if (scan_all || @elements.size <= i) && ! @eseq.empty?
      while true
        @eseq = @eseq.skip_whitespace
        if @eseq.first == 93 # ']'.ord
          @eseq = @eseq.skip_byte(93).skip_whitespace # ']'.ord
          break
        end
        new_value = read_value_and_consume
        @elements << new_value
        break if ! scan_all && @elements.size > i
      end
    end
    @elements[i]
  end

  private

  # Reads one element from the front of the unscanned remainder and advances
  # past it. A trailing ',' is consumed; a trailing ']' is left in place for
  # the caller to detect.
  #
  # @return [LazyValue] lazy wrapper around the element's bytes
  def read_value_and_consume
    value_seq = @eseq.read_until([ 44, 93 ], false) # ','.ord, ']'.ord
    @eseq = value_seq.remainder(@eseq)
    sep_seq = @eseq.read_byte([ 44, 93 ]) # ','.ord, ']'.ord
    @eseq = sep_seq.remainder(@eseq) if sep_seq.first == 44 # ','.ord - Consume , but not ]
    LazyValue.new(value_seq)
  end

end
275
+
276
# A JSON value that is neither an object nor an array. It adds nothing to
# Value: construction and parse are inherited unchanged.
class Primitive < Value
end
283
+
284
+ end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lazy-json
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Paul McReynolds
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: oj
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3'
41
+ description: Lazy JSON skimmer-parser
42
+ email: paul@conspire.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/lazy-json.rb
48
+ homepage: https://github.com/conspire-org/lazy-json
49
+ licenses:
50
+ - MIT
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubyforge_project:
68
+ rubygems_version: 2.4.5.1
69
+ signing_key:
70
+ specification_version: 4
71
+ summary: Lazy JSON
72
+ test_files: []