lazy-json 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/lazy-json.rb +284 -0
  3. metadata +72 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 294fda2ead2eb4a2b288c6849dea7fd4f9360521
4
+ data.tar.gz: a71bab4930cd523c96cef883478df1fd9cef3cd4
5
+ SHA512:
6
+ metadata.gz: 2b94b57110452a1ff8d05678dbcd9d57423993e93059701d2055784f7d091de0a90a85871b9ff421058d20befe200be91c825e877d67723470d20389981e15bb
7
+ data.tar.gz: e9d42662275f2c1e9846d146fea127479b975aa714a5182d37ad7ee1cbf4c95ecd9b225cff9a299c07ce58d402a7bba9d94fb7c5f97e246975c295697a8d1b1e
@@ -0,0 +1,284 @@
1
+ require 'oj'
2
+
3
# Lazy JSON skimmer-parser.
#
# Instead of parsing a whole document up front, the JSON text is "skimmed":
# object fields and array elements are located by scanning bytes, and only the
# values that are actually requested are handed to the real parser (Oj).
module LazyJson

  # Wraps a JSON document in a lazily evaluated value.
  #
  # @param json [String] the JSON text
  # @return [LazyValue] a value covering the whole document
  def self.attach(json)
    LazyValue.new(Sequence.new(json))
  end

  # A window (start/end byte offsets) onto a string of JSON text.
  #
  # Sequences never copy the underlying string; every read/skip operation
  # returns a new Sequence that shares the same string with different bounds.
  class Sequence

    # See http://stackoverflow.com/questions/16042274/definition-of-whitespace-in-json
    WHITESPACE = [
      0x20, # Space
      0x09, # Horizontal tab
      0x0A, # Line feed or New line
      0x0D  # Carriage return
    ].freeze

    attr_reader :json
    attr_reader :start_pos
    attr_reader :end_pos

    # Note positions are in bytes, not characters.
    #
    # @param json [String] the underlying JSON text
    # @param start_pos [Integer] absolute byte offset of the first byte
    # @param end_pos [Integer] absolute byte offset one past the last byte
    # @raise [RuntimeError] if end_pos is before start_pos
    def initialize(json, start_pos = 0, end_pos = json.bytesize)
      raise "Sequence end #{ end_pos } is before start #{ start_pos }" if end_pos < start_pos
      @json = json
      @start_pos = start_pos
      @end_pos = end_pos
    end

    # @return [String] the bytes covered by this sequence
    def to_s
      @json.byteslice(@start_pos...@end_pos)
    end

    # @param i [Integer] offset relative to the start of this sequence
    # @return [Integer, nil] the byte at that offset (not checked against end_pos)
    def byte_at(i)
      @json.getbyte(@start_pos + i)
    end

    # @return [Integer, nil] the byte at the start of this sequence
    def first
      byte_at(0)
    end

    # @return [Boolean] true when the sequence covers no bytes
    def empty?
      @start_pos == @end_pos
    end

    # @param end_pos [Integer] absolute byte offset
    # @return [Sequence] same start as this sequence, ending at end_pos
    def prefix(end_pos)
      Sequence.new(@json, @start_pos, end_pos)
    end

    # @param start_pos [Integer] absolute byte offset
    # @return [Sequence] same end as this sequence, starting at start_pos
    def suffix(start_pos)
      Sequence.new(@json, start_pos, @end_pos)
    end

    # @param enclosing_seq [Sequence] a sequence this one was read from
    # @return [Sequence] what follows this sequence, up to the end of enclosing_seq
    def remainder(enclosing_seq)
      Sequence.new(@json, @end_pos, enclosing_seq.end_pos)
    end

    # @return [Sequence] the leading whitespace of this sequence
    def read_whitespace
      prefix(skim_whitespace(@start_pos))
    end

    # @return [Sequence] this sequence without its leading whitespace
    def skip_whitespace
      suffix(skim_whitespace(@start_pos))
    end

    # @param byte [Integer, ::Array<Integer>] acceptable byte value(s)
    # @param required [Boolean] raise when the first byte is not acceptable
    # @return [Sequence] the matched leading byte (empty if optional and absent)
    # @raise [RuntimeError] if required and the first byte does not match
    def read_byte(byte, required = true)
      prefix(skim_byte(@start_pos, byte, required))
    end

    # Like read_byte, but returns what follows the matched byte.
    def skip_byte(byte, required = true)
      suffix(skim_byte(@start_pos, byte, required))
    end

    # @param terminator [Integer, ::Array<Integer>] byte value(s) to stop at
    # @param include_terminator [Boolean] whether the terminator is part of the result
    # @return [Sequence] everything up to the first terminator that is not
    #   nested inside a string, array or object
    def read_until(terminator, include_terminator)
      prefix(skim_until(@start_pos, false, terminator, include_terminator))
    end

    # Like read_until, but returns what follows the skimmed part.
    def skip_until(terminator, include_terminator)
      suffix(skim_until(@start_pos, false, terminator, include_terminator))
    end

    private

    # Returns the absolute position of the first non-whitespace byte at or
    # after start (or end_pos if there is none).
    def skim_whitespace(start)
      i = start
      i += 1 while i < @end_pos && WHITESPACE.include?(@json.getbyte(i))
      i
    end

    # Returns the position after the byte at `at` if it is one of the accepted
    # values; otherwise raises (required) or returns `at` unchanged.
    def skim_byte(at, byte, required)
      accepted = byte.is_a?(::Array) ? byte : [ byte ]
      # Never look past the end of this sequence: bytes beyond it belong to
      # the enclosing document, not to us.
      actual = at < @end_pos ? @json.getbyte(at) : nil
      if accepted.include?(actual)
        at + 1
      elsif required
        raise "Expected #{ accepted } but got '#{ actual }'"
      else
        at
      end
    end

    # Scans forward from start until one of the terminator bytes is found at
    # the current nesting level, recursing into strings, arrays and objects so
    # that terminators inside them are ignored.
    def skim_until(start, in_string, terminator, include_terminator)
      terminators = terminator.is_a?(::Array) ? terminator : [ terminator ]
      i = start
      while i < @end_pos
        byte = @json.getbyte(i)

        # Skip unicode characters. See table at https://en.wikipedia.org/wiki/UTF-8.
        if (byte & 0b11100000) == 0b11000000
          i += 2
        elsif (byte & 0b11110000) == 0b11100000
          i += 3
        elsif (byte & 0b11111000) == 0b11110000
          i += 4

        elsif in_string && byte == 92 # '\\'.ord
          i += escape_sequence_length(i) # String escape sequence
        elsif terminators.include?(byte)
          i += 1 if include_terminator
          break
        else
          i += 1
          unless in_string
            if byte == 34 # '"'.ord
              i = skim_until(i, true, 34, true) # '"'.ord - String start
            elsif byte == 91 # '['.ord
              i = skim_until(i, false, 93, true) # ']'.ord - Array start
            elsif byte == 123 # '{'.ord
              i = skim_until(i, false, 125, true) # '}'.ord - Object start
            end
          end
        end
      end
      # A truncated multi-byte character or escape could step past the end.
      i > @end_pos ? @end_pos : i
    end

    # Returns the number of bytes taken by the escape sequence starting at
    # the backslash at position start.
    def escape_sequence_length(start)
      raise 'Escape sequence must start with \\' if @json.getbyte(start) != 92 # '\\'.ord
      case @json.getbyte(start + 1) # nil when the backslash is the last byte
      when 120 # 'x'.ord - \x followed by 2 hex digits
        4
      when 117 # 'u'.ord - \u followed by 4 hex digits
        6
      when 48..57 # '0'.ord..'9'.ord - \ followed by 3 octal digits
        4
      else # \ followed by single escaped character
        2
      end
    end

  end

  # A JSON value backed by a Sequence of not-yet-parsed text.
  class Value

    # @param seq [Sequence] the text of this value
    def initialize(seq)
      @seq = seq
    end

    # Fully parses the text of this value.
    #
    # @return the Ruby object produced by Oj for this value's text
    def parse
      Oj.load(@seq.to_s) # Note JSON.parse fails on primitives since they're invalid as documents
    end

  end

  # A value whose kind (object, array or primitive) is only determined when
  # it is first accessed.
  class LazyValue < Value

    def initialize(seq)
      super(seq)
      @parsed = false
      @value = nil
    end

    # @return [Object, Array, Primitive] the wrapper matching the first
    #   non-whitespace byte of the text; computed once and then cached
    def value
      unless @parsed
        byte = @seq.skip_whitespace.first
        @value =
          if byte == 123 # '{'.ord
            Object.new(@seq)
          elsif byte == 91 # '['.ord
            Array.new(@seq)
          else
            Primitive.new(@seq)
          end
        @parsed = true
      end
      @value
    end

    # Delegates to the underlying object (by key) or array (by index).
    def [](key_or_index)
      value[key_or_index]
    end

  end

  # A JSON object whose fields are located on demand.
  class Object < Value

    def initialize(seq)
      super(seq)
      @fields = {}
      @fseq = @seq.skip_whitespace.skip_byte(123) # '{'.ord
    end

    # Access a field, lazily parsing if not yet parsed.
    #
    # Fields are read in document order and cached until the requested key is
    # found or the closing brace is reached.
    #
    # @param key [String] the field name
    # @return [LazyValue, nil] the field's value, or nil if there is no such field
    def [](key)
      if !@fields.key?(key) && !@fseq.empty?
        loop do
          @fseq = @fseq.skip_whitespace
          if @fseq.first == 125 # '}'.ord
            @fseq = @fseq.skip_byte(125).skip_whitespace # '}'.ord
            break
          end
          new_key, new_value = read_field_and_consume
          @fields[new_key] = new_value
          break if new_key == key
        end
      end
      @fields[key]
    end

    private

    # Reads the next `"key": value` pair and advances @fseq past it,
    # including a trailing comma (but not the closing brace).
    def read_field_and_consume
      key_seq = @fseq.read_until(58, false) # ':'.ord
      key = Value.new(key_seq).parse
      raise "Non-string object key #{ key }" unless key.is_a?(String)
      @fseq = key_seq.remainder(@fseq)
      @fseq = @fseq.skip_byte(58) # ':'.ord
      value_seq = @fseq.read_until([ 44, 125 ], false) # ','.ord, '}'.ord
      @fseq = value_seq.remainder(@fseq)
      sep_seq = @fseq.read_byte([ 44, 125 ]) # ','.ord, '}'.ord
      @fseq = sep_seq.remainder(@fseq) if sep_seq.first == 44 # ','.ord - Consume , but not }
      [ key, LazyValue.new(value_seq) ]
    end

  end

  # A JSON array whose elements are located on demand.
  class Array < Value

    def initialize(seq)
      super(seq)
      @elements = []
      @eseq = @seq.skip_whitespace.skip_byte(91) # '['.ord
    end

    # Access an element, lazily parsing if not yet parsed.
    #
    # Elements are read in document order and cached. A negative index counts
    # from the end of the array, so all remaining elements are read first.
    #
    # @param i [Integer] the element index
    # @return [LazyValue, nil] the element, or nil if the index is out of range
    def [](i)
      read_all = i < 0
      if (read_all || @elements.size <= i) && !@eseq.empty?
        loop do
          @eseq = @eseq.skip_whitespace
          if @eseq.first == 93 # ']'.ord
            @eseq = @eseq.skip_byte(93).skip_whitespace # ']'.ord
            break
          end
          @elements << read_value_and_consume
          break if !read_all && @elements.size > i
        end
      end
      @elements[i]
    end

    private

    # Reads the next element and advances @eseq past it, including a trailing
    # comma (but not the closing bracket).
    def read_value_and_consume
      value_seq = @eseq.read_until([ 44, 93 ], false) # ','.ord, ']'.ord
      @eseq = value_seq.remainder(@eseq)
      sep_seq = @eseq.read_byte([ 44, 93 ]) # ','.ord, ']'.ord
      @eseq = sep_seq.remainder(@eseq) if sep_seq.first == 44 # ','.ord - Consume , but not ]
      LazyValue.new(value_seq)
    end

  end

  # Any value that is neither an object nor an array; it adds nothing to
  # Value and is only ever parsed as a whole.
  class Primitive < Value
  end

end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lazy-json
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Paul McReynolds
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: oj
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3'
41
+ description: Lazy JSON skimmer-parser
42
+ email: paul@conspire.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/lazy-json.rb
48
+ homepage: https://github.com/conspire-org/lazy-json
49
+ licenses:
50
+ - MIT
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubyforge_project:
68
+ rubygems_version: 2.4.5.1
69
+ signing_key:
70
+ specification_version: 4
71
+ summary: Lazy JSON
72
+ test_files: []