json-stream 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/lib/json/stream/buffer.rb +16 -8
- data/lib/json/stream/builder.rb +9 -6
- data/lib/json/stream/parser.rb +46 -46
- data/lib/json/stream/version.rb +1 -1
- data/spec/buffer_spec.rb +7 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62af90ebe18d5c8a58ca9b75695bf2c403b4c4a0
|
4
|
+
data.tar.gz: d3176279af8156702e63ffdf4c963ab6f0e25cb3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d757855dcd79878bb9d5cbdd74c05252bde3286259b88315e95b3302ce60bc24bbf2d6f6e0ab32a5507b55bbdeae3b63ab405368880cfb29b1981f3cb291146
|
7
|
+
data.tar.gz: 0a90cf028ed263a4dc571c4834e1b033d9c23b17aaaef36a7275d44e804a69c1c7c8090ea119411858608bfb45a8c6ada7b2e4e6c8862294ef54ef091b1030e0
|
data/README.md
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
|
3
3
|
JSON::Stream is a JSON parser, based on a finite state machine, that generates
|
4
4
|
events for each state change. This allows streaming both the JSON document into
|
5
|
-
memory and the parsed object graph out of memory to some other process.
|
6
|
-
|
7
|
-
|
5
|
+
memory and the parsed object graph out of memory to some other process.
|
6
|
+
|
7
|
+
This is much like an XML SAX parser that generates events during parsing. There
|
8
|
+
is no requirement for the document, or the object graph, to be fully buffered in
|
8
9
|
memory. This is best suited for huge JSON documents that won't fit in memory.
|
9
10
|
For example, streaming and processing large map/reduce views from Apache
|
10
11
|
CouchDB.
|
data/lib/json/stream/buffer.rb
CHANGED
@@ -14,7 +14,7 @@ module JSON
|
|
14
14
|
class Buffer
|
15
15
|
def initialize
|
16
16
|
@state = :start
|
17
|
-
@
|
17
|
+
@buffer = []
|
18
18
|
@need = 0
|
19
19
|
end
|
20
20
|
|
@@ -29,6 +29,12 @@ module JSON
|
|
29
29
|
#
|
30
30
|
# Returns a UTF-8 encoded String.
|
31
31
|
def <<(data)
|
32
|
+
# Avoid state machine for complete UTF-8.
|
33
|
+
if @buffer.empty?
|
34
|
+
data.force_encoding(Encoding::UTF_8)
|
35
|
+
return data if data.valid_encoding?
|
36
|
+
end
|
37
|
+
|
32
38
|
bytes = []
|
33
39
|
data.each_byte do |byte|
|
34
40
|
case @state
|
@@ -37,7 +43,7 @@ module JSON
|
|
37
43
|
bytes << byte
|
38
44
|
elsif byte >= 192
|
39
45
|
@state = :multi_byte
|
40
|
-
@
|
46
|
+
@buffer << byte
|
41
47
|
@need =
|
42
48
|
case
|
43
49
|
when byte >= 240 then 4
|
@@ -49,9 +55,9 @@ module JSON
|
|
49
55
|
end
|
50
56
|
when :multi_byte
|
51
57
|
if byte > 127 && byte < 192
|
52
|
-
@
|
53
|
-
if @
|
54
|
-
bytes += @
|
58
|
+
@buffer << byte
|
59
|
+
if @buffer.size == @need
|
60
|
+
bytes += @buffer.slice!(0, @buffer.size)
|
55
61
|
@state = :start
|
56
62
|
end
|
57
63
|
else
|
@@ -59,8 +65,10 @@ module JSON
|
|
59
65
|
end
|
60
66
|
end
|
61
67
|
end
|
62
|
-
|
63
|
-
|
68
|
+
|
69
|
+
# Build UTF-8 encoded string from completed codepoints.
|
70
|
+
bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |text|
|
71
|
+
error('Invalid UTF-8 byte sequence') unless text.valid_encoding?
|
64
72
|
end
|
65
73
|
end
|
66
74
|
|
@@ -82,7 +90,7 @@ module JSON
|
|
82
90
|
#
|
83
91
|
# Returns true if the buffer is empty.
|
84
92
|
def empty?
|
85
|
-
@
|
93
|
+
@buffer.empty?
|
86
94
|
end
|
87
95
|
|
88
96
|
private
|
data/lib/json/stream/builder.rb
CHANGED
@@ -38,13 +38,15 @@ module JSON
|
|
38
38
|
|
39
39
|
def end_object
|
40
40
|
return if @stack.size == 1
|
41
|
+
|
41
42
|
node = @stack.pop
|
43
|
+
top = @stack[-1]
|
42
44
|
|
43
|
-
case
|
45
|
+
case top
|
44
46
|
when Hash
|
45
|
-
|
47
|
+
top[@keys.pop] = node
|
46
48
|
when Array
|
47
|
-
|
49
|
+
top << node
|
48
50
|
end
|
49
51
|
end
|
50
52
|
alias :end_array :end_object
|
@@ -58,11 +60,12 @@ module JSON
|
|
58
60
|
end
|
59
61
|
|
60
62
|
def value(value)
|
61
|
-
|
63
|
+
top = @stack[-1]
|
64
|
+
case top
|
62
65
|
when Hash
|
63
|
-
|
66
|
+
top[@keys.pop] = value
|
64
67
|
when Array
|
65
|
-
|
68
|
+
top << value
|
66
69
|
else
|
67
70
|
@stack << value
|
68
71
|
end
|
data/lib/json/stream/parser.rb
CHANGED
@@ -73,46 +73,6 @@ module JSON
|
|
73
73
|
stream.close
|
74
74
|
end
|
75
75
|
|
76
|
-
# Drain any remaining buffered characters into the parser to complete
|
77
|
-
# the parsing of the document.
|
78
|
-
#
|
79
|
-
# This is only required when parsing a document containing a single
|
80
|
-
# numeric value, integer or float. The parser has no other way to
|
81
|
-
# detect when it should no longer expect additional characters with
|
82
|
-
# which to complete the parse, so it must be signaled by a call to
|
83
|
-
# this method.
|
84
|
-
#
|
85
|
-
# If you're parsing more typical object or array documents, there's no
|
86
|
-
# need to call `finish` because the parse will complete when the final
|
87
|
-
# closing `]` or `}` character is scanned.
|
88
|
-
#
|
89
|
-
# Raises a JSON::Stream::ParserError if the JSON data is malformed.
|
90
|
-
#
|
91
|
-
# Returns nothing.
|
92
|
-
def finish
|
93
|
-
# Partial multi-byte character waiting for completion bytes.
|
94
|
-
error('Unexpected end-of-file') unless @utf8.empty?
|
95
|
-
|
96
|
-
# Partial array, object, or string.
|
97
|
-
error('Unexpected end-of-file') unless @stack.empty?
|
98
|
-
|
99
|
-
case @state
|
100
|
-
when :end_document
|
101
|
-
# done, do nothing
|
102
|
-
when :in_float
|
103
|
-
end_value(@buf.to_f)
|
104
|
-
when :in_exponent
|
105
|
-
error('Unexpected end-of-file') unless @buf =~ DIGIT_END
|
106
|
-
end_value(@buf.to_f)
|
107
|
-
when :start_zero
|
108
|
-
end_value(@buf.to_i)
|
109
|
-
when :start_int
|
110
|
-
end_value(@buf.to_i)
|
111
|
-
else
|
112
|
-
error('Unexpected end-of-file')
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
76
|
# Create a new parser with an optional initialization block where
|
117
77
|
# we can register event callbacks.
|
118
78
|
#
|
@@ -201,11 +161,11 @@ module JSON
|
|
201
161
|
start_value(ch)
|
202
162
|
when :start_object
|
203
163
|
case ch
|
204
|
-
when RIGHT_BRACE
|
205
|
-
end_container(:object)
|
206
164
|
when QUOTE
|
207
165
|
@state = :start_string
|
208
166
|
@stack.push(:key)
|
167
|
+
when RIGHT_BRACE
|
168
|
+
end_container(:object)
|
209
169
|
when WS
|
210
170
|
# ignore
|
211
171
|
else
|
@@ -260,7 +220,7 @@ module JSON
|
|
260
220
|
if @unicode.size == 4
|
261
221
|
codepoint = @unicode.slice!(0, 4).hex
|
262
222
|
if codepoint >= 0xD800 && codepoint <= 0xDBFF
|
263
|
-
error('Expected low surrogate pair half') if @stack.
|
223
|
+
error('Expected low surrogate pair half') if @stack[-1].is_a?(Fixnum)
|
264
224
|
@state = :start_surrogate_pair
|
265
225
|
@stack.push(codepoint)
|
266
226
|
elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
|
@@ -402,17 +362,17 @@ module JSON
|
|
402
362
|
case ch
|
403
363
|
when COMMA
|
404
364
|
@state = :value_sep
|
405
|
-
when RIGHT_BRACKET
|
406
|
-
end_container(:array)
|
407
365
|
when RIGHT_BRACE
|
408
366
|
end_container(:object)
|
367
|
+
when RIGHT_BRACKET
|
368
|
+
end_container(:array)
|
409
369
|
when WS
|
410
370
|
# ignore
|
411
371
|
else
|
412
372
|
error('Expected comma or object or array close')
|
413
373
|
end
|
414
374
|
when :value_sep
|
415
|
-
if @stack
|
375
|
+
if @stack[-1] == :object
|
416
376
|
case ch
|
417
377
|
when QUOTE
|
418
378
|
@state = :start_string
|
@@ -431,6 +391,46 @@ module JSON
|
|
431
391
|
end
|
432
392
|
end
|
433
393
|
|
394
|
+
# Drain any remaining buffered characters into the parser to complete
|
395
|
+
# the parsing of the document.
|
396
|
+
#
|
397
|
+
# This is only required when parsing a document containing a single
|
398
|
+
# numeric value, integer or float. The parser has no other way to
|
399
|
+
# detect when it should no longer expect additional characters with
|
400
|
+
# which to complete the parse, so it must be signaled by a call to
|
401
|
+
# this method.
|
402
|
+
#
|
403
|
+
# If you're parsing more typical object or array documents, there's no
|
404
|
+
# need to call `finish` because the parse will complete when the final
|
405
|
+
# closing `]` or `}` character is scanned.
|
406
|
+
#
|
407
|
+
# Raises a JSON::Stream::ParserError if the JSON data is malformed.
|
408
|
+
#
|
409
|
+
# Returns nothing.
|
410
|
+
def finish
|
411
|
+
# Partial multi-byte character waiting for completion bytes.
|
412
|
+
error('Unexpected end-of-file') unless @utf8.empty?
|
413
|
+
|
414
|
+
# Partial array, object, or string.
|
415
|
+
error('Unexpected end-of-file') unless @stack.empty?
|
416
|
+
|
417
|
+
case @state
|
418
|
+
when :end_document
|
419
|
+
# done, do nothing
|
420
|
+
when :in_float
|
421
|
+
end_value(@buf.to_f)
|
422
|
+
when :in_exponent
|
423
|
+
error('Unexpected end-of-file') unless @buf =~ DIGIT_END
|
424
|
+
end_value(@buf.to_f)
|
425
|
+
when :start_zero
|
426
|
+
end_value(@buf.to_i)
|
427
|
+
when :start_int
|
428
|
+
end_value(@buf.to_i)
|
429
|
+
else
|
430
|
+
error('Unexpected end-of-file')
|
431
|
+
end
|
432
|
+
end
|
433
|
+
|
434
434
|
private
|
435
435
|
|
436
436
|
# Invoke all registered observer procs for the event type.
|
data/lib/json/stream/version.rb
CHANGED
data/spec/buffer_spec.rb
CHANGED
@@ -51,6 +51,13 @@ describe JSON::Stream::Buffer do
|
|
51
51
|
assert_equal "\u{10102}", subject << "\x82"
|
52
52
|
end
|
53
53
|
|
54
|
+
it 'rejects valid utf-8 followed by partial two byte sequence' do
|
55
|
+
assert_equal '[', subject << '['
|
56
|
+
assert_equal '"', subject << '"'
|
57
|
+
assert_equal '', subject << "\xC3"
|
58
|
+
-> { subject << '"' }.must_raise JSON::Stream::ParserError
|
59
|
+
end
|
60
|
+
|
54
61
|
it 'rejects invalid two byte start characters' do
|
55
62
|
-> { subject << "\xC3\xC3" }.must_raise JSON::Stream::ParserError
|
56
63
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json-stream
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Graham
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|