json-stream 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/lib/json/stream/buffer.rb +16 -8
- data/lib/json/stream/builder.rb +9 -6
- data/lib/json/stream/parser.rb +46 -46
- data/lib/json/stream/version.rb +1 -1
- data/spec/buffer_spec.rb +7 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62af90ebe18d5c8a58ca9b75695bf2c403b4c4a0
|
4
|
+
data.tar.gz: d3176279af8156702e63ffdf4c963ab6f0e25cb3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d757855dcd79878bb9d5cbdd74c05252bde3286259b88315e95b3302ce60bc24bbf2d6f6e0ab32a5507b55bbdeae3b63ab405368880cfb29b1981f3cb291146
|
7
|
+
data.tar.gz: 0a90cf028ed263a4dc571c4834e1b033d9c23b17aaaef36a7275d44e804a69c1c7c8090ea119411858608bfb45a8c6ada7b2e4e6c8862294ef54ef091b1030e0
|
data/README.md
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
|
3
3
|
JSON::Stream is a JSON parser, based on a finite state machine, that generates
|
4
4
|
events for each state change. This allows streaming both the JSON document into
|
5
|
-
memory and the parsed object graph out of memory to some other process.
|
6
|
-
|
7
|
-
|
5
|
+
memory and the parsed object graph out of memory to some other process.
|
6
|
+
|
7
|
+
This is much like an XML SAX parser that generates events during parsing. There
|
8
|
+
is no requirement for the document, or the object graph, to be fully buffered in
|
8
9
|
memory. This is best suited for huge JSON documents that won't fit in memory.
|
9
10
|
For example, streaming and processing large map/reduce views from Apache
|
10
11
|
CouchDB.
|
data/lib/json/stream/buffer.rb
CHANGED
@@ -14,7 +14,7 @@ module JSON
|
|
14
14
|
class Buffer
|
15
15
|
def initialize
|
16
16
|
@state = :start
|
17
|
-
@
|
17
|
+
@buffer = []
|
18
18
|
@need = 0
|
19
19
|
end
|
20
20
|
|
@@ -29,6 +29,12 @@ module JSON
|
|
29
29
|
#
|
30
30
|
# Returns a UTF-8 encoded String.
|
31
31
|
def <<(data)
|
32
|
+
# Avoid state machine for complete UTF-8.
|
33
|
+
if @buffer.empty?
|
34
|
+
data.force_encoding(Encoding::UTF_8)
|
35
|
+
return data if data.valid_encoding?
|
36
|
+
end
|
37
|
+
|
32
38
|
bytes = []
|
33
39
|
data.each_byte do |byte|
|
34
40
|
case @state
|
@@ -37,7 +43,7 @@ module JSON
|
|
37
43
|
bytes << byte
|
38
44
|
elsif byte >= 192
|
39
45
|
@state = :multi_byte
|
40
|
-
@
|
46
|
+
@buffer << byte
|
41
47
|
@need =
|
42
48
|
case
|
43
49
|
when byte >= 240 then 4
|
@@ -49,9 +55,9 @@ module JSON
|
|
49
55
|
end
|
50
56
|
when :multi_byte
|
51
57
|
if byte > 127 && byte < 192
|
52
|
-
@
|
53
|
-
if @
|
54
|
-
bytes += @
|
58
|
+
@buffer << byte
|
59
|
+
if @buffer.size == @need
|
60
|
+
bytes += @buffer.slice!(0, @buffer.size)
|
55
61
|
@state = :start
|
56
62
|
end
|
57
63
|
else
|
@@ -59,8 +65,10 @@ module JSON
|
|
59
65
|
end
|
60
66
|
end
|
61
67
|
end
|
62
|
-
|
63
|
-
|
68
|
+
|
69
|
+
# Build UTF-8 encoded string from completed codepoints.
|
70
|
+
bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |text|
|
71
|
+
error('Invalid UTF-8 byte sequence') unless text.valid_encoding?
|
64
72
|
end
|
65
73
|
end
|
66
74
|
|
@@ -82,7 +90,7 @@ module JSON
|
|
82
90
|
#
|
83
91
|
# Returns true if the buffer is empty.
|
84
92
|
def empty?
|
85
|
-
@
|
93
|
+
@buffer.empty?
|
86
94
|
end
|
87
95
|
|
88
96
|
private
|
data/lib/json/stream/builder.rb
CHANGED
@@ -38,13 +38,15 @@ module JSON
|
|
38
38
|
|
39
39
|
def end_object
|
40
40
|
return if @stack.size == 1
|
41
|
+
|
41
42
|
node = @stack.pop
|
43
|
+
top = @stack[-1]
|
42
44
|
|
43
|
-
case
|
45
|
+
case top
|
44
46
|
when Hash
|
45
|
-
|
47
|
+
top[@keys.pop] = node
|
46
48
|
when Array
|
47
|
-
|
49
|
+
top << node
|
48
50
|
end
|
49
51
|
end
|
50
52
|
alias :end_array :end_object
|
@@ -58,11 +60,12 @@ module JSON
|
|
58
60
|
end
|
59
61
|
|
60
62
|
def value(value)
|
61
|
-
|
63
|
+
top = @stack[-1]
|
64
|
+
case top
|
62
65
|
when Hash
|
63
|
-
|
66
|
+
top[@keys.pop] = value
|
64
67
|
when Array
|
65
|
-
|
68
|
+
top << value
|
66
69
|
else
|
67
70
|
@stack << value
|
68
71
|
end
|
data/lib/json/stream/parser.rb
CHANGED
@@ -73,46 +73,6 @@ module JSON
|
|
73
73
|
stream.close
|
74
74
|
end
|
75
75
|
|
76
|
-
# Drain any remaining buffered characters into the parser to complete
|
77
|
-
# the parsing of the document.
|
78
|
-
#
|
79
|
-
# This is only required when parsing a document containing a single
|
80
|
-
# numeric value, integer or float. The parser has no other way to
|
81
|
-
# detect when it should no longer expect additional characters with
|
82
|
-
# which to complete the parse, so it must be signaled by a call to
|
83
|
-
# this method.
|
84
|
-
#
|
85
|
-
# If you're parsing more typical object or array documents, there's no
|
86
|
-
# need to call `finish` because the parse will complete when the final
|
87
|
-
# closing `]` or `}` character is scanned.
|
88
|
-
#
|
89
|
-
# Raises a JSON::Stream::ParserError if the JSON data is malformed.
|
90
|
-
#
|
91
|
-
# Returns nothing.
|
92
|
-
def finish
|
93
|
-
# Partial multi-byte character waiting for completion bytes.
|
94
|
-
error('Unexpected end-of-file') unless @utf8.empty?
|
95
|
-
|
96
|
-
# Partial array, object, or string.
|
97
|
-
error('Unexpected end-of-file') unless @stack.empty?
|
98
|
-
|
99
|
-
case @state
|
100
|
-
when :end_document
|
101
|
-
# done, do nothing
|
102
|
-
when :in_float
|
103
|
-
end_value(@buf.to_f)
|
104
|
-
when :in_exponent
|
105
|
-
error('Unexpected end-of-file') unless @buf =~ DIGIT_END
|
106
|
-
end_value(@buf.to_f)
|
107
|
-
when :start_zero
|
108
|
-
end_value(@buf.to_i)
|
109
|
-
when :start_int
|
110
|
-
end_value(@buf.to_i)
|
111
|
-
else
|
112
|
-
error('Unexpected end-of-file')
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
76
|
# Create a new parser with an optional initialization block where
|
117
77
|
# we can register event callbacks.
|
118
78
|
#
|
@@ -201,11 +161,11 @@ module JSON
|
|
201
161
|
start_value(ch)
|
202
162
|
when :start_object
|
203
163
|
case ch
|
204
|
-
when RIGHT_BRACE
|
205
|
-
end_container(:object)
|
206
164
|
when QUOTE
|
207
165
|
@state = :start_string
|
208
166
|
@stack.push(:key)
|
167
|
+
when RIGHT_BRACE
|
168
|
+
end_container(:object)
|
209
169
|
when WS
|
210
170
|
# ignore
|
211
171
|
else
|
@@ -260,7 +220,7 @@ module JSON
|
|
260
220
|
if @unicode.size == 4
|
261
221
|
codepoint = @unicode.slice!(0, 4).hex
|
262
222
|
if codepoint >= 0xD800 && codepoint <= 0xDBFF
|
263
|
-
error('Expected low surrogate pair half') if @stack.
|
223
|
+
error('Expected low surrogate pair half') if @stack[-1].is_a?(Fixnum)
|
264
224
|
@state = :start_surrogate_pair
|
265
225
|
@stack.push(codepoint)
|
266
226
|
elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
|
@@ -402,17 +362,17 @@ module JSON
|
|
402
362
|
case ch
|
403
363
|
when COMMA
|
404
364
|
@state = :value_sep
|
405
|
-
when RIGHT_BRACKET
|
406
|
-
end_container(:array)
|
407
365
|
when RIGHT_BRACE
|
408
366
|
end_container(:object)
|
367
|
+
when RIGHT_BRACKET
|
368
|
+
end_container(:array)
|
409
369
|
when WS
|
410
370
|
# ignore
|
411
371
|
else
|
412
372
|
error('Expected comma or object or array close')
|
413
373
|
end
|
414
374
|
when :value_sep
|
415
|
-
if @stack
|
375
|
+
if @stack[-1] == :object
|
416
376
|
case ch
|
417
377
|
when QUOTE
|
418
378
|
@state = :start_string
|
@@ -431,6 +391,46 @@ module JSON
|
|
431
391
|
end
|
432
392
|
end
|
433
393
|
|
394
|
+
# Drain any remaining buffered characters into the parser to complete
|
395
|
+
# the parsing of the document.
|
396
|
+
#
|
397
|
+
# This is only required when parsing a document containing a single
|
398
|
+
# numeric value, integer or float. The parser has no other way to
|
399
|
+
# detect when it should no longer expect additional characters with
|
400
|
+
# which to complete the parse, so it must be signaled by a call to
|
401
|
+
# this method.
|
402
|
+
#
|
403
|
+
# If you're parsing more typical object or array documents, there's no
|
404
|
+
# need to call `finish` because the parse will complete when the final
|
405
|
+
# closing `]` or `}` character is scanned.
|
406
|
+
#
|
407
|
+
# Raises a JSON::Stream::ParserError if the JSON data is malformed.
|
408
|
+
#
|
409
|
+
# Returns nothing.
|
410
|
+
def finish
|
411
|
+
# Partial multi-byte character waiting for completion bytes.
|
412
|
+
error('Unexpected end-of-file') unless @utf8.empty?
|
413
|
+
|
414
|
+
# Partial array, object, or string.
|
415
|
+
error('Unexpected end-of-file') unless @stack.empty?
|
416
|
+
|
417
|
+
case @state
|
418
|
+
when :end_document
|
419
|
+
# done, do nothing
|
420
|
+
when :in_float
|
421
|
+
end_value(@buf.to_f)
|
422
|
+
when :in_exponent
|
423
|
+
error('Unexpected end-of-file') unless @buf =~ DIGIT_END
|
424
|
+
end_value(@buf.to_f)
|
425
|
+
when :start_zero
|
426
|
+
end_value(@buf.to_i)
|
427
|
+
when :start_int
|
428
|
+
end_value(@buf.to_i)
|
429
|
+
else
|
430
|
+
error('Unexpected end-of-file')
|
431
|
+
end
|
432
|
+
end
|
433
|
+
|
434
434
|
private
|
435
435
|
|
436
436
|
# Invoke all registered observer procs for the event type.
|
data/lib/json/stream/version.rb
CHANGED
data/spec/buffer_spec.rb
CHANGED
@@ -51,6 +51,13 @@ describe JSON::Stream::Buffer do
|
|
51
51
|
assert_equal "\u{10102}", subject << "\x82"
|
52
52
|
end
|
53
53
|
|
54
|
+
it 'rejects valid utf-8 followed by partial two byte sequence' do
|
55
|
+
assert_equal '[', subject << '['
|
56
|
+
assert_equal '"', subject << '"'
|
57
|
+
assert_equal '', subject << "\xC3"
|
58
|
+
-> { subject << '"' }.must_raise JSON::Stream::ParserError
|
59
|
+
end
|
60
|
+
|
54
61
|
it 'rejects invalid two byte start characters' do
|
55
62
|
-> { subject << "\xC3\xC3" }.must_raise JSON::Stream::ParserError
|
56
63
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json-stream
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Graham
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|