json-stream 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile +2 -0
- data/LICENSE +1 -1
- data/README.md +15 -11
- data/json-stream.gemspec +6 -4
- data/lib/json/stream/buffer.rb +16 -10
- data/lib/json/stream/builder.rb +9 -8
- data/lib/json/stream/parser.rb +51 -53
- data/lib/json/stream/version.rb +1 -3
- metadata +39 -19
- data/spec/buffer_spec.rb +0 -103
- data/spec/builder_spec.rb +0 -157
- data/spec/fixtures/repository.json +0 -107
- data/spec/parser_spec.rb +0 -913
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f1db3759d432092d3f1cac4dd90fe23551f14cd33bff1fa209a4f32cb2399025
|
4
|
+
data.tar.gz: 5c9532adaae5591585b756686463e93e24beb371d7a78e15cf45ffe09e6b4247
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ac6519d888a81802453b964bc36db7639728c83ad67604516045fd6756c64c34579caa16050f1df32f1b9c8e6b4723433d81db8b2c63f59a2b846d9ab00eeb5
|
7
|
+
data.tar.gz: 7079d8a588958e49c016cc051f27b0ce6f24a902bbe3759702d50da1329815ae1ef929d41968ad24132eb748830062070cdf58b47261d05f4528caed7ad465da
|
data/Gemfile
ADDED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
|
3
3
|
JSON::Stream is a JSON parser, based on a finite state machine, that generates
|
4
4
|
events for each state change. This allows streaming both the JSON document into
|
5
|
-
memory and the parsed object graph out of memory to some other process.
|
6
|
-
|
7
|
-
|
5
|
+
memory and the parsed object graph out of memory to some other process.
|
6
|
+
|
7
|
+
This is much like an XML SAX parser that generates events during parsing. There
|
8
|
+
is no requirement for the document, or the object graph, to be fully buffered in
|
8
9
|
memory. This is best suited for huge JSON documents that won't fit in memory.
|
9
10
|
For example, streaming and processing large map/reduce views from Apache
|
10
11
|
CouchDB.
|
@@ -39,7 +40,7 @@ Again, while JSON::Stream can be used this way, if we just need to stream the
|
|
39
40
|
document from disk or the network, we're better off using the yajl-ruby gem.
|
40
41
|
|
41
42
|
Huge documents arriving over the network in small chunks to an EventMachine
|
42
|
-
receive_data loop is where JSON::Stream is really useful. Inside an
|
43
|
+
`receive_data` loop is where JSON::Stream is really useful. Inside an
|
43
44
|
EventMachine::Connection subclass we might have:
|
44
45
|
|
45
46
|
```ruby
|
@@ -51,8 +52,8 @@ def post_init
|
|
51
52
|
end_object { puts "end object" }
|
52
53
|
start_array { puts "start array" }
|
53
54
|
end_array { puts "end array" }
|
54
|
-
key {|k| puts "key: #{k}" }
|
55
|
-
value {|v| puts "value: #{v}" }
|
55
|
+
key { |k| puts "key: #{k}" }
|
56
|
+
value { |v| puts "value: #{v}" }
|
56
57
|
end
|
57
58
|
end
|
58
59
|
|
@@ -76,16 +77,19 @@ imagine the callbacks looking for an array named `rows` and processing sets
|
|
76
77
|
of these row objects in small batches. Millions of rows, streaming over the
|
77
78
|
network, can be processed in constant memory space this way.
|
78
79
|
|
79
|
-
## Dependencies
|
80
|
-
|
81
|
-
* ruby >= 1.9.2
|
82
|
-
* jruby >= 1.7
|
83
|
-
|
84
80
|
## Alternatives
|
85
81
|
|
86
82
|
* [json](https://github.com/flori/json)
|
87
83
|
* [yajl-ruby](https://github.com/brianmario/yajl-ruby)
|
88
84
|
* [yajl-ffi](https://github.com/dgraham/yajl-ffi)
|
85
|
+
* [application/json-seq](http://www.rfc-editor.org/rfc/rfc7464.txt)
|
86
|
+
|
87
|
+
## Development
|
88
|
+
|
89
|
+
```
|
90
|
+
$ bin/setup
|
91
|
+
$ bin/rake test
|
92
|
+
```
|
89
93
|
|
90
94
|
## License
|
91
95
|
|
data/json-stream.gemspec
CHANGED
@@ -11,10 +11,12 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.homepage = 'http://dgraham.github.io/json-stream/'
|
12
12
|
s.license = 'MIT'
|
13
13
|
|
14
|
-
s.files = Dir['[A-Z]*', 'json-stream.gemspec', '{lib}/**/*']
|
15
|
-
s.test_files = Dir['spec/**/*']
|
14
|
+
s.files = Dir['[A-Z]*', 'json-stream.gemspec', '{lib}/**/*'] - ['Gemfile.lock']
|
16
15
|
s.require_path = 'lib'
|
17
16
|
|
18
|
-
s.add_development_dependency '
|
19
|
-
s.
|
17
|
+
s.add_development_dependency 'bundler', '~> 2.1'
|
18
|
+
s.add_development_dependency 'minitest', '~> 5.10'
|
19
|
+
s.add_development_dependency 'rake', '~> 12.1'
|
20
|
+
|
21
|
+
s.required_ruby_version = '>= 2.0.0'
|
20
22
|
end
|
data/lib/json/stream/buffer.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
1
|
module JSON
|
4
2
|
module Stream
|
5
3
|
# A character buffer that expects a UTF-8 encoded stream of bytes.
|
@@ -14,7 +12,7 @@ module JSON
|
|
14
12
|
class Buffer
|
15
13
|
def initialize
|
16
14
|
@state = :start
|
17
|
-
@
|
15
|
+
@buffer = []
|
18
16
|
@need = 0
|
19
17
|
end
|
20
18
|
|
@@ -29,6 +27,12 @@ module JSON
|
|
29
27
|
#
|
30
28
|
# Returns a UTF-8 encoded String.
|
31
29
|
def <<(data)
|
30
|
+
# Avoid state machine for complete UTF-8.
|
31
|
+
if @buffer.empty?
|
32
|
+
data.force_encoding(Encoding::UTF_8)
|
33
|
+
return data if data.valid_encoding?
|
34
|
+
end
|
35
|
+
|
32
36
|
bytes = []
|
33
37
|
data.each_byte do |byte|
|
34
38
|
case @state
|
@@ -37,7 +41,7 @@ module JSON
|
|
37
41
|
bytes << byte
|
38
42
|
elsif byte >= 192
|
39
43
|
@state = :multi_byte
|
40
|
-
@
|
44
|
+
@buffer << byte
|
41
45
|
@need =
|
42
46
|
case
|
43
47
|
when byte >= 240 then 4
|
@@ -49,9 +53,9 @@ module JSON
|
|
49
53
|
end
|
50
54
|
when :multi_byte
|
51
55
|
if byte > 127 && byte < 192
|
52
|
-
@
|
53
|
-
if @
|
54
|
-
bytes += @
|
56
|
+
@buffer << byte
|
57
|
+
if @buffer.size == @need
|
58
|
+
bytes += @buffer.slice!(0, @buffer.size)
|
55
59
|
@state = :start
|
56
60
|
end
|
57
61
|
else
|
@@ -59,8 +63,10 @@ module JSON
|
|
59
63
|
end
|
60
64
|
end
|
61
65
|
end
|
62
|
-
|
63
|
-
|
66
|
+
|
67
|
+
# Build UTF-8 encoded string from completed codepoints.
|
68
|
+
bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |text|
|
69
|
+
error('Invalid UTF-8 byte sequence') unless text.valid_encoding?
|
64
70
|
end
|
65
71
|
end
|
66
72
|
|
@@ -82,7 +88,7 @@ module JSON
|
|
82
88
|
#
|
83
89
|
# Returns true if the buffer is empty.
|
84
90
|
def empty?
|
85
|
-
@
|
91
|
+
@buffer.empty?
|
86
92
|
end
|
87
93
|
|
88
94
|
private
|
data/lib/json/stream/builder.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
1
|
module JSON
|
4
2
|
module Stream
|
5
3
|
# A parser listener that builds a full, in memory, object from a JSON
|
@@ -38,13 +36,15 @@ module JSON
|
|
38
36
|
|
39
37
|
def end_object
|
40
38
|
return if @stack.size == 1
|
39
|
+
|
41
40
|
node = @stack.pop
|
41
|
+
top = @stack[-1]
|
42
42
|
|
43
|
-
case
|
43
|
+
case top
|
44
44
|
when Hash
|
45
|
-
|
45
|
+
top[@keys.pop] = node
|
46
46
|
when Array
|
47
|
-
|
47
|
+
top << node
|
48
48
|
end
|
49
49
|
end
|
50
50
|
alias :end_array :end_object
|
@@ -58,11 +58,12 @@ module JSON
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def value(value)
|
61
|
-
|
61
|
+
top = @stack[-1]
|
62
|
+
case top
|
62
63
|
when Hash
|
63
|
-
|
64
|
+
top[@keys.pop] = value
|
64
65
|
when Array
|
65
|
-
|
66
|
+
top << value
|
66
67
|
else
|
67
68
|
@stack << value
|
68
69
|
end
|
data/lib/json/stream/parser.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
1
|
module JSON
|
4
2
|
module Stream
|
5
3
|
# Raised on any invalid JSON text.
|
@@ -12,8 +10,8 @@ module JSON
|
|
12
10
|
# Examples
|
13
11
|
#
|
14
12
|
# parser = JSON::Stream::Parser.new
|
15
|
-
# parser.key {|key| puts key }
|
16
|
-
# parser.value {|value| puts value }
|
13
|
+
# parser.key { |key| puts key }
|
14
|
+
# parser.value { |value| puts value }
|
17
15
|
# parser << '{"answer":'
|
18
16
|
# parser << ' 42}'
|
19
17
|
class Parser
|
@@ -73,46 +71,6 @@ module JSON
|
|
73
71
|
stream.close
|
74
72
|
end
|
75
73
|
|
76
|
-
# Drain any remaining buffered characters into the parser to complete
|
77
|
-
# the parsing of the document.
|
78
|
-
#
|
79
|
-
# This is only required when parsing a document containing a single
|
80
|
-
# numeric value, integer or float. The parser has no other way to
|
81
|
-
# detect when it should no longer expect additional characters with
|
82
|
-
# which to complete the parse, so it must be signaled by a call to
|
83
|
-
# this method.
|
84
|
-
#
|
85
|
-
# If you're parsing more typical object or array documents, there's no
|
86
|
-
# need to call `finish` because the parse will complete when the final
|
87
|
-
# closing `]` or `}` character is scanned.
|
88
|
-
#
|
89
|
-
# Raises a JSON::Stream::ParserError if the JSON data is malformed.
|
90
|
-
#
|
91
|
-
# Returns nothing.
|
92
|
-
def finish
|
93
|
-
# Partial multi-byte character waiting for completion bytes.
|
94
|
-
error('Unexpected end-of-file') unless @utf8.empty?
|
95
|
-
|
96
|
-
# Partial array, object, or string.
|
97
|
-
error('Unexpected end-of-file') unless @stack.empty?
|
98
|
-
|
99
|
-
case @state
|
100
|
-
when :end_document
|
101
|
-
# done, do nothing
|
102
|
-
when :in_float
|
103
|
-
end_value(@buf.to_f)
|
104
|
-
when :in_exponent
|
105
|
-
error('Unexpected end-of-file') unless @buf =~ DIGIT_END
|
106
|
-
end_value(@buf.to_f)
|
107
|
-
when :start_zero
|
108
|
-
end_value(@buf.to_i)
|
109
|
-
when :start_int
|
110
|
-
end_value(@buf.to_i)
|
111
|
-
else
|
112
|
-
error('Unexpected end-of-file')
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
74
|
# Create a new parser with an optional initialization block where
|
117
75
|
# we can register event callbacks.
|
118
76
|
#
|
@@ -125,8 +83,8 @@ module JSON
|
|
125
83
|
# end_object { puts "end object" }
|
126
84
|
# start_array { puts "start array" }
|
127
85
|
# end_array { puts "end array" }
|
128
|
-
# key {|k| puts "key: #{k}" }
|
129
|
-
# value {|v| puts "value: #{v}" }
|
86
|
+
# key { |k| puts "key: #{k}" }
|
87
|
+
# value { |v| puts "value: #{v}" }
|
130
88
|
# end
|
131
89
|
def initialize(&block)
|
132
90
|
@state = :start_document
|
@@ -201,11 +159,11 @@ module JSON
|
|
201
159
|
start_value(ch)
|
202
160
|
when :start_object
|
203
161
|
case ch
|
204
|
-
when RIGHT_BRACE
|
205
|
-
end_container(:object)
|
206
162
|
when QUOTE
|
207
163
|
@state = :start_string
|
208
164
|
@stack.push(:key)
|
165
|
+
when RIGHT_BRACE
|
166
|
+
end_container(:object)
|
209
167
|
when WS
|
210
168
|
# ignore
|
211
169
|
else
|
@@ -260,12 +218,12 @@ module JSON
|
|
260
218
|
if @unicode.size == 4
|
261
219
|
codepoint = @unicode.slice!(0, 4).hex
|
262
220
|
if codepoint >= 0xD800 && codepoint <= 0xDBFF
|
263
|
-
error('Expected low surrogate pair half') if @stack.
|
221
|
+
error('Expected low surrogate pair half') if @stack[-1].is_a?(Integer)
|
264
222
|
@state = :start_surrogate_pair
|
265
223
|
@stack.push(codepoint)
|
266
224
|
elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
|
267
225
|
high = @stack.pop
|
268
|
-
error('Expected high surrogate pair half') unless high.is_a?(
|
226
|
+
error('Expected high surrogate pair half') unless high.is_a?(Integer)
|
269
227
|
pair = ((high - 0xD800) * 0x400) + (codepoint - 0xDC00) + 0x10000
|
270
228
|
@buf << pair
|
271
229
|
@state = :start_string
|
@@ -402,17 +360,17 @@ module JSON
|
|
402
360
|
case ch
|
403
361
|
when COMMA
|
404
362
|
@state = :value_sep
|
405
|
-
when RIGHT_BRACKET
|
406
|
-
end_container(:array)
|
407
363
|
when RIGHT_BRACE
|
408
364
|
end_container(:object)
|
365
|
+
when RIGHT_BRACKET
|
366
|
+
end_container(:array)
|
409
367
|
when WS
|
410
368
|
# ignore
|
411
369
|
else
|
412
370
|
error('Expected comma or object or array close')
|
413
371
|
end
|
414
372
|
when :value_sep
|
415
|
-
if @stack
|
373
|
+
if @stack[-1] == :object
|
416
374
|
case ch
|
417
375
|
when QUOTE
|
418
376
|
@state = :start_string
|
@@ -431,6 +389,46 @@ module JSON
|
|
431
389
|
end
|
432
390
|
end
|
433
391
|
|
392
|
+
# Drain any remaining buffered characters into the parser to complete
|
393
|
+
# the parsing of the document.
|
394
|
+
#
|
395
|
+
# This is only required when parsing a document containing a single
|
396
|
+
# numeric value, integer or float. The parser has no other way to
|
397
|
+
# detect when it should no longer expect additional characters with
|
398
|
+
# which to complete the parse, so it must be signaled by a call to
|
399
|
+
# this method.
|
400
|
+
#
|
401
|
+
# If you're parsing more typical object or array documents, there's no
|
402
|
+
# need to call `finish` because the parse will complete when the final
|
403
|
+
# closing `]` or `}` character is scanned.
|
404
|
+
#
|
405
|
+
# Raises a JSON::Stream::ParserError if the JSON data is malformed.
|
406
|
+
#
|
407
|
+
# Returns nothing.
|
408
|
+
def finish
|
409
|
+
# Partial multi-byte character waiting for completion bytes.
|
410
|
+
error('Unexpected end-of-file') unless @utf8.empty?
|
411
|
+
|
412
|
+
# Partial array, object, or string.
|
413
|
+
error('Unexpected end-of-file') unless @stack.empty?
|
414
|
+
|
415
|
+
case @state
|
416
|
+
when :end_document
|
417
|
+
# done, do nothing
|
418
|
+
when :in_float
|
419
|
+
end_value(@buf.to_f)
|
420
|
+
when :in_exponent
|
421
|
+
error('Unexpected end-of-file') unless @buf =~ DIGIT_END
|
422
|
+
end_value(@buf.to_f)
|
423
|
+
when :start_zero
|
424
|
+
end_value(@buf.to_i)
|
425
|
+
when :start_int
|
426
|
+
end_value(@buf.to_i)
|
427
|
+
else
|
428
|
+
error('Unexpected end-of-file')
|
429
|
+
end
|
430
|
+
end
|
431
|
+
|
434
432
|
private
|
435
433
|
|
436
434
|
# Invoke all registered observer procs for the event type.
|
data/lib/json/stream/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json-stream
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Graham
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-04-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.1'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5.10'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5.10'
|
13
41
|
- !ruby/object:Gem::Dependency
|
14
42
|
name: rake
|
15
43
|
requirement: !ruby/object:Gem::Requirement
|
16
44
|
requirements:
|
17
45
|
- - "~>"
|
18
46
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
47
|
+
version: '12.1'
|
20
48
|
type: :development
|
21
49
|
prerelease: false
|
22
50
|
version_requirements: !ruby/object:Gem::Requirement
|
23
51
|
requirements:
|
24
52
|
- - "~>"
|
25
53
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
54
|
+
version: '12.1'
|
27
55
|
description: A parser best suited for huge JSON documents that don't fit in memory.
|
28
56
|
email:
|
29
57
|
- david.malcom.graham@gmail.com
|
@@ -31,6 +59,7 @@ executables: []
|
|
31
59
|
extensions: []
|
32
60
|
extra_rdoc_files: []
|
33
61
|
files:
|
62
|
+
- Gemfile
|
34
63
|
- LICENSE
|
35
64
|
- README.md
|
36
65
|
- Rakefile
|
@@ -40,15 +69,11 @@ files:
|
|
40
69
|
- lib/json/stream/builder.rb
|
41
70
|
- lib/json/stream/parser.rb
|
42
71
|
- lib/json/stream/version.rb
|
43
|
-
- spec/buffer_spec.rb
|
44
|
-
- spec/builder_spec.rb
|
45
|
-
- spec/fixtures/repository.json
|
46
|
-
- spec/parser_spec.rb
|
47
72
|
homepage: http://dgraham.github.io/json-stream/
|
48
73
|
licenses:
|
49
74
|
- MIT
|
50
75
|
metadata: {}
|
51
|
-
post_install_message:
|
76
|
+
post_install_message:
|
52
77
|
rdoc_options: []
|
53
78
|
require_paths:
|
54
79
|
- lib
|
@@ -56,20 +81,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
56
81
|
requirements:
|
57
82
|
- - ">="
|
58
83
|
- !ruby/object:Gem::Version
|
59
|
-
version:
|
84
|
+
version: 2.0.0
|
60
85
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
86
|
requirements:
|
62
87
|
- - ">="
|
63
88
|
- !ruby/object:Gem::Version
|
64
89
|
version: '0'
|
65
90
|
requirements: []
|
66
|
-
|
67
|
-
|
68
|
-
signing_key:
|
91
|
+
rubygems_version: 3.5.4
|
92
|
+
signing_key:
|
69
93
|
specification_version: 4
|
70
94
|
summary: A streaming JSON parser that generates SAX-like events.
|
71
|
-
test_files:
|
72
|
-
- spec/buffer_spec.rb
|
73
|
-
- spec/builder_spec.rb
|
74
|
-
- spec/fixtures/repository.json
|
75
|
-
- spec/parser_spec.rb
|
95
|
+
test_files: []
|
data/spec/buffer_spec.rb
DELETED
@@ -1,103 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'json/stream'
|
4
|
-
require 'minitest/autorun'
|
5
|
-
|
6
|
-
describe JSON::Stream::Buffer do
|
7
|
-
subject { JSON::Stream::Buffer.new }
|
8
|
-
|
9
|
-
it 'accepts single byte characters' do
|
10
|
-
assert_equal "", subject << ""
|
11
|
-
assert_equal "abc", subject << "abc"
|
12
|
-
assert_equal "\u0000abc", subject << "\u0000abc"
|
13
|
-
end
|
14
|
-
|
15
|
-
# The é character can be a single codepoint \u00e9 or two codepoints
|
16
|
-
# \u0065\u0301. The first is encoded in 2 bytes, the second in 3 bytes.
|
17
|
-
# The json and yajl-ruby gems and CouchDB do not normalize unicode text
|
18
|
-
# so neither will we. Although, a good way to normalize is by calling
|
19
|
-
# ActiveSupport::Multibyte::Chars.new("é").normalize(:c).
|
20
|
-
it 'accepts combined characters' do
|
21
|
-
assert_equal "\u0065\u0301", subject << "\u0065\u0301"
|
22
|
-
assert_equal 3, (subject << "\u0065\u0301").bytesize
|
23
|
-
assert_equal 2, (subject << "\u0065\u0301").size
|
24
|
-
|
25
|
-
assert_equal "\u00e9", subject << "\u00e9"
|
26
|
-
assert_equal 2, (subject << "\u00e9").bytesize
|
27
|
-
assert_equal 1, (subject << "\u00e9").size
|
28
|
-
end
|
29
|
-
|
30
|
-
it 'accepts valid two byte characters' do
|
31
|
-
assert_equal "abcé", subject << "abcé"
|
32
|
-
assert_equal "a", subject << "a\xC3"
|
33
|
-
assert_equal "é", subject << "\xA9"
|
34
|
-
assert_equal "", subject << "\xC3"
|
35
|
-
assert_equal "é", subject << "\xA9"
|
36
|
-
assert_equal "é", subject << "\xC3\xA9"
|
37
|
-
end
|
38
|
-
|
39
|
-
it 'accepts valid three byte characters' do
|
40
|
-
assert_equal "abcé\u2603", subject << "abcé\u2603"
|
41
|
-
assert_equal "a", subject << "a\xE2"
|
42
|
-
assert_equal "", subject << "\x98"
|
43
|
-
assert_equal "\u2603", subject << "\x83"
|
44
|
-
end
|
45
|
-
|
46
|
-
it 'accepts valid four byte characters' do
|
47
|
-
assert_equal "abcé\u2603\u{10102}é", subject << "abcé\u2603\u{10102}é"
|
48
|
-
assert_equal "a", subject << "a\xF0"
|
49
|
-
assert_equal "", subject << "\x90"
|
50
|
-
assert_equal "", subject << "\x84"
|
51
|
-
assert_equal "\u{10102}", subject << "\x82"
|
52
|
-
end
|
53
|
-
|
54
|
-
it 'rejects invalid two byte start characters' do
|
55
|
-
-> { subject << "\xC3\xC3" }.must_raise JSON::Stream::ParserError
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'rejects invalid three byte start characters' do
|
59
|
-
-> { subject << "\xE2\xE2" }.must_raise JSON::Stream::ParserError
|
60
|
-
end
|
61
|
-
|
62
|
-
it 'rejects invalid four byte start characters' do
|
63
|
-
-> { subject << "\xF0\xF0" }.must_raise JSON::Stream::ParserError
|
64
|
-
end
|
65
|
-
|
66
|
-
it 'rejects a two byte start with single byte continuation character' do
|
67
|
-
-> { subject << "\xC3\u0000" }.must_raise JSON::Stream::ParserError
|
68
|
-
end
|
69
|
-
|
70
|
-
it 'rejects a three byte start with single byte continuation character' do
|
71
|
-
-> { subject << "\xE2\u0010" }.must_raise JSON::Stream::ParserError
|
72
|
-
end
|
73
|
-
|
74
|
-
it 'rejects a four byte start with single byte continuation character' do
|
75
|
-
-> { subject << "\xF0a" }.must_raise JSON::Stream::ParserError
|
76
|
-
end
|
77
|
-
|
78
|
-
it 'rejects an invalid continuation character' do
|
79
|
-
-> { subject << "\xA9" }.must_raise JSON::Stream::ParserError
|
80
|
-
end
|
81
|
-
|
82
|
-
it 'rejects an overlong form' do
|
83
|
-
-> { subject << "\xC0\x80" }.must_raise JSON::Stream::ParserError
|
84
|
-
end
|
85
|
-
|
86
|
-
describe 'checking for empty buffers' do
|
87
|
-
it 'is initially empty' do
|
88
|
-
assert subject.empty?
|
89
|
-
end
|
90
|
-
|
91
|
-
it 'is empty after processing complete characters' do
|
92
|
-
subject << 'test'
|
93
|
-
assert subject.empty?
|
94
|
-
end
|
95
|
-
|
96
|
-
it 'is not empty after processing partial multi-byte characters' do
|
97
|
-
subject << "\xC3"
|
98
|
-
refute subject.empty?
|
99
|
-
subject << "\xA9"
|
100
|
-
assert subject.empty?
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|