json-stream 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Gemfile +2 -0
- data/LICENSE +1 -1
- data/README.md +15 -11
- data/json-stream.gemspec +6 -4
- data/lib/json/stream/buffer.rb +16 -10
- data/lib/json/stream/builder.rb +9 -8
- data/lib/json/stream/parser.rb +51 -53
- data/lib/json/stream/version.rb +1 -3
- metadata +39 -19
- data/spec/buffer_spec.rb +0 -103
- data/spec/builder_spec.rb +0 -157
- data/spec/fixtures/repository.json +0 -107
- data/spec/parser_spec.rb +0 -913
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f1db3759d432092d3f1cac4dd90fe23551f14cd33bff1fa209a4f32cb2399025
|
4
|
+
data.tar.gz: 5c9532adaae5591585b756686463e93e24beb371d7a78e15cf45ffe09e6b4247
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ac6519d888a81802453b964bc36db7639728c83ad67604516045fd6756c64c34579caa16050f1df32f1b9c8e6b4723433d81db8b2c63f59a2b846d9ab00eeb5
|
7
|
+
data.tar.gz: 7079d8a588958e49c016cc051f27b0ce6f24a902bbe3759702d50da1329815ae1ef929d41968ad24132eb748830062070cdf58b47261d05f4528caed7ad465da
|
data/Gemfile
ADDED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
|
3
3
|
JSON::Stream is a JSON parser, based on a finite state machine, that generates
|
4
4
|
events for each state change. This allows streaming both the JSON document into
|
5
|
-
memory and the parsed object graph out of memory to some other process.
|
6
|
-
|
7
|
-
|
5
|
+
memory and the parsed object graph out of memory to some other process.
|
6
|
+
|
7
|
+
This is much like an XML SAX parser that generates events during parsing. There
|
8
|
+
is no requirement for the document, or the object graph, to be fully buffered in
|
8
9
|
memory. This is best suited for huge JSON documents that won't fit in memory.
|
9
10
|
For example, streaming and processing large map/reduce views from Apache
|
10
11
|
CouchDB.
|
@@ -39,7 +40,7 @@ Again, while JSON::Stream can be used this way, if we just need to stream the
|
|
39
40
|
document from disk or the network, we're better off using the yajl-ruby gem.
|
40
41
|
|
41
42
|
Huge documents arriving over the network in small chunks to an EventMachine
|
42
|
-
receive_data loop is where JSON::Stream is really useful. Inside an
|
43
|
+
`receive_data` loop is where JSON::Stream is really useful. Inside an
|
43
44
|
EventMachine::Connection subclass we might have:
|
44
45
|
|
45
46
|
```ruby
|
@@ -51,8 +52,8 @@ def post_init
|
|
51
52
|
end_object { puts "end object" }
|
52
53
|
start_array { puts "start array" }
|
53
54
|
end_array { puts "end array" }
|
54
|
-
key {|k| puts "key: #{k}" }
|
55
|
-
value {|v| puts "value: #{v}" }
|
55
|
+
key { |k| puts "key: #{k}" }
|
56
|
+
value { |v| puts "value: #{v}" }
|
56
57
|
end
|
57
58
|
end
|
58
59
|
|
@@ -76,16 +77,19 @@ imagine the callbacks looking for an array named `rows` and processing sets
|
|
76
77
|
of these row objects in small batches. Millions of rows, streaming over the
|
77
78
|
network, can be processed in constant memory space this way.
|
78
79
|
|
79
|
-
## Dependencies
|
80
|
-
|
81
|
-
* ruby >= 1.9.2
|
82
|
-
* jruby >= 1.7
|
83
|
-
|
84
80
|
## Alternatives
|
85
81
|
|
86
82
|
* [json](https://github.com/flori/json)
|
87
83
|
* [yajl-ruby](https://github.com/brianmario/yajl-ruby)
|
88
84
|
* [yajl-ffi](https://github.com/dgraham/yajl-ffi)
|
85
|
+
* [application/json-seq](http://www.rfc-editor.org/rfc/rfc7464.txt)
|
86
|
+
|
87
|
+
## Development
|
88
|
+
|
89
|
+
```
|
90
|
+
$ bin/setup
|
91
|
+
$ bin/rake test
|
92
|
+
```
|
89
93
|
|
90
94
|
## License
|
91
95
|
|
data/json-stream.gemspec
CHANGED
@@ -11,10 +11,12 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.homepage = 'http://dgraham.github.io/json-stream/'
|
12
12
|
s.license = 'MIT'
|
13
13
|
|
14
|
-
s.files = Dir['[A-Z]*', 'json-stream.gemspec', '{lib}/**/*']
|
15
|
-
s.test_files = Dir['spec/**/*']
|
14
|
+
s.files = Dir['[A-Z]*', 'json-stream.gemspec', '{lib}/**/*'] - ['Gemfile.lock']
|
16
15
|
s.require_path = 'lib'
|
17
16
|
|
18
|
-
s.add_development_dependency '
|
19
|
-
s.
|
17
|
+
s.add_development_dependency 'bundler', '~> 2.1'
|
18
|
+
s.add_development_dependency 'minitest', '~> 5.10'
|
19
|
+
s.add_development_dependency 'rake', '~> 12.1'
|
20
|
+
|
21
|
+
s.required_ruby_version = '>= 2.0.0'
|
20
22
|
end
|
data/lib/json/stream/buffer.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
1
|
module JSON
|
4
2
|
module Stream
|
5
3
|
# A character buffer that expects a UTF-8 encoded stream of bytes.
|
@@ -14,7 +12,7 @@ module JSON
|
|
14
12
|
class Buffer
|
15
13
|
def initialize
|
16
14
|
@state = :start
|
17
|
-
@
|
15
|
+
@buffer = []
|
18
16
|
@need = 0
|
19
17
|
end
|
20
18
|
|
@@ -29,6 +27,12 @@ module JSON
|
|
29
27
|
#
|
30
28
|
# Returns a UTF-8 encoded String.
|
31
29
|
def <<(data)
|
30
|
+
# Avoid state machine for complete UTF-8.
|
31
|
+
if @buffer.empty?
|
32
|
+
data.force_encoding(Encoding::UTF_8)
|
33
|
+
return data if data.valid_encoding?
|
34
|
+
end
|
35
|
+
|
32
36
|
bytes = []
|
33
37
|
data.each_byte do |byte|
|
34
38
|
case @state
|
@@ -37,7 +41,7 @@ module JSON
|
|
37
41
|
bytes << byte
|
38
42
|
elsif byte >= 192
|
39
43
|
@state = :multi_byte
|
40
|
-
@
|
44
|
+
@buffer << byte
|
41
45
|
@need =
|
42
46
|
case
|
43
47
|
when byte >= 240 then 4
|
@@ -49,9 +53,9 @@ module JSON
|
|
49
53
|
end
|
50
54
|
when :multi_byte
|
51
55
|
if byte > 127 && byte < 192
|
52
|
-
@
|
53
|
-
if @
|
54
|
-
bytes += @
|
56
|
+
@buffer << byte
|
57
|
+
if @buffer.size == @need
|
58
|
+
bytes += @buffer.slice!(0, @buffer.size)
|
55
59
|
@state = :start
|
56
60
|
end
|
57
61
|
else
|
@@ -59,8 +63,10 @@ module JSON
|
|
59
63
|
end
|
60
64
|
end
|
61
65
|
end
|
62
|
-
|
63
|
-
|
66
|
+
|
67
|
+
# Build UTF-8 encoded string from completed codepoints.
|
68
|
+
bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |text|
|
69
|
+
error('Invalid UTF-8 byte sequence') unless text.valid_encoding?
|
64
70
|
end
|
65
71
|
end
|
66
72
|
|
@@ -82,7 +88,7 @@ module JSON
|
|
82
88
|
#
|
83
89
|
# Returns true if the buffer is empty.
|
84
90
|
def empty?
|
85
|
-
@
|
91
|
+
@buffer.empty?
|
86
92
|
end
|
87
93
|
|
88
94
|
private
|
data/lib/json/stream/builder.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
1
|
module JSON
|
4
2
|
module Stream
|
5
3
|
# A parser listener that builds a full, in memory, object from a JSON
|
@@ -38,13 +36,15 @@ module JSON
|
|
38
36
|
|
39
37
|
def end_object
|
40
38
|
return if @stack.size == 1
|
39
|
+
|
41
40
|
node = @stack.pop
|
41
|
+
top = @stack[-1]
|
42
42
|
|
43
|
-
case
|
43
|
+
case top
|
44
44
|
when Hash
|
45
|
-
|
45
|
+
top[@keys.pop] = node
|
46
46
|
when Array
|
47
|
-
|
47
|
+
top << node
|
48
48
|
end
|
49
49
|
end
|
50
50
|
alias :end_array :end_object
|
@@ -58,11 +58,12 @@ module JSON
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def value(value)
|
61
|
-
|
61
|
+
top = @stack[-1]
|
62
|
+
case top
|
62
63
|
when Hash
|
63
|
-
|
64
|
+
top[@keys.pop] = value
|
64
65
|
when Array
|
65
|
-
|
66
|
+
top << value
|
66
67
|
else
|
67
68
|
@stack << value
|
68
69
|
end
|
data/lib/json/stream/parser.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
1
|
module JSON
|
4
2
|
module Stream
|
5
3
|
# Raised on any invalid JSON text.
|
@@ -12,8 +10,8 @@ module JSON
|
|
12
10
|
# Examples
|
13
11
|
#
|
14
12
|
# parser = JSON::Stream::Parser.new
|
15
|
-
# parser.key {|key| puts key }
|
16
|
-
# parser.value {|value| puts value }
|
13
|
+
# parser.key { |key| puts key }
|
14
|
+
# parser.value { |value| puts value }
|
17
15
|
# parser << '{"answer":'
|
18
16
|
# parser << ' 42}'
|
19
17
|
class Parser
|
@@ -73,46 +71,6 @@ module JSON
|
|
73
71
|
stream.close
|
74
72
|
end
|
75
73
|
|
76
|
-
# Drain any remaining buffered characters into the parser to complete
|
77
|
-
# the parsing of the document.
|
78
|
-
#
|
79
|
-
# This is only required when parsing a document containing a single
|
80
|
-
# numeric value, integer or float. The parser has no other way to
|
81
|
-
# detect when it should no longer expect additional characters with
|
82
|
-
# which to complete the parse, so it must be signaled by a call to
|
83
|
-
# this method.
|
84
|
-
#
|
85
|
-
# If you're parsing more typical object or array documents, there's no
|
86
|
-
# need to call `finish` because the parse will complete when the final
|
87
|
-
# closing `]` or `}` character is scanned.
|
88
|
-
#
|
89
|
-
# Raises a JSON::Stream::ParserError if the JSON data is malformed.
|
90
|
-
#
|
91
|
-
# Returns nothing.
|
92
|
-
def finish
|
93
|
-
# Partial multi-byte character waiting for completion bytes.
|
94
|
-
error('Unexpected end-of-file') unless @utf8.empty?
|
95
|
-
|
96
|
-
# Partial array, object, or string.
|
97
|
-
error('Unexpected end-of-file') unless @stack.empty?
|
98
|
-
|
99
|
-
case @state
|
100
|
-
when :end_document
|
101
|
-
# done, do nothing
|
102
|
-
when :in_float
|
103
|
-
end_value(@buf.to_f)
|
104
|
-
when :in_exponent
|
105
|
-
error('Unexpected end-of-file') unless @buf =~ DIGIT_END
|
106
|
-
end_value(@buf.to_f)
|
107
|
-
when :start_zero
|
108
|
-
end_value(@buf.to_i)
|
109
|
-
when :start_int
|
110
|
-
end_value(@buf.to_i)
|
111
|
-
else
|
112
|
-
error('Unexpected end-of-file')
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
74
|
# Create a new parser with an optional initialization block where
|
117
75
|
# we can register event callbacks.
|
118
76
|
#
|
@@ -125,8 +83,8 @@ module JSON
|
|
125
83
|
# end_object { puts "end object" }
|
126
84
|
# start_array { puts "start array" }
|
127
85
|
# end_array { puts "end array" }
|
128
|
-
# key {|k| puts "key: #{k}" }
|
129
|
-
# value {|v| puts "value: #{v}" }
|
86
|
+
# key { |k| puts "key: #{k}" }
|
87
|
+
# value { |v| puts "value: #{v}" }
|
130
88
|
# end
|
131
89
|
def initialize(&block)
|
132
90
|
@state = :start_document
|
@@ -201,11 +159,11 @@ module JSON
|
|
201
159
|
start_value(ch)
|
202
160
|
when :start_object
|
203
161
|
case ch
|
204
|
-
when RIGHT_BRACE
|
205
|
-
end_container(:object)
|
206
162
|
when QUOTE
|
207
163
|
@state = :start_string
|
208
164
|
@stack.push(:key)
|
165
|
+
when RIGHT_BRACE
|
166
|
+
end_container(:object)
|
209
167
|
when WS
|
210
168
|
# ignore
|
211
169
|
else
|
@@ -260,12 +218,12 @@ module JSON
|
|
260
218
|
if @unicode.size == 4
|
261
219
|
codepoint = @unicode.slice!(0, 4).hex
|
262
220
|
if codepoint >= 0xD800 && codepoint <= 0xDBFF
|
263
|
-
error('Expected low surrogate pair half') if @stack.
|
221
|
+
error('Expected low surrogate pair half') if @stack[-1].is_a?(Integer)
|
264
222
|
@state = :start_surrogate_pair
|
265
223
|
@stack.push(codepoint)
|
266
224
|
elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
|
267
225
|
high = @stack.pop
|
268
|
-
error('Expected high surrogate pair half') unless high.is_a?(
|
226
|
+
error('Expected high surrogate pair half') unless high.is_a?(Integer)
|
269
227
|
pair = ((high - 0xD800) * 0x400) + (codepoint - 0xDC00) + 0x10000
|
270
228
|
@buf << pair
|
271
229
|
@state = :start_string
|
@@ -402,17 +360,17 @@ module JSON
|
|
402
360
|
case ch
|
403
361
|
when COMMA
|
404
362
|
@state = :value_sep
|
405
|
-
when RIGHT_BRACKET
|
406
|
-
end_container(:array)
|
407
363
|
when RIGHT_BRACE
|
408
364
|
end_container(:object)
|
365
|
+
when RIGHT_BRACKET
|
366
|
+
end_container(:array)
|
409
367
|
when WS
|
410
368
|
# ignore
|
411
369
|
else
|
412
370
|
error('Expected comma or object or array close')
|
413
371
|
end
|
414
372
|
when :value_sep
|
415
|
-
if @stack
|
373
|
+
if @stack[-1] == :object
|
416
374
|
case ch
|
417
375
|
when QUOTE
|
418
376
|
@state = :start_string
|
@@ -431,6 +389,46 @@ module JSON
|
|
431
389
|
end
|
432
390
|
end
|
433
391
|
|
392
|
+
# Drain any remaining buffered characters into the parser to complete
|
393
|
+
# the parsing of the document.
|
394
|
+
#
|
395
|
+
# This is only required when parsing a document containing a single
|
396
|
+
# numeric value, integer or float. The parser has no other way to
|
397
|
+
# detect when it should no longer expect additional characters with
|
398
|
+
# which to complete the parse, so it must be signaled by a call to
|
399
|
+
# this method.
|
400
|
+
#
|
401
|
+
# If you're parsing more typical object or array documents, there's no
|
402
|
+
# need to call `finish` because the parse will complete when the final
|
403
|
+
# closing `]` or `}` character is scanned.
|
404
|
+
#
|
405
|
+
# Raises a JSON::Stream::ParserError if the JSON data is malformed.
|
406
|
+
#
|
407
|
+
# Returns nothing.
|
408
|
+
def finish
|
409
|
+
# Partial multi-byte character waiting for completion bytes.
|
410
|
+
error('Unexpected end-of-file') unless @utf8.empty?
|
411
|
+
|
412
|
+
# Partial array, object, or string.
|
413
|
+
error('Unexpected end-of-file') unless @stack.empty?
|
414
|
+
|
415
|
+
case @state
|
416
|
+
when :end_document
|
417
|
+
# done, do nothing
|
418
|
+
when :in_float
|
419
|
+
end_value(@buf.to_f)
|
420
|
+
when :in_exponent
|
421
|
+
error('Unexpected end-of-file') unless @buf =~ DIGIT_END
|
422
|
+
end_value(@buf.to_f)
|
423
|
+
when :start_zero
|
424
|
+
end_value(@buf.to_i)
|
425
|
+
when :start_int
|
426
|
+
end_value(@buf.to_i)
|
427
|
+
else
|
428
|
+
error('Unexpected end-of-file')
|
429
|
+
end
|
430
|
+
end
|
431
|
+
|
434
432
|
private
|
435
433
|
|
436
434
|
# Invoke all registered observer procs for the event type.
|
data/lib/json/stream/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json-stream
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Graham
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-04-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.1'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5.10'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5.10'
|
13
41
|
- !ruby/object:Gem::Dependency
|
14
42
|
name: rake
|
15
43
|
requirement: !ruby/object:Gem::Requirement
|
16
44
|
requirements:
|
17
45
|
- - "~>"
|
18
46
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
47
|
+
version: '12.1'
|
20
48
|
type: :development
|
21
49
|
prerelease: false
|
22
50
|
version_requirements: !ruby/object:Gem::Requirement
|
23
51
|
requirements:
|
24
52
|
- - "~>"
|
25
53
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
54
|
+
version: '12.1'
|
27
55
|
description: A parser best suited for huge JSON documents that don't fit in memory.
|
28
56
|
email:
|
29
57
|
- david.malcom.graham@gmail.com
|
@@ -31,6 +59,7 @@ executables: []
|
|
31
59
|
extensions: []
|
32
60
|
extra_rdoc_files: []
|
33
61
|
files:
|
62
|
+
- Gemfile
|
34
63
|
- LICENSE
|
35
64
|
- README.md
|
36
65
|
- Rakefile
|
@@ -40,15 +69,11 @@ files:
|
|
40
69
|
- lib/json/stream/builder.rb
|
41
70
|
- lib/json/stream/parser.rb
|
42
71
|
- lib/json/stream/version.rb
|
43
|
-
- spec/buffer_spec.rb
|
44
|
-
- spec/builder_spec.rb
|
45
|
-
- spec/fixtures/repository.json
|
46
|
-
- spec/parser_spec.rb
|
47
72
|
homepage: http://dgraham.github.io/json-stream/
|
48
73
|
licenses:
|
49
74
|
- MIT
|
50
75
|
metadata: {}
|
51
|
-
post_install_message:
|
76
|
+
post_install_message:
|
52
77
|
rdoc_options: []
|
53
78
|
require_paths:
|
54
79
|
- lib
|
@@ -56,20 +81,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
56
81
|
requirements:
|
57
82
|
- - ">="
|
58
83
|
- !ruby/object:Gem::Version
|
59
|
-
version:
|
84
|
+
version: 2.0.0
|
60
85
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
86
|
requirements:
|
62
87
|
- - ">="
|
63
88
|
- !ruby/object:Gem::Version
|
64
89
|
version: '0'
|
65
90
|
requirements: []
|
66
|
-
|
67
|
-
|
68
|
-
signing_key:
|
91
|
+
rubygems_version: 3.5.4
|
92
|
+
signing_key:
|
69
93
|
specification_version: 4
|
70
94
|
summary: A streaming JSON parser that generates SAX-like events.
|
71
|
-
test_files:
|
72
|
-
- spec/buffer_spec.rb
|
73
|
-
- spec/builder_spec.rb
|
74
|
-
- spec/fixtures/repository.json
|
75
|
-
- spec/parser_spec.rb
|
95
|
+
test_files: []
|
data/spec/buffer_spec.rb
DELETED
@@ -1,103 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
|
-
require 'json/stream'
|
4
|
-
require 'minitest/autorun'
|
5
|
-
|
6
|
-
describe JSON::Stream::Buffer do
|
7
|
-
subject { JSON::Stream::Buffer.new }
|
8
|
-
|
9
|
-
it 'accepts single byte characters' do
|
10
|
-
assert_equal "", subject << ""
|
11
|
-
assert_equal "abc", subject << "abc"
|
12
|
-
assert_equal "\u0000abc", subject << "\u0000abc"
|
13
|
-
end
|
14
|
-
|
15
|
-
# The é character can be a single codepoint \u00e9 or two codepoints
|
16
|
-
# \u0065\u0301. The first is encoded in 2 bytes, the second in 3 bytes.
|
17
|
-
# The json and yajl-ruby gems and CouchDB do not normalize unicode text
|
18
|
-
# so neither will we. Although, a good way to normalize is by calling
|
19
|
-
# ActiveSupport::Multibyte::Chars.new("é").normalize(:c).
|
20
|
-
it 'accepts combined characters' do
|
21
|
-
assert_equal "\u0065\u0301", subject << "\u0065\u0301"
|
22
|
-
assert_equal 3, (subject << "\u0065\u0301").bytesize
|
23
|
-
assert_equal 2, (subject << "\u0065\u0301").size
|
24
|
-
|
25
|
-
assert_equal "\u00e9", subject << "\u00e9"
|
26
|
-
assert_equal 2, (subject << "\u00e9").bytesize
|
27
|
-
assert_equal 1, (subject << "\u00e9").size
|
28
|
-
end
|
29
|
-
|
30
|
-
it 'accepts valid two byte characters' do
|
31
|
-
assert_equal "abcé", subject << "abcé"
|
32
|
-
assert_equal "a", subject << "a\xC3"
|
33
|
-
assert_equal "é", subject << "\xA9"
|
34
|
-
assert_equal "", subject << "\xC3"
|
35
|
-
assert_equal "é", subject << "\xA9"
|
36
|
-
assert_equal "é", subject << "\xC3\xA9"
|
37
|
-
end
|
38
|
-
|
39
|
-
it 'accepts valid three byte characters' do
|
40
|
-
assert_equal "abcé\u2603", subject << "abcé\u2603"
|
41
|
-
assert_equal "a", subject << "a\xE2"
|
42
|
-
assert_equal "", subject << "\x98"
|
43
|
-
assert_equal "\u2603", subject << "\x83"
|
44
|
-
end
|
45
|
-
|
46
|
-
it 'accepts valid four byte characters' do
|
47
|
-
assert_equal "abcé\u2603\u{10102}é", subject << "abcé\u2603\u{10102}é"
|
48
|
-
assert_equal "a", subject << "a\xF0"
|
49
|
-
assert_equal "", subject << "\x90"
|
50
|
-
assert_equal "", subject << "\x84"
|
51
|
-
assert_equal "\u{10102}", subject << "\x82"
|
52
|
-
end
|
53
|
-
|
54
|
-
it 'rejects invalid two byte start characters' do
|
55
|
-
-> { subject << "\xC3\xC3" }.must_raise JSON::Stream::ParserError
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'rejects invalid three byte start characters' do
|
59
|
-
-> { subject << "\xE2\xE2" }.must_raise JSON::Stream::ParserError
|
60
|
-
end
|
61
|
-
|
62
|
-
it 'rejects invalid four byte start characters' do
|
63
|
-
-> { subject << "\xF0\xF0" }.must_raise JSON::Stream::ParserError
|
64
|
-
end
|
65
|
-
|
66
|
-
it 'rejects a two byte start with single byte continuation character' do
|
67
|
-
-> { subject << "\xC3\u0000" }.must_raise JSON::Stream::ParserError
|
68
|
-
end
|
69
|
-
|
70
|
-
it 'rejects a three byte start with single byte continuation character' do
|
71
|
-
-> { subject << "\xE2\u0010" }.must_raise JSON::Stream::ParserError
|
72
|
-
end
|
73
|
-
|
74
|
-
it 'rejects a four byte start with single byte continuation character' do
|
75
|
-
-> { subject << "\xF0a" }.must_raise JSON::Stream::ParserError
|
76
|
-
end
|
77
|
-
|
78
|
-
it 'rejects an invalid continuation character' do
|
79
|
-
-> { subject << "\xA9" }.must_raise JSON::Stream::ParserError
|
80
|
-
end
|
81
|
-
|
82
|
-
it 'rejects an overlong form' do
|
83
|
-
-> { subject << "\xC0\x80" }.must_raise JSON::Stream::ParserError
|
84
|
-
end
|
85
|
-
|
86
|
-
describe 'checking for empty buffers' do
|
87
|
-
it 'is initially empty' do
|
88
|
-
assert subject.empty?
|
89
|
-
end
|
90
|
-
|
91
|
-
it 'is empty after processing complete characters' do
|
92
|
-
subject << 'test'
|
93
|
-
assert subject.empty?
|
94
|
-
end
|
95
|
-
|
96
|
-
it 'is not empty after processing partial multi-byte characters' do
|
97
|
-
subject << "\xC3"
|
98
|
-
refute subject.empty?
|
99
|
-
subject << "\xA9"
|
100
|
-
assert subject.empty?
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|