json-stream 0.2.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 15b32baaa4333e97f2eae71f21a03af04e8275ea
4
- data.tar.gz: ea3b178e62a46ae093e5d362abd554fa3c361241
2
+ SHA256:
3
+ metadata.gz: f1db3759d432092d3f1cac4dd90fe23551f14cd33bff1fa209a4f32cb2399025
4
+ data.tar.gz: 5c9532adaae5591585b756686463e93e24beb371d7a78e15cf45ffe09e6b4247
5
5
  SHA512:
6
- metadata.gz: c88c6208cdb0597e6ba9ad1d9a86d62605f2329176eabfc15eb43f227a61efae3c1ff66cd1efe340e73245258e325076b6ecdacf6a3f5b870d03ec03d35c986c
7
- data.tar.gz: cb403d67b89ec3f609b5b9e61b9f12c6a93d654206dbed37b5e9d20a8b94ecd1bd717bfb7f72bf129e3ee923904c4ac033fc90da569879c28e9140b5aa6103bd
6
+ metadata.gz: 9ac6519d888a81802453b964bc36db7639728c83ad67604516045fd6756c64c34579caa16050f1df32f1b9c8e6b4723433d81db8b2c63f59a2b846d9ab00eeb5
7
+ data.tar.gz: 7079d8a588958e49c016cc051f27b0ce6f24a902bbe3759702d50da1329815ae1ef929d41968ad24132eb748830062070cdf58b47261d05f4528caed7ad465da
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2014 David Graham
1
+ Copyright (c) 2010-2024 David Graham
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining a copy
4
4
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -2,9 +2,10 @@
2
2
 
3
3
  JSON::Stream is a JSON parser, based on a finite state machine, that generates
4
4
  events for each state change. This allows streaming both the JSON document into
5
- memory and the parsed object graph out of memory to some other process. This
6
- is much like an XML SAX parser that generates events during parsing. There is
7
- no requirement for the document, or the object graph, to be fully buffered in
5
+ memory and the parsed object graph out of memory to some other process.
6
+
7
+ This is much like an XML SAX parser that generates events during parsing. There
8
+ is no requirement for the document, or the object graph, to be fully buffered in
8
9
  memory. This is best suited for huge JSON documents that won't fit in memory.
9
10
  For example, streaming and processing large map/reduce views from Apache
10
11
  CouchDB.
@@ -39,7 +40,7 @@ Again, while JSON::Stream can be used this way, if we just need to stream the
39
40
  document from disk or the network, we're better off using the yajl-ruby gem.
40
41
 
41
42
  Huge documents arriving over the network in small chunks to an EventMachine
42
- receive_data loop is where JSON::Stream is really useful. Inside an
43
+ `receive_data` loop is where JSON::Stream is really useful. Inside an
43
44
  EventMachine::Connection subclass we might have:
44
45
 
45
46
  ```ruby
@@ -51,8 +52,8 @@ def post_init
51
52
  end_object { puts "end object" }
52
53
  start_array { puts "start array" }
53
54
  end_array { puts "end array" }
54
- key {|k| puts "key: #{k}" }
55
- value {|v| puts "value: #{v}" }
55
+ key { |k| puts "key: #{k}" }
56
+ value { |v| puts "value: #{v}" }
56
57
  end
57
58
  end
58
59
 
@@ -76,16 +77,19 @@ imagine the callbacks looking for an array named `rows` and processing sets
76
77
  of these row objects in small batches. Millions of rows, streaming over the
77
78
  network, can be processed in constant memory space this way.
78
79
 
79
- ## Dependencies
80
-
81
- * ruby >= 1.9.2
82
- * jruby >= 1.7
83
-
84
80
  ## Alternatives
85
81
 
86
82
  * [json](https://github.com/flori/json)
87
83
  * [yajl-ruby](https://github.com/brianmario/yajl-ruby)
88
84
  * [yajl-ffi](https://github.com/dgraham/yajl-ffi)
85
+ * [application/json-seq](http://www.rfc-editor.org/rfc/rfc7464.txt)
86
+
87
+ ## Development
88
+
89
+ ```
90
+ $ bin/setup
91
+ $ bin/rake test
92
+ ```
89
93
 
90
94
  ## License
91
95
 
data/json-stream.gemspec CHANGED
@@ -11,10 +11,12 @@ Gem::Specification.new do |s|
11
11
  s.homepage = 'http://dgraham.github.io/json-stream/'
12
12
  s.license = 'MIT'
13
13
 
14
- s.files = Dir['[A-Z]*', 'json-stream.gemspec', '{lib}/**/*']
15
- s.test_files = Dir['spec/**/*']
14
+ s.files = Dir['[A-Z]*', 'json-stream.gemspec', '{lib}/**/*'] - ['Gemfile.lock']
16
15
  s.require_path = 'lib'
17
16
 
18
- s.add_development_dependency 'rake', '~> 10.3'
19
- s.required_ruby_version = '>= 1.9.2'
17
+ s.add_development_dependency 'bundler', '~> 2.1'
18
+ s.add_development_dependency 'minitest', '~> 5.10'
19
+ s.add_development_dependency 'rake', '~> 12.1'
20
+
21
+ s.required_ruby_version = '>= 2.0.0'
20
22
  end
data/lib/json/stream/buffer.rb CHANGED
@@ -1,5 +1,3 @@
1
- # encoding: UTF-8
2
-
3
1
  module JSON
4
2
  module Stream
5
3
  # A character buffer that expects a UTF-8 encoded stream of bytes.
@@ -14,7 +12,7 @@ module JSON
14
12
  class Buffer
15
13
  def initialize
16
14
  @state = :start
17
- @buf = []
15
+ @buffer = []
18
16
  @need = 0
19
17
  end
20
18
 
@@ -29,6 +27,12 @@ module JSON
29
27
  #
30
28
  # Returns a UTF-8 encoded String.
31
29
  def <<(data)
30
+ # Avoid state machine for complete UTF-8.
31
+ if @buffer.empty?
32
+ data.force_encoding(Encoding::UTF_8)
33
+ return data if data.valid_encoding?
34
+ end
35
+
32
36
  bytes = []
33
37
  data.each_byte do |byte|
34
38
  case @state
@@ -37,7 +41,7 @@ module JSON
37
41
  bytes << byte
38
42
  elsif byte >= 192
39
43
  @state = :multi_byte
40
- @buf << byte
44
+ @buffer << byte
41
45
  @need =
42
46
  case
43
47
  when byte >= 240 then 4
@@ -49,9 +53,9 @@ module JSON
49
53
  end
50
54
  when :multi_byte
51
55
  if byte > 127 && byte < 192
52
- @buf << byte
53
- if @buf.size == @need
54
- bytes += @buf.slice!(0, @buf.size)
56
+ @buffer << byte
57
+ if @buffer.size == @need
58
+ bytes += @buffer.slice!(0, @buffer.size)
55
59
  @state = :start
56
60
  end
57
61
  else
@@ -59,8 +63,10 @@ module JSON
59
63
  end
60
64
  end
61
65
  end
62
- bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |str|
63
- error('Invalid UTF-8 byte sequence') unless str.valid_encoding?
66
+
67
+ # Build UTF-8 encoded string from completed codepoints.
68
+ bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |text|
69
+ error('Invalid UTF-8 byte sequence') unless text.valid_encoding?
64
70
  end
65
71
  end
66
72
 
@@ -82,7 +88,7 @@ module JSON
82
88
  #
83
89
  # Returns true if the buffer is empty.
84
90
  def empty?
85
- @buf.empty?
91
+ @buffer.empty?
86
92
  end
87
93
 
88
94
  private
data/lib/json/stream/builder.rb CHANGED
@@ -1,5 +1,3 @@
1
- # encoding: UTF-8
2
-
3
1
  module JSON
4
2
  module Stream
5
3
  # A parser listener that builds a full, in memory, object from a JSON
@@ -38,13 +36,15 @@ module JSON
38
36
 
39
37
  def end_object
40
38
  return if @stack.size == 1
39
+
41
40
  node = @stack.pop
41
+ top = @stack[-1]
42
42
 
43
- case @stack.last
43
+ case top
44
44
  when Hash
45
- @stack.last[@keys.pop] = node
45
+ top[@keys.pop] = node
46
46
  when Array
47
- @stack.last << node
47
+ top << node
48
48
  end
49
49
  end
50
50
  alias :end_array :end_object
@@ -58,11 +58,12 @@ module JSON
58
58
  end
59
59
 
60
60
  def value(value)
61
- case @stack.last
61
+ top = @stack[-1]
62
+ case top
62
63
  when Hash
63
- @stack.last[@keys.pop] = value
64
+ top[@keys.pop] = value
64
65
  when Array
65
- @stack.last << value
66
+ top << value
66
67
  else
67
68
  @stack << value
68
69
  end
data/lib/json/stream/parser.rb CHANGED
@@ -1,5 +1,3 @@
1
- # encoding: UTF-8
2
-
3
1
  module JSON
4
2
  module Stream
5
3
  # Raised on any invalid JSON text.
@@ -12,8 +10,8 @@ module JSON
12
10
  # Examples
13
11
  #
14
12
  # parser = JSON::Stream::Parser.new
15
- # parser.key {|key| puts key }
16
- # parser.value {|value| puts value }
13
+ # parser.key { |key| puts key }
14
+ # parser.value { |value| puts value }
17
15
  # parser << '{"answer":'
18
16
  # parser << ' 42}'
19
17
  class Parser
@@ -73,46 +71,6 @@ module JSON
73
71
  stream.close
74
72
  end
75
73
 
76
- # Drain any remaining buffered characters into the parser to complete
77
- # the parsing of the document.
78
- #
79
- # This is only required when parsing a document containing a single
80
- # numeric value, integer or float. The parser has no other way to
81
- # detect when it should no longer expect additional characters with
82
- # which to complete the parse, so it must be signaled by a call to
83
- # this method.
84
- #
85
- # If you're parsing more typical object or array documents, there's no
86
- # need to call `finish` because the parse will complete when the final
87
- # closing `]` or `}` character is scanned.
88
- #
89
- # Raises a JSON::Stream::ParserError if the JSON data is malformed.
90
- #
91
- # Returns nothing.
92
- def finish
93
- # Partial multi-byte character waiting for completion bytes.
94
- error('Unexpected end-of-file') unless @utf8.empty?
95
-
96
- # Partial array, object, or string.
97
- error('Unexpected end-of-file') unless @stack.empty?
98
-
99
- case @state
100
- when :end_document
101
- # done, do nothing
102
- when :in_float
103
- end_value(@buf.to_f)
104
- when :in_exponent
105
- error('Unexpected end-of-file') unless @buf =~ DIGIT_END
106
- end_value(@buf.to_f)
107
- when :start_zero
108
- end_value(@buf.to_i)
109
- when :start_int
110
- end_value(@buf.to_i)
111
- else
112
- error('Unexpected end-of-file')
113
- end
114
- end
115
-
116
74
  # Create a new parser with an optional initialization block where
117
75
  # we can register event callbacks.
118
76
  #
@@ -125,8 +83,8 @@ module JSON
125
83
  # end_object { puts "end object" }
126
84
  # start_array { puts "start array" }
127
85
  # end_array { puts "end array" }
128
- # key {|k| puts "key: #{k}" }
129
- # value {|v| puts "value: #{v}" }
86
+ # key { |k| puts "key: #{k}" }
87
+ # value { |v| puts "value: #{v}" }
130
88
  # end
131
89
  def initialize(&block)
132
90
  @state = :start_document
@@ -201,11 +159,11 @@ module JSON
201
159
  start_value(ch)
202
160
  when :start_object
203
161
  case ch
204
- when RIGHT_BRACE
205
- end_container(:object)
206
162
  when QUOTE
207
163
  @state = :start_string
208
164
  @stack.push(:key)
165
+ when RIGHT_BRACE
166
+ end_container(:object)
209
167
  when WS
210
168
  # ignore
211
169
  else
@@ -260,12 +218,12 @@ module JSON
260
218
  if @unicode.size == 4
261
219
  codepoint = @unicode.slice!(0, 4).hex
262
220
  if codepoint >= 0xD800 && codepoint <= 0xDBFF
263
- error('Expected low surrogate pair half') if @stack.last.is_a?(Fixnum)
221
+ error('Expected low surrogate pair half') if @stack[-1].is_a?(Integer)
264
222
  @state = :start_surrogate_pair
265
223
  @stack.push(codepoint)
266
224
  elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
267
225
  high = @stack.pop
268
- error('Expected high surrogate pair half') unless high.is_a?(Fixnum)
226
+ error('Expected high surrogate pair half') unless high.is_a?(Integer)
269
227
  pair = ((high - 0xD800) * 0x400) + (codepoint - 0xDC00) + 0x10000
270
228
  @buf << pair
271
229
  @state = :start_string
@@ -402,17 +360,17 @@ module JSON
402
360
  case ch
403
361
  when COMMA
404
362
  @state = :value_sep
405
- when RIGHT_BRACKET
406
- end_container(:array)
407
363
  when RIGHT_BRACE
408
364
  end_container(:object)
365
+ when RIGHT_BRACKET
366
+ end_container(:array)
409
367
  when WS
410
368
  # ignore
411
369
  else
412
370
  error('Expected comma or object or array close')
413
371
  end
414
372
  when :value_sep
415
- if @stack.last == :object
373
+ if @stack[-1] == :object
416
374
  case ch
417
375
  when QUOTE
418
376
  @state = :start_string
@@ -431,6 +389,46 @@ module JSON
431
389
  end
432
390
  end
433
391
 
392
+ # Drain any remaining buffered characters into the parser to complete
393
+ # the parsing of the document.
394
+ #
395
+ # This is only required when parsing a document containing a single
396
+ # numeric value, integer or float. The parser has no other way to
397
+ # detect when it should no longer expect additional characters with
398
+ # which to complete the parse, so it must be signaled by a call to
399
+ # this method.
400
+ #
401
+ # If you're parsing more typical object or array documents, there's no
402
+ # need to call `finish` because the parse will complete when the final
403
+ # closing `]` or `}` character is scanned.
404
+ #
405
+ # Raises a JSON::Stream::ParserError if the JSON data is malformed.
406
+ #
407
+ # Returns nothing.
408
+ def finish
409
+ # Partial multi-byte character waiting for completion bytes.
410
+ error('Unexpected end-of-file') unless @utf8.empty?
411
+
412
+ # Partial array, object, or string.
413
+ error('Unexpected end-of-file') unless @stack.empty?
414
+
415
+ case @state
416
+ when :end_document
417
+ # done, do nothing
418
+ when :in_float
419
+ end_value(@buf.to_f)
420
+ when :in_exponent
421
+ error('Unexpected end-of-file') unless @buf =~ DIGIT_END
422
+ end_value(@buf.to_f)
423
+ when :start_zero
424
+ end_value(@buf.to_i)
425
+ when :start_int
426
+ end_value(@buf.to_i)
427
+ else
428
+ error('Unexpected end-of-file')
429
+ end
430
+ end
431
+
434
432
  private
435
433
 
436
434
  # Invoke all registered observer procs for the event type.
data/lib/json/stream/version.rb CHANGED
@@ -1,7 +1,5 @@
1
- # encoding: UTF-8
2
-
3
1
  module JSON
4
2
  module Stream
5
- VERSION = '0.2.0'
3
+ VERSION = '0.2.2'
6
4
  end
7
5
  end
metadata CHANGED
@@ -1,29 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json-stream
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Graham
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-06 00:00:00.000000000 Z
11
+ date: 2024-04-21 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.1'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.10'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.10'
13
41
  - !ruby/object:Gem::Dependency
14
42
  name: rake
15
43
  requirement: !ruby/object:Gem::Requirement
16
44
  requirements:
17
45
  - - "~>"
18
46
  - !ruby/object:Gem::Version
19
- version: '10.3'
47
+ version: '12.1'
20
48
  type: :development
21
49
  prerelease: false
22
50
  version_requirements: !ruby/object:Gem::Requirement
23
51
  requirements:
24
52
  - - "~>"
25
53
  - !ruby/object:Gem::Version
26
- version: '10.3'
54
+ version: '12.1'
27
55
  description: A parser best suited for huge JSON documents that don't fit in memory.
28
56
  email:
29
57
  - david.malcom.graham@gmail.com
@@ -31,6 +59,7 @@ executables: []
31
59
  extensions: []
32
60
  extra_rdoc_files: []
33
61
  files:
62
+ - Gemfile
34
63
  - LICENSE
35
64
  - README.md
36
65
  - Rakefile
@@ -40,15 +69,11 @@ files:
40
69
  - lib/json/stream/builder.rb
41
70
  - lib/json/stream/parser.rb
42
71
  - lib/json/stream/version.rb
43
- - spec/buffer_spec.rb
44
- - spec/builder_spec.rb
45
- - spec/fixtures/repository.json
46
- - spec/parser_spec.rb
47
72
  homepage: http://dgraham.github.io/json-stream/
48
73
  licenses:
49
74
  - MIT
50
75
  metadata: {}
51
- post_install_message:
76
+ post_install_message:
52
77
  rdoc_options: []
53
78
  require_paths:
54
79
  - lib
@@ -56,20 +81,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
56
81
  requirements:
57
82
  - - ">="
58
83
  - !ruby/object:Gem::Version
59
- version: 1.9.2
84
+ version: 2.0.0
60
85
  required_rubygems_version: !ruby/object:Gem::Requirement
61
86
  requirements:
62
87
  - - ">="
63
88
  - !ruby/object:Gem::Version
64
89
  version: '0'
65
90
  requirements: []
66
- rubyforge_project:
67
- rubygems_version: 2.2.2
68
- signing_key:
91
+ rubygems_version: 3.5.4
92
+ signing_key:
69
93
  specification_version: 4
70
94
  summary: A streaming JSON parser that generates SAX-like events.
71
- test_files:
72
- - spec/buffer_spec.rb
73
- - spec/builder_spec.rb
74
- - spec/fixtures/repository.json
75
- - spec/parser_spec.rb
95
+ test_files: []
data/spec/buffer_spec.rb DELETED
@@ -1,103 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- require 'json/stream'
4
- require 'minitest/autorun'
5
-
6
- describe JSON::Stream::Buffer do
7
- subject { JSON::Stream::Buffer.new }
8
-
9
- it 'accepts single byte characters' do
10
- assert_equal "", subject << ""
11
- assert_equal "abc", subject << "abc"
12
- assert_equal "\u0000abc", subject << "\u0000abc"
13
- end
14
-
15
- # The é character can be a single codepoint \u00e9 or two codepoints
16
- # \u0065\u0301. The first is encoded in 2 bytes, the second in 3 bytes.
17
- # The json and yajl-ruby gems and CouchDB do not normalize unicode text
18
- # so neither will we. Although, a good way to normalize is by calling
19
- # ActiveSupport::Multibyte::Chars.new("é").normalize(:c).
20
- it 'accepts combined characters' do
21
- assert_equal "\u0065\u0301", subject << "\u0065\u0301"
22
- assert_equal 3, (subject << "\u0065\u0301").bytesize
23
- assert_equal 2, (subject << "\u0065\u0301").size
24
-
25
- assert_equal "\u00e9", subject << "\u00e9"
26
- assert_equal 2, (subject << "\u00e9").bytesize
27
- assert_equal 1, (subject << "\u00e9").size
28
- end
29
-
30
- it 'accepts valid two byte characters' do
31
- assert_equal "abcé", subject << "abcé"
32
- assert_equal "a", subject << "a\xC3"
33
- assert_equal "é", subject << "\xA9"
34
- assert_equal "", subject << "\xC3"
35
- assert_equal "é", subject << "\xA9"
36
- assert_equal "é", subject << "\xC3\xA9"
37
- end
38
-
39
- it 'accepts valid three byte characters' do
40
- assert_equal "abcé\u2603", subject << "abcé\u2603"
41
- assert_equal "a", subject << "a\xE2"
42
- assert_equal "", subject << "\x98"
43
- assert_equal "\u2603", subject << "\x83"
44
- end
45
-
46
- it 'accepts valid four byte characters' do
47
- assert_equal "abcé\u2603\u{10102}é", subject << "abcé\u2603\u{10102}é"
48
- assert_equal "a", subject << "a\xF0"
49
- assert_equal "", subject << "\x90"
50
- assert_equal "", subject << "\x84"
51
- assert_equal "\u{10102}", subject << "\x82"
52
- end
53
-
54
- it 'rejects invalid two byte start characters' do
55
- -> { subject << "\xC3\xC3" }.must_raise JSON::Stream::ParserError
56
- end
57
-
58
- it 'rejects invalid three byte start characters' do
59
- -> { subject << "\xE2\xE2" }.must_raise JSON::Stream::ParserError
60
- end
61
-
62
- it 'rejects invalid four byte start characters' do
63
- -> { subject << "\xF0\xF0" }.must_raise JSON::Stream::ParserError
64
- end
65
-
66
- it 'rejects a two byte start with single byte continuation character' do
67
- -> { subject << "\xC3\u0000" }.must_raise JSON::Stream::ParserError
68
- end
69
-
70
- it 'rejects a three byte start with single byte continuation character' do
71
- -> { subject << "\xE2\u0010" }.must_raise JSON::Stream::ParserError
72
- end
73
-
74
- it 'rejects a four byte start with single byte continuation character' do
75
- -> { subject << "\xF0a" }.must_raise JSON::Stream::ParserError
76
- end
77
-
78
- it 'rejects an invalid continuation character' do
79
- -> { subject << "\xA9" }.must_raise JSON::Stream::ParserError
80
- end
81
-
82
- it 'rejects an overlong form' do
83
- -> { subject << "\xC0\x80" }.must_raise JSON::Stream::ParserError
84
- end
85
-
86
- describe 'checking for empty buffers' do
87
- it 'is initially empty' do
88
- assert subject.empty?
89
- end
90
-
91
- it 'is empty after processing complete characters' do
92
- subject << 'test'
93
- assert subject.empty?
94
- end
95
-
96
- it 'is not empty after processing partial multi-byte characters' do
97
- subject << "\xC3"
98
- refute subject.empty?
99
- subject << "\xA9"
100
- assert subject.empty?
101
- end
102
- end
103
- end