json-stream 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 15b32baaa4333e97f2eae71f21a03af04e8275ea
4
- data.tar.gz: ea3b178e62a46ae093e5d362abd554fa3c361241
3
+ metadata.gz: 62af90ebe18d5c8a58ca9b75695bf2c403b4c4a0
4
+ data.tar.gz: d3176279af8156702e63ffdf4c963ab6f0e25cb3
5
5
  SHA512:
6
- metadata.gz: c88c6208cdb0597e6ba9ad1d9a86d62605f2329176eabfc15eb43f227a61efae3c1ff66cd1efe340e73245258e325076b6ecdacf6a3f5b870d03ec03d35c986c
7
- data.tar.gz: cb403d67b89ec3f609b5b9e61b9f12c6a93d654206dbed37b5e9d20a8b94ecd1bd717bfb7f72bf129e3ee923904c4ac033fc90da569879c28e9140b5aa6103bd
6
+ metadata.gz: 5d757855dcd79878bb9d5cbdd74c05252bde3286259b88315e95b3302ce60bc24bbf2d6f6e0ab32a5507b55bbdeae3b63ab405368880cfb29b1981f3cb291146
7
+ data.tar.gz: 0a90cf028ed263a4dc571c4834e1b033d9c23b17aaaef36a7275d44e804a69c1c7c8090ea119411858608bfb45a8c6ada7b2e4e6c8862294ef54ef091b1030e0
data/README.md CHANGED
@@ -2,9 +2,10 @@
2
2
 
3
3
  JSON::Stream is a JSON parser, based on a finite state machine, that generates
4
4
  events for each state change. This allows streaming both the JSON document into
5
- memory and the parsed object graph out of memory to some other process. This
6
- is much like an XML SAX parser that generates events during parsing. There is
7
- no requirement for the document, or the object graph, to be fully buffered in
5
+ memory and the parsed object graph out of memory to some other process.
6
+
7
+ This is much like an XML SAX parser that generates events during parsing. There
8
+ is no requirement for the document, or the object graph, to be fully buffered in
8
9
  memory. This is best suited for huge JSON documents that won't fit in memory.
9
10
  For example, streaming and processing large map/reduce views from Apache
10
11
  CouchDB.
@@ -14,7 +14,7 @@ module JSON
14
14
  class Buffer
15
15
  def initialize
16
16
  @state = :start
17
- @buf = []
17
+ @buffer = []
18
18
  @need = 0
19
19
  end
20
20
 
@@ -29,6 +29,12 @@ module JSON
29
29
  #
30
30
  # Returns a UTF-8 encoded String.
31
31
  def <<(data)
32
+ # Avoid state machine for complete UTF-8.
33
+ if @buffer.empty?
34
+ data.force_encoding(Encoding::UTF_8)
35
+ return data if data.valid_encoding?
36
+ end
37
+
32
38
  bytes = []
33
39
  data.each_byte do |byte|
34
40
  case @state
@@ -37,7 +43,7 @@ module JSON
37
43
  bytes << byte
38
44
  elsif byte >= 192
39
45
  @state = :multi_byte
40
- @buf << byte
46
+ @buffer << byte
41
47
  @need =
42
48
  case
43
49
  when byte >= 240 then 4
@@ -49,9 +55,9 @@ module JSON
49
55
  end
50
56
  when :multi_byte
51
57
  if byte > 127 && byte < 192
52
- @buf << byte
53
- if @buf.size == @need
54
- bytes += @buf.slice!(0, @buf.size)
58
+ @buffer << byte
59
+ if @buffer.size == @need
60
+ bytes += @buffer.slice!(0, @buffer.size)
55
61
  @state = :start
56
62
  end
57
63
  else
@@ -59,8 +65,10 @@ module JSON
59
65
  end
60
66
  end
61
67
  end
62
- bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |str|
63
- error('Invalid UTF-8 byte sequence') unless str.valid_encoding?
68
+
69
+ # Build UTF-8 encoded string from completed codepoints.
70
+ bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |text|
71
+ error('Invalid UTF-8 byte sequence') unless text.valid_encoding?
64
72
  end
65
73
  end
66
74
 
@@ -82,7 +90,7 @@ module JSON
82
90
  #
83
91
  # Returns true if the buffer is empty.
84
92
  def empty?
85
- @buf.empty?
93
+ @buffer.empty?
86
94
  end
87
95
 
88
96
  private
@@ -38,13 +38,15 @@ module JSON
38
38
 
39
39
  def end_object
40
40
  return if @stack.size == 1
41
+
41
42
  node = @stack.pop
43
+ top = @stack[-1]
42
44
 
43
- case @stack.last
45
+ case top
44
46
  when Hash
45
- @stack.last[@keys.pop] = node
47
+ top[@keys.pop] = node
46
48
  when Array
47
- @stack.last << node
49
+ top << node
48
50
  end
49
51
  end
50
52
  alias :end_array :end_object
@@ -58,11 +60,12 @@ module JSON
58
60
  end
59
61
 
60
62
  def value(value)
61
- case @stack.last
63
+ top = @stack[-1]
64
+ case top
62
65
  when Hash
63
- @stack.last[@keys.pop] = value
66
+ top[@keys.pop] = value
64
67
  when Array
65
- @stack.last << value
68
+ top << value
66
69
  else
67
70
  @stack << value
68
71
  end
@@ -73,46 +73,6 @@ module JSON
73
73
  stream.close
74
74
  end
75
75
 
76
- # Drain any remaining buffered characters into the parser to complete
77
- # the parsing of the document.
78
- #
79
- # This is only required when parsing a document containing a single
80
- # numeric value, integer or float. The parser has no other way to
81
- # detect when it should no longer expect additional characters with
82
- # which to complete the parse, so it must be signaled by a call to
83
- # this method.
84
- #
85
- # If you're parsing more typical object or array documents, there's no
86
- # need to call `finish` because the parse will complete when the final
87
- # closing `]` or `}` character is scanned.
88
- #
89
- # Raises a JSON::Stream::ParserError if the JSON data is malformed.
90
- #
91
- # Returns nothing.
92
- def finish
93
- # Partial multi-byte character waiting for completion bytes.
94
- error('Unexpected end-of-file') unless @utf8.empty?
95
-
96
- # Partial array, object, or string.
97
- error('Unexpected end-of-file') unless @stack.empty?
98
-
99
- case @state
100
- when :end_document
101
- # done, do nothing
102
- when :in_float
103
- end_value(@buf.to_f)
104
- when :in_exponent
105
- error('Unexpected end-of-file') unless @buf =~ DIGIT_END
106
- end_value(@buf.to_f)
107
- when :start_zero
108
- end_value(@buf.to_i)
109
- when :start_int
110
- end_value(@buf.to_i)
111
- else
112
- error('Unexpected end-of-file')
113
- end
114
- end
115
-
116
76
  # Create a new parser with an optional initialization block where
117
77
  # we can register event callbacks.
118
78
  #
@@ -201,11 +161,11 @@ module JSON
201
161
  start_value(ch)
202
162
  when :start_object
203
163
  case ch
204
- when RIGHT_BRACE
205
- end_container(:object)
206
164
  when QUOTE
207
165
  @state = :start_string
208
166
  @stack.push(:key)
167
+ when RIGHT_BRACE
168
+ end_container(:object)
209
169
  when WS
210
170
  # ignore
211
171
  else
@@ -260,7 +220,7 @@ module JSON
260
220
  if @unicode.size == 4
261
221
  codepoint = @unicode.slice!(0, 4).hex
262
222
  if codepoint >= 0xD800 && codepoint <= 0xDBFF
263
- error('Expected low surrogate pair half') if @stack.last.is_a?(Fixnum)
223
+ error('Expected low surrogate pair half') if @stack[-1].is_a?(Fixnum)
264
224
  @state = :start_surrogate_pair
265
225
  @stack.push(codepoint)
266
226
  elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
@@ -402,17 +362,17 @@ module JSON
402
362
  case ch
403
363
  when COMMA
404
364
  @state = :value_sep
405
- when RIGHT_BRACKET
406
- end_container(:array)
407
365
  when RIGHT_BRACE
408
366
  end_container(:object)
367
+ when RIGHT_BRACKET
368
+ end_container(:array)
409
369
  when WS
410
370
  # ignore
411
371
  else
412
372
  error('Expected comma or object or array close')
413
373
  end
414
374
  when :value_sep
415
- if @stack.last == :object
375
+ if @stack[-1] == :object
416
376
  case ch
417
377
  when QUOTE
418
378
  @state = :start_string
@@ -431,6 +391,46 @@ module JSON
431
391
  end
432
392
  end
433
393
 
394
+ # Drain any remaining buffered characters into the parser to complete
395
+ # the parsing of the document.
396
+ #
397
+ # This is only required when parsing a document containing a single
398
+ # numeric value, integer or float. The parser has no other way to
399
+ # detect when it should no longer expect additional characters with
400
+ # which to complete the parse, so it must be signaled by a call to
401
+ # this method.
402
+ #
403
+ # If you're parsing more typical object or array documents, there's no
404
+ # need to call `finish` because the parse will complete when the final
405
+ # closing `]` or `}` character is scanned.
406
+ #
407
+ # Raises a JSON::Stream::ParserError if the JSON data is malformed.
408
+ #
409
+ # Returns nothing.
410
+ def finish
411
+ # Partial multi-byte character waiting for completion bytes.
412
+ error('Unexpected end-of-file') unless @utf8.empty?
413
+
414
+ # Partial array, object, or string.
415
+ error('Unexpected end-of-file') unless @stack.empty?
416
+
417
+ case @state
418
+ when :end_document
419
+ # done, do nothing
420
+ when :in_float
421
+ end_value(@buf.to_f)
422
+ when :in_exponent
423
+ error('Unexpected end-of-file') unless @buf =~ DIGIT_END
424
+ end_value(@buf.to_f)
425
+ when :start_zero
426
+ end_value(@buf.to_i)
427
+ when :start_int
428
+ end_value(@buf.to_i)
429
+ else
430
+ error('Unexpected end-of-file')
431
+ end
432
+ end
433
+
434
434
  private
435
435
 
436
436
  # Invoke all registered observer procs for the event type.
@@ -2,6 +2,6 @@
2
2
 
3
3
  module JSON
4
4
  module Stream
5
- VERSION = '0.2.0'
5
+ VERSION = '0.2.1'
6
6
  end
7
7
  end
@@ -51,6 +51,13 @@ describe JSON::Stream::Buffer do
51
51
  assert_equal "\u{10102}", subject << "\x82"
52
52
  end
53
53
 
54
+ it 'rejects valid utf-8 followed by partial two byte sequence' do
55
+ assert_equal '[', subject << '['
56
+ assert_equal '"', subject << '"'
57
+ assert_equal '', subject << "\xC3"
58
+ -> { subject << '"' }.must_raise JSON::Stream::ParserError
59
+ end
60
+
54
61
  it 'rejects invalid two byte start characters' do
55
62
  -> { subject << "\xC3\xC3" }.must_raise JSON::Stream::ParserError
56
63
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json-stream
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Graham
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-06 00:00:00.000000000 Z
11
+ date: 2014-07-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake