json-stream 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 15b32baaa4333e97f2eae71f21a03af04e8275ea
4
- data.tar.gz: ea3b178e62a46ae093e5d362abd554fa3c361241
3
+ metadata.gz: 62af90ebe18d5c8a58ca9b75695bf2c403b4c4a0
4
+ data.tar.gz: d3176279af8156702e63ffdf4c963ab6f0e25cb3
5
5
  SHA512:
6
- metadata.gz: c88c6208cdb0597e6ba9ad1d9a86d62605f2329176eabfc15eb43f227a61efae3c1ff66cd1efe340e73245258e325076b6ecdacf6a3f5b870d03ec03d35c986c
7
- data.tar.gz: cb403d67b89ec3f609b5b9e61b9f12c6a93d654206dbed37b5e9d20a8b94ecd1bd717bfb7f72bf129e3ee923904c4ac033fc90da569879c28e9140b5aa6103bd
6
+ metadata.gz: 5d757855dcd79878bb9d5cbdd74c05252bde3286259b88315e95b3302ce60bc24bbf2d6f6e0ab32a5507b55bbdeae3b63ab405368880cfb29b1981f3cb291146
7
+ data.tar.gz: 0a90cf028ed263a4dc571c4834e1b033d9c23b17aaaef36a7275d44e804a69c1c7c8090ea119411858608bfb45a8c6ada7b2e4e6c8862294ef54ef091b1030e0
data/README.md CHANGED
@@ -2,9 +2,10 @@
2
2
 
3
3
  JSON::Stream is a JSON parser, based on a finite state machine, that generates
4
4
  events for each state change. This allows streaming both the JSON document into
5
- memory and the parsed object graph out of memory to some other process. This
6
- is much like an XML SAX parser that generates events during parsing. There is
7
- no requirement for the document, or the object graph, to be fully buffered in
5
+ memory and the parsed object graph out of memory to some other process.
6
+
7
+ This is much like an XML SAX parser that generates events during parsing. There
8
+ is no requirement for the document, or the object graph, to be fully buffered in
8
9
  memory. This is best suited for huge JSON documents that won't fit in memory.
9
10
  For example, streaming and processing large map/reduce views from Apache
10
11
  CouchDB.
@@ -14,7 +14,7 @@ module JSON
14
14
  class Buffer
15
15
  def initialize
16
16
  @state = :start
17
- @buf = []
17
+ @buffer = []
18
18
  @need = 0
19
19
  end
20
20
 
@@ -29,6 +29,12 @@ module JSON
29
29
  #
30
30
  # Returns a UTF-8 encoded String.
31
31
  def <<(data)
32
+ # Avoid state machine for complete UTF-8.
33
+ if @buffer.empty?
34
+ data.force_encoding(Encoding::UTF_8)
35
+ return data if data.valid_encoding?
36
+ end
37
+
32
38
  bytes = []
33
39
  data.each_byte do |byte|
34
40
  case @state
@@ -37,7 +43,7 @@ module JSON
37
43
  bytes << byte
38
44
  elsif byte >= 192
39
45
  @state = :multi_byte
40
- @buf << byte
46
+ @buffer << byte
41
47
  @need =
42
48
  case
43
49
  when byte >= 240 then 4
@@ -49,9 +55,9 @@ module JSON
49
55
  end
50
56
  when :multi_byte
51
57
  if byte > 127 && byte < 192
52
- @buf << byte
53
- if @buf.size == @need
54
- bytes += @buf.slice!(0, @buf.size)
58
+ @buffer << byte
59
+ if @buffer.size == @need
60
+ bytes += @buffer.slice!(0, @buffer.size)
55
61
  @state = :start
56
62
  end
57
63
  else
@@ -59,8 +65,10 @@ module JSON
59
65
  end
60
66
  end
61
67
  end
62
- bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |str|
63
- error('Invalid UTF-8 byte sequence') unless str.valid_encoding?
68
+
69
+ # Build UTF-8 encoded string from completed codepoints.
70
+ bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |text|
71
+ error('Invalid UTF-8 byte sequence') unless text.valid_encoding?
64
72
  end
65
73
  end
66
74
 
@@ -82,7 +90,7 @@ module JSON
82
90
  #
83
91
  # Returns true if the buffer is empty.
84
92
  def empty?
85
- @buf.empty?
93
+ @buffer.empty?
86
94
  end
87
95
 
88
96
  private
@@ -38,13 +38,15 @@ module JSON
38
38
 
39
39
  def end_object
40
40
  return if @stack.size == 1
41
+
41
42
  node = @stack.pop
43
+ top = @stack[-1]
42
44
 
43
- case @stack.last
45
+ case top
44
46
  when Hash
45
- @stack.last[@keys.pop] = node
47
+ top[@keys.pop] = node
46
48
  when Array
47
- @stack.last << node
49
+ top << node
48
50
  end
49
51
  end
50
52
  alias :end_array :end_object
@@ -58,11 +60,12 @@ module JSON
58
60
  end
59
61
 
60
62
  def value(value)
61
- case @stack.last
63
+ top = @stack[-1]
64
+ case top
62
65
  when Hash
63
- @stack.last[@keys.pop] = value
66
+ top[@keys.pop] = value
64
67
  when Array
65
- @stack.last << value
68
+ top << value
66
69
  else
67
70
  @stack << value
68
71
  end
@@ -73,46 +73,6 @@ module JSON
73
73
  stream.close
74
74
  end
75
75
 
76
- # Drain any remaining buffered characters into the parser to complete
77
- # the parsing of the document.
78
- #
79
- # This is only required when parsing a document containing a single
80
- # numeric value, integer or float. The parser has no other way to
81
- # detect when it should no longer expect additional characters with
82
- # which to complete the parse, so it must be signaled by a call to
83
- # this method.
84
- #
85
- # If you're parsing more typical object or array documents, there's no
86
- # need to call `finish` because the parse will complete when the final
87
- # closing `]` or `}` character is scanned.
88
- #
89
- # Raises a JSON::Stream::ParserError if the JSON data is malformed.
90
- #
91
- # Returns nothing.
92
- def finish
93
- # Partial multi-byte character waiting for completion bytes.
94
- error('Unexpected end-of-file') unless @utf8.empty?
95
-
96
- # Partial array, object, or string.
97
- error('Unexpected end-of-file') unless @stack.empty?
98
-
99
- case @state
100
- when :end_document
101
- # done, do nothing
102
- when :in_float
103
- end_value(@buf.to_f)
104
- when :in_exponent
105
- error('Unexpected end-of-file') unless @buf =~ DIGIT_END
106
- end_value(@buf.to_f)
107
- when :start_zero
108
- end_value(@buf.to_i)
109
- when :start_int
110
- end_value(@buf.to_i)
111
- else
112
- error('Unexpected end-of-file')
113
- end
114
- end
115
-
116
76
  # Create a new parser with an optional initialization block where
117
77
  # we can register event callbacks.
118
78
  #
@@ -201,11 +161,11 @@ module JSON
201
161
  start_value(ch)
202
162
  when :start_object
203
163
  case ch
204
- when RIGHT_BRACE
205
- end_container(:object)
206
164
  when QUOTE
207
165
  @state = :start_string
208
166
  @stack.push(:key)
167
+ when RIGHT_BRACE
168
+ end_container(:object)
209
169
  when WS
210
170
  # ignore
211
171
  else
@@ -260,7 +220,7 @@ module JSON
260
220
  if @unicode.size == 4
261
221
  codepoint = @unicode.slice!(0, 4).hex
262
222
  if codepoint >= 0xD800 && codepoint <= 0xDBFF
263
- error('Expected low surrogate pair half') if @stack.last.is_a?(Fixnum)
223
+ error('Expected low surrogate pair half') if @stack[-1].is_a?(Fixnum)
264
224
  @state = :start_surrogate_pair
265
225
  @stack.push(codepoint)
266
226
  elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
@@ -402,17 +362,17 @@ module JSON
402
362
  case ch
403
363
  when COMMA
404
364
  @state = :value_sep
405
- when RIGHT_BRACKET
406
- end_container(:array)
407
365
  when RIGHT_BRACE
408
366
  end_container(:object)
367
+ when RIGHT_BRACKET
368
+ end_container(:array)
409
369
  when WS
410
370
  # ignore
411
371
  else
412
372
  error('Expected comma or object or array close')
413
373
  end
414
374
  when :value_sep
415
- if @stack.last == :object
375
+ if @stack[-1] == :object
416
376
  case ch
417
377
  when QUOTE
418
378
  @state = :start_string
@@ -431,6 +391,46 @@ module JSON
431
391
  end
432
392
  end
433
393
 
394
+ # Drain any remaining buffered characters into the parser to complete
395
+ # the parsing of the document.
396
+ #
397
+ # This is only required when parsing a document containing a single
398
+ # numeric value, integer or float. The parser has no other way to
399
+ # detect when it should no longer expect additional characters with
400
+ # which to complete the parse, so it must be signaled by a call to
401
+ # this method.
402
+ #
403
+ # If you're parsing more typical object or array documents, there's no
404
+ # need to call `finish` because the parse will complete when the final
405
+ # closing `]` or `}` character is scanned.
406
+ #
407
+ # Raises a JSON::Stream::ParserError if the JSON data is malformed.
408
+ #
409
+ # Returns nothing.
410
+ def finish
411
+ # Partial multi-byte character waiting for completion bytes.
412
+ error('Unexpected end-of-file') unless @utf8.empty?
413
+
414
+ # Partial array, object, or string.
415
+ error('Unexpected end-of-file') unless @stack.empty?
416
+
417
+ case @state
418
+ when :end_document
419
+ # done, do nothing
420
+ when :in_float
421
+ end_value(@buf.to_f)
422
+ when :in_exponent
423
+ error('Unexpected end-of-file') unless @buf =~ DIGIT_END
424
+ end_value(@buf.to_f)
425
+ when :start_zero
426
+ end_value(@buf.to_i)
427
+ when :start_int
428
+ end_value(@buf.to_i)
429
+ else
430
+ error('Unexpected end-of-file')
431
+ end
432
+ end
433
+
434
434
  private
435
435
 
436
436
  # Invoke all registered observer procs for the event type.
@@ -2,6 +2,6 @@
2
2
 
3
3
  module JSON
4
4
  module Stream
5
- VERSION = '0.2.0'
5
+ VERSION = '0.2.1'
6
6
  end
7
7
  end
@@ -51,6 +51,13 @@ describe JSON::Stream::Buffer do
51
51
  assert_equal "\u{10102}", subject << "\x82"
52
52
  end
53
53
 
54
+ it 'rejects valid utf-8 followed by partial two byte sequence' do
55
+ assert_equal '[', subject << '['
56
+ assert_equal '"', subject << '"'
57
+ assert_equal '', subject << "\xC3"
58
+ -> { subject << '"' }.must_raise JSON::Stream::ParserError
59
+ end
60
+
54
61
  it 'rejects invalid two byte start characters' do
55
62
  -> { subject << "\xC3\xC3" }.must_raise JSON::Stream::ParserError
56
63
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json-stream
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Graham
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-06 00:00:00.000000000 Z
11
+ date: 2014-07-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake