json-stream 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +19 -0
- data/README +82 -0
- data/Rakefile +38 -0
- data/lib/json/stream.rb +15 -0
- data/lib/json/stream/buffer.rb +63 -0
- data/lib/json/stream/builder.rb +92 -0
- data/lib/json/stream/parser.rb +430 -0
- data/test/buffer_test.rb +87 -0
- data/test/builder_test.rb +123 -0
- data/test/parser_test.rb +451 -0
- metadata +84 -0
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2010 David Graham
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
11
|
+
all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
== Welcome to JSON::Stream
|
2
|
+
|
3
|
+
JSON::Stream is a finite state machine based JSON parser that generates events
|
4
|
+
for each state change. This allows us to stream both the JSON document into
|
5
|
+
memory and the parsed object graph out of memory to some other process. This
|
6
|
+
is much like an XML SAX parser that generates events during parsing. There is
|
7
|
+
no requirement for the document nor the object graph to be fully buffered in
|
8
|
+
memory. This is best suited for huge JSON documents that won't fit in memory.
|
9
|
+
For example, streaming and processing large map/reduce views from Apache CouchDB.
|
10
|
+
|
11
|
+
== Usage
|
12
|
+
|
13
|
+
The simplest way to parse is to read the full JSON document into memory
|
14
|
+
and then parse it into a full object graph. This is fine for small documents
|
15
|
+
because we have room for both the document and parsed object in memory.
|
16
|
+
|
17
|
+
require 'json/stream'
|
18
|
+
json = File.read('/tmp/test.json')
|
19
|
+
obj = JSON::Stream::Parser.parse(json)
|
20
|
+
|
21
|
+
While it's possible to do this with JSON::Stream, we really want to use the json
|
22
|
+
gem for documents like this. JSON.parse() is much faster than this parser
|
23
|
+
because it can rely on having the entire document in memory to analyze.
|
24
|
+
|
25
|
+
For larger documents we can use an IO object to stream it into the parser.
|
26
|
+
We still need room for the parsed object, but the document itself is never
|
27
|
+
fully read into memory.
|
28
|
+
|
29
|
+
require 'json/stream'
|
30
|
+
stream = File.open('/tmp/test.json')
|
31
|
+
obj = JSON::Stream::Parser.parse(stream)
|
32
|
+
|
33
|
+
Again, while we can do this with JSON::Stream, if we just need to stream the
|
34
|
+
document from disk or the network, we're better off using the yajl-ruby gem.
|
35
|
+
|
36
|
+
Huge documents arriving over the network in small chunks to an EventMachine
|
37
|
+
receive_data loop is where JSON::Stream is really useful. Inside our
|
38
|
+
EventMachine::Connection subclass we might have:
|
39
|
+
|
40
|
+
def post_init
|
41
|
+
@parser = JSON::Stream::Parser.new do
|
42
|
+
start_document { puts "start document" }
|
43
|
+
end_document { puts "end document" }
|
44
|
+
start_object { puts "start object" }
|
45
|
+
end_object { puts "end object" }
|
46
|
+
start_array { puts "start array" }
|
47
|
+
end_array { puts "end array" }
|
48
|
+
key {|k| puts "key: #{k}" }
|
49
|
+
value {|v| puts "value: #{v}" }
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def receive_data(data)
|
54
|
+
begin
|
55
|
+
@parser << data
|
56
|
+
rescue JSON::Stream::ParserError => e
|
57
|
+
close_connection
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
Notice how the parser accepts chunks of the JSON document and parses up
|
62
|
+
to the end of the available buffer. Passing in more data resumes the
|
63
|
+
parse from the prior state. When an interesting state change happens, the
|
64
|
+
parser notifies all registered callback procs of the event.
|
65
|
+
|
66
|
+
The event callback is where we can do interesting data filtering and passing
|
67
|
+
to other processes. The above example simply prints state changes, but
|
68
|
+
imagine the callbacks looking for an array named "rows" and processing sets
|
69
|
+
of these row objects in small batches. We can process millions of rows streaming
|
70
|
+
over the network in constant memory space this way.
|
71
|
+
|
72
|
+
== Dependencies
|
73
|
+
|
74
|
+
* ruby >= 1.9.1
|
75
|
+
|
76
|
+
== Contact
|
77
|
+
|
78
|
+
Project contact: David Graham <david.malcom.graham@gmail.com>
|
79
|
+
|
80
|
+
== License
|
81
|
+
|
82
|
+
JSON::Stream is released under the MIT license. Check the LICENSE file for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# Build script for the json-stream gem: packages the gem and runs the tests.
require 'rake'
require 'rake/clean'
require 'rake/testtask'

# Rake::GemPackageTask was deprecated in favour of Gem::PackageTask and is no
# longer shipped with current Rake. Prefer the RubyGems task and only fall
# back to the old Rake one on installations that predate it.
begin
  require 'rubygems/package_task'
  package_task = Gem::PackageTask
rescue LoadError
  require 'rake/gempackagetask'
  package_task = Rake::GemPackageTask
end

# Loaded by absolute path so JSON::Stream::VERSION is available to the spec.
require File.join(File.expand_path(File.dirname(__FILE__)), 'lib', 'json', 'stream')

spec = Gem::Specification.new do |s|
  s.name = "json-stream"
  s.version = JSON::Stream::VERSION
  s.date = Time.now.strftime("%Y-%m-%d")
  s.summary = "A streaming JSON parser that generates SAX-like events."
  s.description = "A finite state machine based JSON parser that generates events
for each state change. This allows us to stream both the JSON document into
memory and the parsed object graph out of memory to some other process. This
is much like an XML SAX parser that generates events during parsing. There is
no requirement for the document nor the object graph to be fully buffered in
memory. This is best suited for huge JSON documents that won't fit in memory.
For example, streaming and processing large map/reduce views from Apache CouchDB."
  s.email = "david.malcom.graham@gmail.com"
  s.homepage = "http://github.com/dgraham/json-stream"
  s.authors = ["David Graham"]
  s.files = FileList['LICENSE', 'README', 'Rakefile', "{lib}/**/*"].to_a
  s.require_path = "lib"
  s.test_files = FileList["{test}/**/*test.rb"].to_a
  # NOTE: `has_rdoc` is intentionally not set. RubyGems deprecated it with no
  # replacement, so assigning it only produces warnings or errors.
  s.required_ruby_version = '>= 1.9.1'
end

# Builds pkg/json-stream-<version>.gem plus a tarball.
package_task.new(spec) do |pkg|
  pkg.need_tar = true
end

# Runs every test/**/*_test.rb file with Ruby warnings enabled.
Rake::TestTask.new(:test) do |test|
  test.pattern = 'test/**/*_test.rb'
  test.warning = true
end

task :default => [:clobber, :test, :gem]
|
data/lib/json/stream.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# encoding: UTF-8

# Entry point for the library. The sibling files are required with paths
# relative to this file's directory ('stream/...'), so that directory is put
# at the front of the load path, once, before anything is required.
json_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(json_dir) unless $LOAD_PATH.include?(json_dir)

# StringIO first (used by Parser.parse), then the library's own files.
%w[stringio stream/buffer stream/builder stream/parser].each do |feature|
  require feature
end

module JSON
  module Stream
    # Gem version; read by the Rakefile when building the gem specification.
    VERSION = "0.1.0"
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module JSON
  module Stream

    # A character buffer that expects a UTF-8 encoded stream of bytes.
    # This handles truncated multi-byte characters properly so we can just
    # feed it binary data and receive a properly formatted UTF-8 String as
    # output. See here for UTF-8 parsing details:
    # http://en.wikipedia.org/wiki/UTF-8
    # http://tools.ietf.org/html/rfc3629#section-3
    class Buffer
      def initialize
        # @state: :start (between characters) or :multi_byte (inside one).
        # @buf:   bytes of the multi-byte character currently being assembled.
        # @need:  total byte length of that character, from its lead byte.
        @state, @buf, @need = :start, [], 0
      end

      # Fill the buffer with a String of binary UTF-8 encoded bytes. Returns
      # as much of the data in a UTF-8 String as we have. Truncated multi-byte
      # characters are saved in the buffer until the next call to this method
      # where we expect to receive the rest of the multi-byte character.
      #
      # Raises ParserError when a byte cannot start a character, when a
      # continuation byte is missing, or when the completed bytes do not form
      # valid UTF-8.
      def <<(data)
        bytes = []
        data.each_byte do |b|
          case @state
          when :start
            if b < 128
              # Single byte (ASCII) character: emit immediately.
              bytes << b
            elsif b >= 192
              # Lead byte of a multi-byte character; its value tells us how
              # many bytes the whole character occupies.
              @state = :multi_byte
              @buf << b
              @need = case
                when b >= 240 then 4
                when b >= 224 then 3
                when b >= 192 then 2 end
            else
              # 128..191 is a continuation byte with no lead byte before it.
              error('Expected start of multi-byte or single byte char')
            end
          when :multi_byte
            if b > 127 && b < 192
              @buf << b
              if @buf.size == @need
                # Character complete: move its bytes to the output in place.
                # (Appending with concat avoids re-copying everything
                # accumulated so far for each multi-byte character.)
                bytes.concat(@buf)
                @buf.clear
                @state = :start
              end
            else
              error('Expected continuation byte')
            end
          end
        end
        # Final validation catches sequences that are structurally complete
        # but still not legal UTF-8 (e.g. overlong forms, invalid lead bytes).
        bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |str|
          error('Invalid UTF-8 byte sequence') unless str.valid_encoding?
        end
      end

      private

      # Raise a ParserError carrying the given message.
      def error(message)
        raise ParserError, message
      end
    end

  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module JSON
  module Stream
    # A parser listener that builds a full, in memory, object graph from a
    # JSON document. Typically, we would use the json gem's JSON.parse() method
    # when we have the full JSON document because it's much faster than this.
    # JSON::Stream is typically used when we have a huge JSON document streaming
    # to us and we don't want to hold the entire parsed object in memory.
    # Regardless, this is a good example of how to write parser callbacks.
    #
    #   parser = JSON::Stream::Parser.new
    #   builder = JSON::Stream::Builder.new(parser)
    #   parser << json
    #   obj = builder.result
    class Builder
      # Names of the parser events this builder subscribes to. Each one is
      # both a registration method on the parser and a handler method here.
      METHODS = %w[start_document end_document start_object end_object start_array end_array key value].freeze

      # The finished object graph; nil until end_document has been received.
      attr_reader :result

      # Register one callback per event on the given parser. The parser must
      # respond to every name in METHODS and accept a block for each.
      def initialize(parser)
        # State is created up front so a builder is well-defined before the
        # first event arrives (start_document resets it for each document).
        @stack, @result = [], nil
        METHODS.each do |name|
          parser.send(name, &method(name))
        end
      end

      # Begin a new document: discard any previous result and open containers.
      def start_document
        @stack, @result = [], nil
      end

      # The root container is the only node left on the stack; publish it.
      def end_document
        @result = @stack.pop.obj
      end

      def start_object
        @stack.push(ObjectNode.new)
      end

      # Close the current container and attach it to its parent. The root
      # container (stack size 1) stays on the stack for end_document.
      def end_object
        unless @stack.size == 1
          node = @stack.pop
          @stack[-1] << node.obj
        end
      end
      alias :end_array :end_object

      def start_array
        @stack.push(ArrayNode.new)
      end

      # Hand an object key to the container currently being built.
      def key(key)
        @stack[-1] << key
      end

      # Hand a scalar value to the container currently being built.
      def value(value)
        @stack[-1] << value
      end
    end

    # Accumulates the elements of a JSON array.
    class ArrayNode
      attr_reader :obj

      def initialize
        @obj = []
      end

      # Append an element; returns self so calls can be chained.
      def <<(node)
        @obj << node
        self
      end
    end

    # Accumulates the members of a JSON object. Nodes arrive alternately as
    # key, value, key, value...; a pending key is held until its value shows up.
    class ObjectNode
      attr_reader :obj

      def initialize
        @obj, @key = {}, nil
      end

      # With a key pending, store node as its value; otherwise remember node
      # as the next key. Returns self so calls can be chained.
      def <<(node)
        if @key
          @obj[@key] = node
          @key = nil
        else
          @key = node
        end
        self
      end
    end

  end
end
|
@@ -0,0 +1,430 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module JSON
  module Stream

    # Raised for any malformed input: invalid UTF-8 or invalid JSON syntax.
    class ParserError < RuntimeError; end

    # A streaming JSON parser that generates SAX-like events for
    # state changes. Use the json gem for small documents. Use this
    # for huge documents that won't fit in memory.
    class Parser
      BUF_SIZE      = 512            # bytes read per chunk by Parser.parse
      CONTROL       = /[[:cntrl:]]/
      WS            = /\s/
      HEX           = /[0-9a-fA-F]/
      DIGIT         = /[0-9]/
      DIGIT_1_9     = /[1-9]/
      DIGIT_END     = /\d$/
      TRUE_RE       = /[rue]/        # characters allowed after the leading 't'
      FALSE_RE      = /[alse]/       # characters allowed after the leading 'f'
      NULL_RE       = /[ul]/         # characters allowed after the leading 'n'
      TRUE_KEYWORD  = 'true'
      FALSE_KEYWORD = 'false'
      NULL_KEYWORD  = 'null'
      LEFT_BRACE    = '{'
      RIGHT_BRACE   = '}'
      LEFT_BRACKET  = '['
      RIGHT_BRACKET = ']'
      BACKSLASH     = '\\'
      SLASH         = '/'
      QUOTE         = '"'
      COMMA         = ','
      COLON         = ':'
      ZERO          = '0'
      MINUS         = '-'
      PLUS          = '+'
      POINT         = '.'
      EXPONENT      = /[eE]/
      B,F,N,R,T,U   = %w[b f n r t u]

      # Parses a full JSON document from a String or an IO stream and returns
      # the parsed object graph. For parsing small JSON documents with small
      # memory requirements, use the json gem's faster JSON.parse method instead.
      #
      # The stream (including one passed in by the caller) is closed before
      # returning. Raises ParserError on malformed input or when the input
      # ends before the document is complete.
      def self.parse(json)
        stream = json.is_a?(String) ? StringIO.new(json) : json
        parser = Parser.new
        builder = Builder.new(parser)
        # IO#read(n) returns nil at end of input.
        while (buf = stream.read(BUF_SIZE))
          parser << buf
        end
        raise ParserError, "unexpected eof" unless builder.result
        builder.result
      ensure
        stream.close
      end

      # For every event, define a public registration method (e.g.
      # +start_object { ... }+) that stores the block, and a private
      # +notify_<event>+ method that calls each stored block in order.
      %w[start_document end_document start_object end_object
         start_array end_array key value].each do |name|

        define_method(name) do |&block|
          @listeners[name] << block
        end

        define_method("notify_#{name}") do |*args|
          @listeners[name].each do |block|
            block.call(*args)
          end
        end
        private "notify_#{name}"
      end

      # Create a new parser with an optional initialization block where
      # we can register event callbacks. For example:
      #   parser = JSON::Stream::Parser.new do
      #     start_document { puts "start document" }
      #     end_document   { puts "end document" }
      #     start_object   { puts "start object" }
      #     end_object     { puts "end object" }
      #     start_array    { puts "start array" }
      #     end_array      { puts "end array" }
      #     key            {|k| puts "key: #{k}" }
      #     value          {|v| puts "value: #{v}" }
      #   end
      def initialize(&block)
        @state = :start_document
        @utf8 = Buffer.new
        @listeners = Hash.new {|h, k| h[k] = [] }
        # @stack:   open containers (:object/:array), the kind of string being
        #           read (:key/:string) and a pending high surrogate (Integer).
        # @unicode: hex digits collected for the current \uXXXX escape.
        # @buf:     text of the string, number or keyword being read.
        # @pos:     zero-based index of the current character, for errors.
        @stack, @unicode, @buf, @pos = [], "", "", -1
        instance_eval(&block) if block_given?
      end

      # Pass data into the parser to advance the state machine and
      # generate callback events. This is well suited for an EventMachine
      # receive_data loop.
      #
      # data is a String of (possibly partial) UTF-8 encoded JSON. Raises
      # ParserError, with the offending character position, on invalid input.
      def <<(data)
        (@utf8 << data).each_char do |ch|
          @pos += 1
          case @state
          when :start_document
            case ch
            when LEFT_BRACE
              @state = :start_object
              @stack.push(:object)
              notify_start_document
              notify_start_object
            when LEFT_BRACKET
              @state = :start_array
              @stack.push(:array)
              notify_start_document
              notify_start_array
            when WS
              # ignore
            else
              error("Expected object or array start")
            end
          when :start_object
            case ch
            when RIGHT_BRACE
              end_container(:object)
            when QUOTE
              @state = :start_string
              @stack.push(:key)
            when WS
              # ignore
            else
              error("Expected object key start")
            end
          when :start_string
            case ch
            when QUOTE
              # The stack top says whether this string was a value or a key.
              if @stack.pop == :string
                @state = :end_value
                notify_value(@buf)
              else # :key
                @state = :end_key
                notify_key(@buf)
              end
              @buf = ""
            when BACKSLASH
              @state = :start_escape
            when CONTROL
              error('Control characters must be escaped')
            else
              @buf << ch
            end
          when :start_escape
            case ch
            when QUOTE, BACKSLASH, SLASH
              @buf << ch
              @state = :start_string
            when B
              @buf << "\b"
              @state = :start_string
            when F
              @buf << "\f"
              @state = :start_string
            when N
              @buf << "\n"
              @state = :start_string
            when R
              @buf << "\r"
              @state = :start_string
            when T
              @buf << "\t"
              @state = :start_string
            when U
              @state = :unicode_escape
            else
              error("Expected escaped character")
            end
          when :unicode_escape
            case ch
            when HEX
              @unicode << ch
              if @unicode.size == 4
                codepoint = @unicode.slice!(0, 4).hex
                # A pending high surrogate is kept on the stack as an Integer.
                # (Integer rather than Fixnum: Fixnum no longer exists in
                # current Ruby versions.)
                if codepoint >= 0xD800 && codepoint <= 0xDBFF
                  error('Expected low surrogate pair half') if @stack[-1].is_a?(Integer)
                  @state = :start_surrogate_pair
                  @stack.push(codepoint)
                elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
                  high = @stack.pop
                  error('Expected high surrogate pair half') unless high.is_a?(Integer)
                  pair = ((high - 0xD800) * 0x400) + (codepoint - 0xDC00) + 0x10000
                  # String#<< with an Integer appends that code point.
                  @buf << pair
                  @state = :start_string
                else
                  # An ordinary code point must not follow an unpaired high
                  # surrogate; otherwise the leftover stack entry would be
                  # mistaken for the string's kind at the closing quote.
                  error('Expected low surrogate pair half') if @stack[-1].is_a?(Integer)
                  @buf << codepoint
                  @state = :start_string
                end
              end
            else
              error('Expected unicode escape hex digit')
            end
          when :start_surrogate_pair
            case ch
            when BACKSLASH
              @state = :start_surrogate_pair_u
            else
              error('Expected low surrogate pair half')
            end
          when :start_surrogate_pair_u
            case ch
            when U
              @state = :unicode_escape
            else
              error('Expected low surrogate pair half')
            end
          when :start_negative_number
            case ch
            when ZERO
              @state = :start_zero
              @buf << ch
            when DIGIT_1_9
              @state = :start_int
              @buf << ch
            else
              error('Expected 0-9 digit')
            end
          when :start_zero
            case ch
            when POINT
              @state = :start_float
              @buf << ch
            when EXPONENT
              @state = :start_exponent
              @buf << ch
            else
              # The number ended on the previous character: emit it, then
              # re-process this character in the new state.
              @state = :end_value
              notify_value(@buf.to_i)
              @buf = ""
              @pos -= 1
              redo
            end
          when :start_float
            case ch
            when DIGIT
              @state = :in_float
              @buf << ch
            else
              error('Expected 0-9 digit')
            end
          when :in_float
            case ch
            when DIGIT
              @buf << ch
            when EXPONENT
              @state = :start_exponent
              @buf << ch
            else
              @state = :end_value
              notify_value(@buf.to_f)
              @buf = ""
              @pos -= 1
              redo
            end
          when :start_exponent
            case ch
            when MINUS, PLUS, DIGIT
              @state = :in_exponent
              @buf << ch
            else
              error('Expected +, -, or 0-9 digit')
            end
          when :in_exponent
            case ch
            when DIGIT
              @buf << ch
            else
              error('Expected 0-9 digit') unless @buf =~ DIGIT_END
              @state = :end_value
              # Always convert with to_f: String#to_i stops at the 'e', which
              # would turn "1e2" into 1 instead of 100.0.
              notify_value(@buf.to_f)
              @buf = ""
              @pos -= 1
              redo
            end
          when :start_int
            case ch
            when DIGIT
              @buf << ch
            when POINT
              @state = :start_float
              @buf << ch
            when EXPONENT
              @state = :start_exponent
              @buf << ch
            else
              @state = :end_value
              notify_value(@buf.to_i)
              @buf = ""
              @pos -= 1
              redo
            end
          when :start_true
            keyword(TRUE_KEYWORD, true, TRUE_RE, ch)
          when :start_false
            keyword(FALSE_KEYWORD, false, FALSE_RE, ch)
          when :start_null
            keyword(NULL_KEYWORD, nil, NULL_RE, ch)
          when :end_key
            case ch
            when COLON
              @state = :key_sep
            when WS
              # ignore
            else
              error("Expected colon key separator")
            end
          when :key_sep
            start_value(ch)
          when :start_array
            case ch
            when RIGHT_BRACKET
              end_container(:array)
            when WS
              # ignore
            else
              start_value(ch)
            end
          when :end_value
            case ch
            when COMMA
              @state = :value_sep
            when RIGHT_BRACKET
              end_container(:array)
            when RIGHT_BRACE
              end_container(:object)
            when WS
              # ignore
            else
              error("Expected comma or object or array close")
            end
          when :value_sep
            # After a comma an object expects a key, an array expects a value.
            if @stack[-1] == :object
              case ch
              when QUOTE
                @state = :start_string
                @stack.push(:key)
              when WS
                # ignore
              else
                error("Expected object key start")
              end
            else
              start_value(ch)
            end
          when :end_document
            error("Unexpected data") unless ch =~ WS
          end
        end
      end

      private

      # Close the innermost container, which must be of the given type
      # (:object or :array), and fire its end event. Closing the outermost
      # container also ends the document.
      def end_container(type)
        @state = :end_value
        if @stack.pop == type
          send("notify_end_#{type}")
        else
          error("Expected end of #{type}")
        end
        if @stack.empty?
          @state = :end_document
          notify_end_document
        end
      end

      # Consume one character of the literal +word+ (true/false/null). +re+
      # matches the characters that may follow the first letter; once enough
      # characters are buffered they must spell +word+ exactly, at which point
      # +value+ is emitted.
      def keyword(word, value, re, ch)
        if ch =~ re
          @buf << ch
        else
          error("Expected #{word} keyword")
        end
        if @buf.size == word.size
          if @buf == word
            @state = :end_value
            @buf = ""
            notify_value(value)
          else
            error("Expected #{word} keyword")
          end
        end
      end

      # Dispatch on the first character of a value: open a container, start a
      # string, or begin buffering a keyword or number.
      def start_value(ch)
        case ch
        when LEFT_BRACE
          @state = :start_object
          @stack.push(:object)
          notify_start_object
        when LEFT_BRACKET
          @state = :start_array
          @stack.push(:array)
          notify_start_array
        when QUOTE
          @state = :start_string
          @stack.push(:string)
        when T
          @state = :start_true
          @buf << ch
        when F
          @state = :start_false
          @buf << ch
        when N
          @state = :start_null
          @buf << ch
        when MINUS
          @state = :start_negative_number
          @buf << ch
        when ZERO
          @state = :start_zero
          @buf << ch
        when DIGIT_1_9
          @state = :start_int
          @buf << ch
        when WS
          # ignore
        else
          error("Expected value")
        end
      end

      # Raise a ParserError annotated with the current character position.
      def error(message)
        raise ParserError, "#{message}: char #{@pos}"
      end
    end

  end
end
|