json-stream 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +19 -0
- data/README +82 -0
- data/Rakefile +38 -0
- data/lib/json/stream.rb +15 -0
- data/lib/json/stream/buffer.rb +63 -0
- data/lib/json/stream/builder.rb +92 -0
- data/lib/json/stream/parser.rb +430 -0
- data/test/buffer_test.rb +87 -0
- data/test/builder_test.rb +123 -0
- data/test/parser_test.rb +451 -0
- metadata +84 -0
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2010 David Graham
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
11
|
+
all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
== Welcome to JSON::Stream
|
2
|
+
|
3
|
+
JSON::Stream is a finite state machine based JSON parser that generates events
|
4
|
+
for each state change. This allows us to stream both the JSON document into
|
5
|
+
memory and the parsed object graph out of memory to some other process. This
|
6
|
+
is much like an XML SAX parser that generates events during parsing. There is
|
7
|
+
no requirement for the document nor the object graph to be fully buffered in
|
8
|
+
memory. This is best suited for huge JSON documents that won't fit in memory.
|
9
|
+
For example, streaming and processing large map/reduce views from Apache CouchDB.
|
10
|
+
|
11
|
+
== Usage
|
12
|
+
|
13
|
+
The simplest way to parse is to read the full JSON document into memory
|
14
|
+
and then parse it into a full object graph. This is fine for small documents
|
15
|
+
because we have room for both the document and parsed object in memory.
|
16
|
+
|
17
|
+
require 'json/stream'
|
18
|
+
json = File.read('/tmp/test.json')
|
19
|
+
obj = JSON::Stream::Parser.parse(json)
|
20
|
+
|
21
|
+
While it's possible to do this with JSON::Stream, we really want to use the json
|
22
|
+
gem for documents like this. JSON.parse() is much faster than this parser
|
23
|
+
because it can rely on having the entire document in memory to analyze.
|
24
|
+
|
25
|
+
For larger documents we can use an IO object to stream it into the parser.
|
26
|
+
We still need room for the parsed object, but the document itself is never
|
27
|
+
fully read into memory.
|
28
|
+
|
29
|
+
require 'json/stream'
|
30
|
+
stream = File.open('/tmp/test.json')
|
31
|
+
obj = JSON::Stream::Parser.parse(stream)
|
32
|
+
|
33
|
+
Again, while we can do this with JSON::Stream, if we just need to stream the
|
34
|
+
document from disk or the network, we're better off using the yajl-ruby gem.
|
35
|
+
|
36
|
+
Huge documents arriving over the network in small chunks to an EventMachine
|
37
|
+
receive_data loop is where JSON::Stream is really useful. Inside our
|
38
|
+
EventMachine::Connection subclass we might have:
|
39
|
+
|
40
|
+
def post_init
|
41
|
+
@parser = JSON::Stream::Parser.new do
|
42
|
+
start_document { puts "start document" }
|
43
|
+
end_document { puts "end document" }
|
44
|
+
start_object { puts "start object" }
|
45
|
+
end_object { puts "end object" }
|
46
|
+
start_array { puts "start array" }
|
47
|
+
end_array { puts "end array" }
|
48
|
+
key {|k| puts "key: #{k}" }
|
49
|
+
value {|v| puts "value: #{v}" }
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def receive_data(data)
|
54
|
+
begin
|
55
|
+
@parser << data
|
56
|
+
rescue JSON::Stream::ParserError => e
|
57
|
+
close_connection
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
Notice how the parser accepts chunks of the JSON document and parses up
|
62
|
+
to the end of the available buffer. Passing in more data resumes the
|
63
|
+
parse from the prior state. When an interesting state change happens, the
|
64
|
+
parser notifies all registered callback procs of the event.
|
65
|
+
|
66
|
+
The event callback is where we can do interesting data filtering and passing
|
67
|
+
to other processes. The above example simply prints state changes, but
|
68
|
+
imagine the callbacks looking for an array named "rows" and processing sets
|
69
|
+
of these row objects in small batches. We can process millions of rows streaming
|
70
|
+
over the network in constant memory space this way.
|
71
|
+
|
72
|
+
== Dependencies
|
73
|
+
|
74
|
+
* ruby >= 1.9.1
|
75
|
+
|
76
|
+
== Contact
|
77
|
+
|
78
|
+
Project contact: David Graham <david.malcom.graham@gmail.com>
|
79
|
+
|
80
|
+
== License
|
81
|
+
|
82
|
+
JSON::Stream is released under the MIT license. Check the LICENSE file for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/clean'
|
3
|
+
require 'rake/gempackagetask'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), 'lib', 'json', 'stream')
|
6
|
+
|
7
|
+
# Gem specification for json-stream. The version comes from
# JSON::Stream::VERSION, which the require of lib/json/stream above loads.
spec = Gem::Specification.new do |s|
  s.name = "json-stream"
  s.version = JSON::Stream::VERSION
  # The release date is stamped at build time, not hard-coded.
  s.date = Time.now.strftime("%Y-%m-%d")
  s.summary = "A streaming JSON parser that generates SAX-like events."
  s.description = "A finite state machine based JSON parser that generates events
for each state change. This allows us to stream both the JSON document into
memory and the parsed object graph out of memory to some other process. This
is much like an XML SAX parser that generates events during parsing. There is
no requirement for the document nor the object graph to be fully buffered in
memory. This is best suited for huge JSON documents that won't fit in memory.
For example, streaming and processing large map/reduce views from Apache CouchDB."
  s.email = "david.malcom.graham@gmail.com"
  s.homepage = "http://github.com/dgraham/json-stream"
  s.authors = ["David Graham"]
  # Package the top-level docs, this Rakefile and everything under lib/.
  s.files = FileList['LICENSE', 'README', 'Rakefile', "{lib}/**/*"].to_a
  s.require_path = "lib"
  s.test_files = FileList["{test}/**/*test.rb"].to_a
  # NOTE(review): has_rdoc is believed to be deprecated in later RubyGems
  # releases - confirm before upgrading the toolchain.
  s.has_rdoc = true
  s.required_ruby_version = '>= 1.9.1'
end

# Defines the gem packaging tasks; need_tar additionally requests a tarball.
# NOTE(review): later Rake releases are believed to replace
# Rake::GemPackageTask with Gem::PackageTask - confirm before upgrading.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end

# Runs every test/**/*_test.rb file with Ruby warnings enabled.
Rake::TestTask.new(:test) do |test|
  test.pattern = 'test/**/*_test.rb'
  test.warning = true
end

# Default build: clean everything, run the tests, then package the gem.
task :default => [:clobber, :test, :gem]
|
data/lib/json/stream.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
$:.unshift File.dirname(__FILE__) unless
|
4
|
+
$:.include?(File.dirname(__FILE__))
|
5
|
+
|
6
|
+
require 'stringio'
|
7
|
+
require 'stream/buffer'
|
8
|
+
require 'stream/builder'
|
9
|
+
require 'stream/parser'
|
10
|
+
|
11
|
+
# Top-level namespace of the json-stream gem.
module JSON
  module Stream
    # Gem version string; the Rakefile reads it to populate the gemspec.
    VERSION = "0.1.0"
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module JSON
|
4
|
+
module Stream
|
5
|
+
|
6
|
+
# A character buffer that expects a UTF-8 encoded stream of bytes.
# This handles truncated multi-byte characters properly so we can just
# feed it binary data and receive a properly formatted UTF-8 String as
# output. See here for UTF-8 parsing details:
# http://en.wikipedia.org/wiki/UTF-8
# http://tools.ietf.org/html/rfc3629#section-3
class Buffer
  def initialize
    # @state is :start (between characters) or :multi_byte (inside one),
    # @buf holds the bytes of the character being assembled and @need is
    # the total byte length announced by its lead byte.
    @state, @buf, @need = :start, [], 0
  end

  # Fill the buffer with a String of binary UTF-8 encoded bytes. Returns
  # as much of the data in a UTF-8 String as we have. Truncated multi-byte
  # characters are saved in the buffer until the next call to this method
  # where we expect to receive the rest of the multi-byte character.
  #
  # Raises ParserError when a byte cannot start a character, when a
  # continuation byte is missing, or when the assembled bytes are not
  # valid UTF-8.
  def <<(data)
    bytes = []
    data.each_byte do |b|
      case @state
      when :start
        if b < 0x80
          bytes << b
        elsif b >= 0xC2 && b <= 0xF4
          # 0xC0, 0xC1 and 0xF5..0xFF never appear in UTF-8 (RFC 3629), so
          # only 0xC2..0xF4 may open a multi-byte sequence. Rejecting the
          # others here keeps an invalid lead byte from sitting in the
          # buffer waiting for continuation bytes.
          @state = :multi_byte
          @buf << b
          @need =
            if b >= 0xF0 then 4
            elsif b >= 0xE0 then 3
            else 2
            end
        else
          error('Expected start of multi-byte or single byte char')
        end
      when :multi_byte
        if b >= 0x80 && b < 0xC0
          @buf << b
          if @buf.size == @need
            # Character complete: move its bytes to the output in place.
            bytes.concat(@buf)
            @buf.clear
            @state = :start
          end
        else
          error('Expected continuation byte')
        end
      end
    end
    # Structural checks above do not catch overlong forms, surrogates or
    # code points beyond U+10FFFF; let Ruby validate the final string.
    str = bytes.pack('C*').force_encoding(Encoding::UTF_8)
    error('Invalid UTF-8 byte sequence') unless str.valid_encoding?
    str
  end

  private

  def error(message)
    raise ParserError, message
  end
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module JSON
|
4
|
+
module Stream
|
5
|
+
# A parser listener that assembles the complete object graph of a JSON
# document in memory. When the whole document is already available the
# json gem's JSON.parse() is much faster; this class mostly serves as a
# worked example of how parser callbacks are written.
#
#   parser = JSON::Stream::Parser.new
#   builder = JSON::Stream::Builder.new(parser)
#   parser << json
#   obj = builder.result
class Builder
  # Names of the parser events this builder subscribes to; each one is
  # also the name of the handler method below.
  METHODS = %w[start_document end_document start_object end_object start_array end_array key value]

  # The parsed object graph; nil until the end_document event fires.
  attr_reader :result

  # Registers one callback per event on the given parser, each bound to
  # the method of the same name on this builder.
  def initialize(parser)
    METHODS.each { |event| parser.send(event, &method(event)) }
  end

  # Resets the container stack and clears any previous result.
  def start_document
    @stack, @result = [], nil
  end

  # The only node left on the stack is the root container.
  def end_document
    root = @stack.pop
    @result = root.obj
  end

  def start_object
    @stack << ObjectNode.new
  end

  def start_array
    @stack << ArrayNode.new
  end

  # Closes the innermost container and attaches it to its parent. The root
  # container stays on the stack so end_document can pick it up.
  def end_object
    return if @stack.size == 1
    finished = @stack.pop
    @stack.last << finished.obj
  end
  alias_method :end_array, :end_object

  def key(key)
    @stack.last << key
  end

  def value(value)
    @stack.last << value
  end
end
|
59
|
+
|
60
|
+
# Accumulates the elements of a JSON array while it is being parsed.
class ArrayNode
  # The Ruby Array built so far.
  attr_reader :obj

  def initialize
    @obj = Array.new
  end

  # Appends an element and returns the node itself so calls can be chained.
  def <<(node)
    @obj.push(node)
    self
  end
end
|
72
|
+
|
73
|
+
# Accumulates the members of a JSON object while it is being parsed.
# Nodes arrive alternately: first a key, then the value for that key.
class ObjectNode
  # The Ruby Hash built so far.
  attr_reader :obj

  def initialize
    @obj = {}
    @key = nil
  end

  # Stores the node as the pending key, or, when a key is already pending,
  # as that key's value. Returns the node itself so calls can be chained.
  def <<(node)
    pending, @key = @key, nil
    if pending
      @obj[pending] = node
    else
      @key = node
    end
    self
  end
end
|
90
|
+
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,430 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module JSON
|
4
|
+
module Stream
|
5
|
+
|
6
|
+
# Raised by Buffer for malformed UTF-8 input and by Parser for malformed
# JSON or an unexpected end of input.
class ParserError < RuntimeError; end
|
7
|
+
|
8
|
+
# A streaming JSON parser that generates SAX-like events for
# state changes. Use the json gem for small documents. Use this
# for huge documents that won't fit in memory.
#
# The parser is a character-driven state machine. @state names the current
# state; @stack tracks the open containers (:object / :array), the kind of
# string being read (:key / :string) and a pending high surrogate (an
# Integer); @buf accumulates the text of the token being read.
class Parser
  BUF_SIZE      = 512
  # JSON only requires U+0000..U+001F to be escaped inside strings
  # (RFC 8259, section 7). [[:cntrl:]] would also reject DEL and
  # U+0080..U+009F, which are legal unescaped.
  CONTROL       = /[\x00-\x1F]/
  WS            = /\s/
  HEX           = /[0-9a-fA-F]/
  DIGIT         = /[0-9]/
  DIGIT_1_9     = /[1-9]/
  DIGIT_END     = /\d$/
  TRUE_RE       = /[rue]/
  FALSE_RE      = /[alse]/
  NULL_RE       = /[ul]/
  TRUE_KEYWORD  = 'true'
  FALSE_KEYWORD = 'false'
  NULL_KEYWORD  = 'null'
  LEFT_BRACE    = '{'
  RIGHT_BRACE   = '}'
  LEFT_BRACKET  = '['
  RIGHT_BRACKET = ']'
  BACKSLASH     = '\\'
  SLASH         = '/'
  QUOTE         = '"'
  COMMA         = ','
  COLON         = ':'
  ZERO          = '0'
  MINUS         = '-'
  PLUS          = '+'
  POINT         = '.'
  EXPONENT      = /[eE]/
  B,F,N,R,T,U   = %w[b f n r t u]

  # Parses a full JSON document from a String or an IO stream and returns
  # the parsed object graph. For parsing small JSON documents with small
  # memory requirements, use the json gem's faster JSON.parse method instead.
  #
  # The stream is always closed before returning. Raises ParserError when
  # the document is malformed or ends before it is complete.
  def self.parse(json)
    stream = json.is_a?(String) ? StringIO.new(json) : json
    parser = Parser.new
    builder = Builder.new(parser)
    while (buf = stream.read(BUF_SIZE)) != nil
      parser << buf
    end
    # The builder only has a result once end_document has fired.
    raise ParserError, "unexpected eof" unless builder.result
    builder.result
  ensure
    stream.close
  end

  # For every event, define a public registration method (e.g. #key) that
  # stores a callback block, and a private notify_<event> method that
  # invokes all registered blocks in registration order.
  %w[start_document end_document start_object end_object
     start_array end_array key value].each do |name|

    define_method(name) do |&block|
      @listeners[name] << block
    end

    define_method("notify_#{name}") do |*args|
      @listeners[name].each do |block|
        block.call(*args)
      end
    end
    private "notify_#{name}"
  end

  # Create a new parser with an optional initialization block where
  # we can register event callbacks. For example:
  # parser = JSON::Stream::Parser.new do
  #   start_document { puts "start document" }
  #   end_document   { puts "end document" }
  #   start_object   { puts "start object" }
  #   end_object     { puts "end object" }
  #   start_array    { puts "start array" }
  #   end_array      { puts "end array" }
  #   key            {|k| puts "key: #{k}" }
  #   value          {|v| puts "value: #{v}" }
  # end
  def initialize(&block)
    @state = :start_document
    @utf8 = Buffer.new
    @listeners = Hash.new {|h, k| h[k] = [] }
    @stack, @unicode, @buf, @pos = [], "", "", -1
    # The block runs in the parser's context so it can call the
    # registration methods without a receiver.
    instance_eval(&block) if block_given?
  end

  # Pass data into the parser to advance the state machine and
  # generate callback events. This is well suited for an EventMachine
  # receive_data loop.
  #
  # Raises ParserError, with the offending character position appended to
  # the message, as soon as the input stops being valid JSON.
  def <<(data)
    (@utf8 << data).each_char do |ch|
      @pos += 1
      case @state
      when :start_document
        case ch
        when LEFT_BRACE
          @state = :start_object
          @stack.push(:object)
          notify_start_document
          notify_start_object
        when LEFT_BRACKET
          @state = :start_array
          @stack.push(:array)
          notify_start_document
          notify_start_array
        when WS
          # ignore
        else
          error("Expected object or array start")
        end
      when :start_object
        case ch
        when RIGHT_BRACE
          end_container(:object)
        when QUOTE
          @state = :start_string
          @stack.push(:key)
        when WS
          # ignore
        else
          error("Expected object key start")
        end
      when :start_string
        case ch
        when QUOTE
          # The marker pushed when the string began tells us whether it
          # is a value or an object key.
          if @stack.pop == :string
            @state = :end_value
            notify_value(@buf)
          else # :key
            @state = :end_key
            notify_key(@buf)
          end
          @buf = ""
        when BACKSLASH
          @state = :start_escape
        when CONTROL
          error('Control characters must be escaped')
        else
          @buf << ch
        end
      when :start_escape
        case ch
        when QUOTE, BACKSLASH, SLASH
          @buf << ch
          @state = :start_string
        when B
          @buf << "\b"
          @state = :start_string
        when F
          @buf << "\f"
          @state = :start_string
        when N
          @buf << "\n"
          @state = :start_string
        when R
          @buf << "\r"
          @state = :start_string
        when T
          @buf << "\t"
          @state = :start_string
        when U
          @state = :unicode_escape
        else
          error("Expected escaped character")
        end
      when :unicode_escape
        case ch
        when HEX
          @unicode << ch
          if @unicode.size == 4
            codepoint = @unicode.slice!(0, 4).hex
            # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4
            # and removed in 3.2, while Integer works on every version.
            if codepoint >= 0xD800 && codepoint <= 0xDBFF
              error('Expected low surrogate pair half') if @stack[-1].is_a?(Integer)
              @state = :start_surrogate_pair
              @stack.push(codepoint)
            elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
              high = @stack.pop
              error('Expected high surrogate pair half') unless high.is_a?(Integer)
              pair = ((high - 0xD800) * 0x400) + (codepoint - 0xDC00) + 0x10000
              @buf << pair
              @state = :start_string
            else
              # A high surrogate still waiting on the stack must be
              # followed by a low surrogate. Accepting an ordinary code
              # point here would leave the Integer on the stack, and the
              # closing quote would then mistake the string for a key.
              error('Expected low surrogate pair half') if @stack[-1].is_a?(Integer)
              @buf << codepoint
              @state = :start_string
            end
          end
        else
          error('Expected unicode escape hex digit')
        end
      when :start_surrogate_pair
        case ch
        when BACKSLASH
          @state = :start_surrogate_pair_u
        else
          error('Expected low surrogate pair half')
        end
      when :start_surrogate_pair_u
        case ch
        when U
          @state = :unicode_escape
        else
          error('Expected low surrogate pair half')
        end
      when :start_negative_number
        case ch
        when ZERO
          @state = :start_zero
          @buf << ch
        when DIGIT_1_9
          @state = :start_int
          @buf << ch
        else
          error('Expected 0-9 digit')
        end
      when :start_zero
        case ch
        when POINT
          @state = :start_float
          @buf << ch
        when EXPONENT
          @state = :start_exponent
          @buf << ch
        else
          # Numbers have no terminator of their own: emit the value, then
          # rewind the position and re-dispatch this character under the
          # new state.
          @state = :end_value
          notify_value(@buf.to_i)
          @buf = ""
          @pos -= 1
          redo
        end
      when :start_float
        case ch
        when DIGIT
          @state = :in_float
          @buf << ch
        else
          error('Expected 0-9 digit')
        end
      when :in_float
        case ch
        when DIGIT
          @buf << ch
        when EXPONENT
          @state = :start_exponent
          @buf << ch
        else
          @state = :end_value
          notify_value(@buf.to_f)
          @buf = ""
          @pos -= 1
          redo
        end
      when :start_exponent
        case ch
        when MINUS, PLUS, DIGIT
          @state = :in_exponent
          @buf << ch
        else
          error('Expected +, -, or 0-9 digit')
        end
      when :in_exponent
        case ch
        when DIGIT
          @buf << ch
        else
          # A bare sign ("1e+") is not a complete exponent.
          error('Expected 0-9 digit') unless @buf =~ DIGIT_END
          @state = :end_value
          # Always convert with to_f: String#to_i stops at the 'e', so
          # "1e5".to_i would yield 1 instead of 100000.
          notify_value(@buf.to_f)
          @buf = ""
          @pos -= 1
          redo
        end
      when :start_int
        case ch
        when DIGIT
          @buf << ch
        when POINT
          @state = :start_float
          @buf << ch
        when EXPONENT
          @state = :start_exponent
          @buf << ch
        else
          @state = :end_value
          notify_value(@buf.to_i)
          @buf = ""
          @pos -= 1
          redo
        end
      when :start_true
        keyword(TRUE_KEYWORD, true, TRUE_RE, ch)
      when :start_false
        keyword(FALSE_KEYWORD, false, FALSE_RE, ch)
      when :start_null
        keyword(NULL_KEYWORD, nil, NULL_RE, ch)
      when :end_key
        case ch
        when COLON
          @state = :key_sep
        when WS
          # ignore
        else
          error("Expected colon key separator")
        end
      when :key_sep
        start_value(ch)
      when :start_array
        case ch
        when RIGHT_BRACKET
          end_container(:array)
        when WS
          # ignore
        else
          start_value(ch)
        end
      when :end_value
        case ch
        when COMMA
          @state = :value_sep
        when RIGHT_BRACKET
          end_container(:array)
        when RIGHT_BRACE
          end_container(:object)
        when WS
          # ignore
        else
          error("Expected comma or object or array close")
        end
      when :value_sep
        # After a comma an object expects another key, an array another value.
        if @stack[-1] == :object
          case ch
          when QUOTE
            @state = :start_string
            @stack.push(:key)
          when WS
            # ignore
          else
            error("Expected object key start")
          end
        else
          start_value(ch)
        end
      when :end_document
        error("Unexpected data") unless ch =~ WS
      end
    end
  end

  private

  # Closes the innermost container, which must be of the given type
  # (:object or :array), and fires the matching end event. When the last
  # container closes, the document is complete.
  def end_container(type)
    @state = :end_value
    if @stack.pop == type
      send("notify_end_#{type}")
    else
      error("Expected end of #{type}")
    end
    if @stack.empty?
      @state = :end_document
      notify_end_document
    end
  end

  # Consumes one character of a true/false/null literal. The character
  # must be one the keyword can contain (re); once the buffer reaches the
  # keyword's length it must equal the keyword exactly, at which point the
  # corresponding Ruby value is emitted.
  def keyword(word, value, re, ch)
    if ch =~ re
      @buf << ch
    else
      error("Expected #{word} keyword")
    end
    if @buf.size == word.size
      if @buf == word
        @state = :end_value
        @buf = ""
        notify_value(value)
      else
        error("Expected #{word} keyword")
      end
    end
  end

  # Dispatches on the first character of a value: opens a container,
  # starts a string, or begins buffering a keyword or number.
  def start_value(ch)
    case ch
    when LEFT_BRACE
      @state = :start_object
      @stack.push(:object)
      notify_start_object
    when LEFT_BRACKET
      @state = :start_array
      @stack.push(:array)
      notify_start_array
    when QUOTE
      @state = :start_string
      @stack.push(:string)
    when T
      @state = :start_true
      @buf << ch
    when F
      @state = :start_false
      @buf << ch
    when N
      @state = :start_null
      @buf << ch
    when MINUS
      @state = :start_negative_number
      @buf << ch
    when ZERO
      @state = :start_zero
      @buf << ch
    when DIGIT_1_9
      @state = :start_int
      @buf << ch
    when WS
      # ignore
    else
      error("Expected value")
    end
  end

  # Raises ParserError with the zero-based character position appended.
  def error(message)
    raise ParserError, "#{message}: char #{@pos}"
  end
end
|
428
|
+
|
429
|
+
end
|
430
|
+
end
|