json-stream-path 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE +20 -0
- data/README.md +29 -0
- data/Rakefile +16 -0
- data/json-stream-path.gemspec +24 -0
- data/lib/json/stream.rb +8 -0
- data/lib/json/stream/buffer.rb +63 -0
- data/lib/json/stream/builder.rb +106 -0
- data/lib/json/stream/j_path_tree.rb +44 -0
- data/lib/json/stream/parser.rb +545 -0
- data/lib/json/stream/path/version.rb +7 -0
- data/lib/json/stream/version.rb +7 -0
- data/test/buffer_test.rb +87 -0
- data/test/builder_test.rb +123 -0
- data/test/j_path_tree_test.rb +97 -0
- data/test/parser_test.rb +451 -0
- data/test/stream_example.json +28 -0
- data/test/stream_j_path_test.rb +114 -0
- metadata +128 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2013 Manojs
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Json::Stream::Path
|
2
|
+
|
3
|
+
TODO: Write a gem description
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'json-stream-path'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install json-stream-path
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
TODO: Write usage instructions here
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'bundler/gem_tasks'
require 'rake/testtask'

# `rake test` runs every Ruby file directly under test/, with both lib/ and
# test/ added to the load path.
Rake::TestTask.new(:test) do |task|
  task.libs.push('lib', 'test')
  task.pattern = 'test/*.rb'
  task.verbose = true
end
|
9
|
+
#require 'rake/testtask'
|
10
|
+
#
|
11
|
+
#Rake::TestTask.new do |t|
|
12
|
+
# t.libs << 'test'
|
13
|
+
#end
|
14
|
+
#
|
15
|
+
#desc "Run tests"
|
16
|
+
#task :default => :test
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Make lib/ loadable so the version constant can be read while building.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'json/stream/path/version'

Gem::Specification.new do |gem|
  gem.name          = "json-stream-path"
  gem.version       = Json::Stream::Path::VERSION
  gem.authors       = ["Manojs"]
  gem.email         = ["manojs.nitt@gmail.com"]
  gem.description   = %q{Gem desc}
  gem.summary       = %q{Gem summary}
  gem.homepage      = "https://github.com/bethink/json-stream-path"
  gem.license       = "MIT"

  # Package exactly what git tracks; executables and tests are derived from
  # that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "pry"
end
|
data/lib/json/stream.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module JSON
|
4
|
+
module Stream
|
5
|
+
|
6
|
+
# A character buffer that expects a UTF-8 encoded stream of bytes.
|
7
|
+
# This handles truncated multi-byte characters properly so we can just
|
8
|
+
# feed it binary data and receive a properly formatted UTF-8 String as
|
9
|
+
# output. See here for UTF-8 parsing details:
|
10
|
+
# http://en.wikipedia.org/wiki/UTF-8
|
11
|
+
# http://tools.ietf.org/html/rfc3629#section-3
|
12
|
+
class Buffer
  def initialize
    # @state: :start (between characters) or :multi_byte (inside one)
    # @buf:   bytes of the multi-byte character collected so far
    # @need:  total byte length of the character being collected
    @state, @buf, @need = :start, [], 0
  end

  # Feed the buffer a String of binary UTF-8 encoded bytes and return every
  # complete character received so far as a UTF-8 String. A multi-byte
  # character that is cut off at the end of +data+ is held back and completed
  # by the next call.
  #
  # Raises ParserError when a byte cannot start or continue a character, or
  # when the assembled bytes are not valid UTF-8.
  def <<(data)
    bytes = []
    data.each_byte do |b|
      case @state
      when :start
        if b < 128
          bytes << b
        elsif b >= 192
          # Lead byte: its high bits announce the length of the sequence.
          @state = :multi_byte
          @buf << b
          @need = if b >= 240 then 4
                  elsif b >= 224 then 3
                  else 2
                  end
        else
          error('Expected start of multi-byte or single byte char')
        end
      when :multi_byte
        if b > 127 && b < 192
          @buf << b
          if @buf.size == @need
            # Append in place; `bytes += ...` would copy the whole output
            # array for every multi-byte character.
            bytes.concat(@buf)
            @buf.clear
            @state = :start
          end
        else
          error('Expected continuation byte')
        end
      end
    end

    str = bytes.pack('C*').force_encoding(Encoding::UTF_8)
    # Catches sequences that are well-formed byte-wise but still illegal
    # UTF-8 (overlong forms, out-of-range lead bytes, ...).
    error('Invalid UTF-8 byte sequence') unless str.valid_encoding?
    str
  end

  private

  def error(message)
    raise ParserError, message
  end
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module JSON
|
4
|
+
module Stream
|
5
|
+
# A parser listener that builds a full, in memory, object graph from a
|
6
|
+
# JSON document. Typically, we would use the json gem's JSON.parse() method
|
7
|
+
# when we have the full JSON document because it's much faster than this.
|
8
|
+
# JSON::Stream is typically used when we have a huge JSON document streaming
|
9
|
+
# to us and we don't want to hold the entire parsed object in memory.
|
10
|
+
# Regardless, this is a good example of how to write parser callbacks.
|
11
|
+
#
|
12
|
+
# parser = JSON::Stream::Parser.new
|
13
|
+
# builder = JSON::Stream::Builder.new(parser)
|
14
|
+
# parser << json
|
15
|
+
# obj = builder.result
|
16
|
+
class Builder
  METHODS = %w[start_document end_document start_object end_object start_array end_array key value]

  attr_reader :result

  # Subscribes this builder to every parser event; each event is handled by
  # the instance method of the same name.
  def initialize(parser)
    METHODS.each { |event| parser.send(event, &method(event)) }
  end

  # Begins a fresh document: no open containers, no result yet.
  def start_document
    @stack, @result = [], nil
  end

  # The last container left on the stack is the document root.
  def end_document
    root = @stack.pop
    @result = root.obj
  end

  def start_object
    @stack.push(ObjectNode.new)
  end

  def start_array
    @stack.push(ArrayNode.new)
  end

  # Closes the innermost container and attaches it to its parent. The root
  # container stays on the stack until end_document collects it.
  def end_object
    return if @stack.size == 1

    finished = @stack.pop
    @stack.last << finished.obj
  end
  alias :end_array :end_object

  def key(key)
    @stack.last << key
  end

  def value(value)
    @stack.last << value
  end
end
|
65
|
+
|
66
|
+
# Accumulates the elements of a JSON array while it is being parsed.
class ArrayNode
  attr_reader :obj

  def initialize
    @obj = []
  end

  # Appends an element and returns the node itself so calls can be chained.
  def <<(node)
    @obj.push(node)
    self
  end

  def to_s
    @obj.inspect
  end
end
|
82
|
+
|
83
|
+
# Accumulates the members of a JSON object while it is being parsed. Pushed
# items alternate between key and value: the first item is remembered as the
# pending key, the next one is stored under it.
class ObjectNode
  attr_reader :obj

  def initialize
    @obj, @key = {}, nil
  end

  # Pushes either a key or the value for the pending key, and returns the
  # node itself so calls can be chained.
  def <<(node)
    pending, @key = @key, nil
    if pending
      @obj[pending] = node
    else
      @key = node
    end
    self
  end

  def to_s
    "OBJ Node: {#{@key.inspect}: #{@obj.inspect}}"
  end
end
|
104
|
+
|
105
|
+
end
|
106
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# Tracks how far a "/a/b/c" style path has been matched while keys stream by.
#
# +tree+ maps each path segment (as a Symbol) to
# <tt>{prev: Symbol|nil, value: Boolean, next: Symbol|nil}</tt>, where
# +value+ records whether the segment has already been matched. +tree+ is nil
# when the path is missing, empty, or does not start with "/".
#
# Segments are keyed by name only, so a path that repeats a segment name
# (e.g. "/a/b/a") overwrites the earlier entry.
class JPathTree
  attr_accessor :jpath, :tree

  def initialize(jpath)
    self.jpath = jpath
    self.tree = (jpath.is_a?(String) && jpath.start_with?('/')) ? {} : nil
    return unless tree

    prev_key = nil
    # drop(1) skips the empty string in front of the leading slash. Unlike
    # [1..-1] it yields [] rather than nil when the path is just "/".
    jpath.split('/').drop(1).each do |segment|
      name = segment.intern
      tree[name] = { prev: prev_key, value: false, next: nil }
      tree[prev_key][:next] = name if prev_key
      prev_key = name
    end
  end

  # Records +key+ as matched when it is the next expected path segment.
  #
  # Returns true only when +key+ completes the path, i.e. it is the last
  # segment and every earlier segment has been matched. Returns nil in every
  # other case: non-String key, unknown or already matched segment, earlier
  # segments still unmatched, or an intermediate segment (which is still
  # marked as matched).
  def parsing_feasible?(key)
    return unless key.is_a?(String) && tree && !tree.empty?

    node = tree[key.intern]
    return if node.nil? || node[:value]

    prev_node = tree[node[:prev]]
    return unless prev_node.nil? || prev_node[:value]

    node[:value] = true
    tree[node[:next]].nil? ? true : nil
  end
end
|
44
|
+
|
@@ -0,0 +1,545 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module JSON
|
4
|
+
module Stream
|
5
|
+
|
6
|
+
# Raised for malformed JSON input and for invalid UTF-8 byte sequences.
class ParserError < RuntimeError; end
|
8
|
+
|
9
|
+
# A streaming JSON parser that generates SAX-like events for
|
10
|
+
# state changes. Use the json gem for small documents. Use this
|
11
|
+
# for huge documents that won't fit in memory.
|
12
|
+
class Parser
  # Raised when the path given to #parse is not an absolute "/a/b" style path.
  class InvalidJSONPath < StandardError; end

  BUF_SIZE = 512
  CONTROL = /[[:cntrl:]]/
  WS = /\s/
  HEX = /[0-9a-fA-F]/
  DIGIT = /[0-9]/
  DIGIT_1_9 = /[1-9]/
  DIGIT_END = /\d$/
  TRUE_RE = /[rue]/
  FALSE_RE = /[alse]/
  NULL_RE = /[ul]/
  TRUE_KEYWORD = 'true'
  FALSE_KEYWORD = 'false'
  NULL_KEYWORD = 'null'
  LEFT_BRACE = '{'
  RIGHT_BRACE = '}'
  LEFT_BRACKET = '['
  RIGHT_BRACKET = ']'
  BACKSLASH = '\\'
  SLASH = '/'
  QUOTE = '"'
  COMMA = ','
  COLON = ':'
  ZERO = '0'
  MINUS = '-'
  PLUS = '+'
  POINT = '.'
  EXPONENT = /[eE]/
  B, F, N, R, T, U = %w[b f n r t u]

  # Parses a full JSON document from a String or an IO stream and returns
  # the parsed object graph. For parsing small JSON documents with small
  # memory requirements, use the json gem's faster JSON.parse method instead.
  #
  # The stream is closed before returning, including streams passed in by the
  # caller. Raises ParserError on malformed or truncated input.
  def self.parse(json)
    stream = json.is_a?(String) ? StringIO.new(json) : json
    parser = Parser.new
    builder = Builder.new(parser)
    while (buf = stream.read(BUF_SIZE))
      parser << buf
    end
    raise ParserError, "unexpected eof" unless builder.result
    builder.result
  ensure
    stream.close if stream
  end

  # Create a new parser with an optional initialization block where
  # we can register event callbacks. For example:
  #   parser = JSON::Stream::Parser.new do
  #     start_document { puts "start document" }
  #     end_document   { puts "end document" }
  #     start_object   { puts "start object" }
  #     end_object     { puts "end object" }
  #     start_array    { puts "start array" }
  #     end_array      { puts "end array" }
  #     key            {|k| puts "key: #{k}" }
  #     value          {|v| puts "value: #{v}" }
  #   end
  def initialize(&block)
    @state = :start_document
    @utf8 = Buffer.new
    @listeners = Hash.new { |h, k| h[k] = [] }
    @stack, @unicode, @buf, @pos = [], "", "", -1
    # State used only when #parse is given a path:
    #   @parsing_area  - true while events belong to the requested value
    #   @partial_stack - open containers inside the requested value
    #   @stop_parsing  - set once the requested value is complete
    @partial_stack = []
    @parsing_area = false
    @jpath, @jpath_tree = nil, nil
    @stop_parsing = nil
    instance_eval(&block) if block_given?
  end

  # Parses +json+ (a String or an IO-like stream) and returns the value found
  # at +jpath+, an absolute path such as "/root/child". With no path, an
  # empty path, or "/", the whole document is parsed via Parser.parse.
  #
  # In path mode the document root is expected to be an object; only the
  # events of the requested value are forwarded to the builder. Returns nil
  # when the path is never matched.
  #
  # Raises InvalidJSONPath when the path does not start with "/", and
  # ParserError on malformed or truncated input. The stream is closed before
  # returning.
  def parse(json, jpath = nil)
    stream = nil
    # Work on a stripped copy: the caller's string may be frozen or reused.
    path = jpath && jpath.strip
    return self.class.parse(json) if path.nil? || path.empty? || path == SLASH

    @jpath = path
    @jpath_tree = JPathTree.new(path)
    stream = json.is_a?(String) ? StringIO.new(json) : json
    builder = Builder.new(self)
    while (buf = stream.read(BUF_SIZE))
      self << buf
    end

    raise ParserError, "unexpected eof" unless builder.result
    # The builder holds {matched_key => value}. Return the value as-is so
    # that a legitimate `false` is not turned into nil.
    builder.result.values.first
  ensure
    stream.close if stream
  end

  # For every event this defines a public registration method (e.g.
  # `key { |k| ... }`) and a private `notify_<event>` method that the state
  # machine calls.
  %w[start_document end_document start_object end_object
     start_array end_array key value].each do |name|
    # Derived once here. The closures below must never reassign +name+: it is
    # the key under which listeners are stored.
    event = name.to_sym

    define_method(name) do |&block|
      @listeners[name] << block
    end

    define_method("notify_#{name}") do |*args|
      listeners = @listeners[name]
      if @jpath
        notify_within_jpath(event, listeners, args)
      else
        # No path requested: every event goes to every listener.
        listeners.each { |listener| listener.call(*args) }
      end
    end
    private "notify_#{name}"
  end

  # Pass data into the parser to advance the state machine and
  # generate callback events. This is well suited for an EventMachine
  # receive_data loop.
  #
  # Input is always consumed to the end, even after a requested path value
  # has been completed, so the rest of the document is still validated.
  def <<(data)
    (@utf8 << data).each_char do |ch|
      @pos += 1
      case @state
      when :start_document
        case ch
        when LEFT_BRACE
          @state = :start_object
          @stack.push(:object)
          # The root object's opening events are always forwarded, also in
          # path mode, so that the builder has a root container.
          @parsing_area = true
          notify_start_document
          notify_start_object
          @parsing_area = false
        when LEFT_BRACKET
          @state = :start_array
          @stack.push(:array)
          notify_start_document
          notify_start_array
        when WS
          # ignore
        else
          error("Expected object or array start")
        end
      when :start_object
        case ch
        when RIGHT_BRACE
          end_container(:object)
        when QUOTE
          @state = :start_string
          @stack.push(:key)
        when WS
          # ignore
        else
          error("Expected object key start")
        end
      when :start_string
        case ch
        when QUOTE
          if @stack.pop == :string
            @state = :end_value
            notify_value(@buf)
          else # :key
            @state = :end_key
            notify_key(@buf)
          end
          @buf = ""
        when BACKSLASH
          @state = :start_escape
        when CONTROL
          error('Control characters must be escaped')
        else
          @buf << ch
        end
      when :start_escape
        case ch
        when QUOTE, BACKSLASH, SLASH
          @buf << ch
          @state = :start_string
        when B
          @buf << "\b"
          @state = :start_string
        when F
          @buf << "\f"
          @state = :start_string
        when N
          @buf << "\n"
          @state = :start_string
        when R
          @buf << "\r"
          @state = :start_string
        when T
          @buf << "\t"
          @state = :start_string
        when U
          @state = :unicode_escape
        else
          error("Expected escaped character")
        end
      when :unicode_escape
        case ch
        when HEX
          @unicode << ch
          if @unicode.size == 4
            codepoint = @unicode.slice!(0, 4).hex
            if codepoint >= 0xD800 && codepoint <= 0xDBFF
              # A pending high surrogate sits on the stack as an Integer.
              error('Expected low surrogate pair half') if @stack[-1].is_a?(Integer)
              @state = :start_surrogate_pair
              @stack.push(codepoint)
            elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
              high = @stack.pop
              error('Expected high surrogate pair half') unless high.is_a?(Integer)
              pair = ((high - 0xD800) * 0x400) + (codepoint - 0xDC00) + 0x10000
              @buf << pair
              @state = :start_string
            else
              @buf << codepoint
              @state = :start_string
            end
          end
        else
          error('Expected unicode escape hex digit')
        end
      when :start_surrogate_pair
        case ch
        when BACKSLASH
          @state = :start_surrogate_pair_u
        else
          error('Expected low surrogate pair half')
        end
      when :start_surrogate_pair_u
        case ch
        when U
          @state = :unicode_escape
        else
          error('Expected low surrogate pair half')
        end
      when :start_negative_number
        case ch
        when ZERO
          @state = :start_zero
          @buf << ch
        when DIGIT_1_9
          @state = :start_int
          @buf << ch
        else
          error('Expected 0-9 digit')
        end
      when :start_zero
        case ch
        when POINT
          @state = :start_float
          @buf << ch
        when EXPONENT
          @state = :start_exponent
          @buf << ch
        else
          # The number ended on the previous character: emit it, then
          # re-process this character in the new state.
          @state = :end_value
          notify_value(@buf.to_i)
          @buf = ""
          @pos -= 1
          redo
        end
      when :start_float
        case ch
        when DIGIT
          @state = :in_float
          @buf << ch
        else
          error('Expected 0-9 digit')
        end
      when :in_float
        case ch
        when DIGIT
          @buf << ch
        when EXPONENT
          @state = :start_exponent
          @buf << ch
        else
          @state = :end_value
          notify_value(@buf.to_f)
          @buf = ""
          @pos -= 1
          redo
        end
      when :start_exponent
        case ch
        when MINUS, PLUS, DIGIT
          @state = :in_exponent
          @buf << ch
        else
          error('Expected +, -, or 0-9 digit')
        end
      when :in_exponent
        case ch
        when DIGIT
          @buf << ch
        else
          error('Expected 0-9 digit') unless @buf =~ DIGIT_END
          @state = :end_value
          num = @buf.include?('.') ? @buf.to_f : @buf.to_i
          notify_value(num)
          @buf = ""
          @pos -= 1
          redo
        end
      when :start_int
        case ch
        when DIGIT
          @buf << ch
        when POINT
          @state = :start_float
          @buf << ch
        when EXPONENT
          @state = :start_exponent
          @buf << ch
        else
          @state = :end_value
          notify_value(@buf.to_i)
          @buf = ""
          @pos -= 1
          redo
        end
      when :start_true
        keyword(TRUE_KEYWORD, true, TRUE_RE, ch)
      when :start_false
        keyword(FALSE_KEYWORD, false, FALSE_RE, ch)
      when :start_null
        keyword(NULL_KEYWORD, nil, NULL_RE, ch)
      when :end_key
        case ch
        when COLON
          @state = :key_sep
        when WS
          # ignore
        else
          error("Expected colon key separator")
        end
      when :key_sep
        start_value(ch)
      when :start_array
        case ch
        when RIGHT_BRACKET
          end_container(:array)
        when WS
          # ignore
        else
          start_value(ch)
        end
      when :end_value
        case ch
        when COMMA
          @state = :value_sep
        when RIGHT_BRACKET
          end_container(:array)
        when RIGHT_BRACE
          end_container(:object)
        when WS
          # ignore
        else
          error("Expected comma or object or array close")
        end
      when :value_sep
        if @stack[-1] == :object
          case ch
          when QUOTE
            @state = :start_string
            @stack.push(:key)
          when WS
            # ignore
          else
            error("Expected object key start")
          end
        else
          start_value(ch)
        end
      when :end_document
        error("Unexpected data") unless ch =~ WS
      end
    end
  end

  private

  # Path-mode dispatch: forwards an event to the listeners only while the
  # parser is inside the requested value. end_document is always forwarded,
  # exactly once, so that the builder can publish its result.
  def notify_within_jpath(event, listeners, args)
    unless @jpath_tree.tree
      raise InvalidJSONPath, "Invalid json path. Example: '/root/child'"
    end

    # Only object keys may advance the path match; a string *value* that
    # happens to equal a segment name must not.
    if !@parsing_area && event == :key && @jpath_tree.parsing_feasible?(args[0])
      @parsing_area = true
    end

    if event == :end_document || @parsing_area
      listeners.each { |listener| listener.call(*args) }
    end

    track_partial_document(event) if @parsing_area
  end

  # Follows the nesting of the requested value so that its end can be
  # detected. @partial_stack holds :start_document and :key (pushed when the
  # final path key arrives) followed by one entry per container still open
  # inside the value.
  def track_partial_document(event)
    if event == :key && @partial_stack.empty?
      @partial_stack << :start_document << :key
    end

    top = @partial_stack[-1]
    case top
    when :key
      # The event right after the key is the requested value itself.
      case event
      when :start_array, :start_object then @partial_stack << event
      when :value then finish_partial_document
      end
    when :start_array, :start_object
      closing = top == :start_array ? :end_array : :end_object
      if event == :start_array || event == :start_object
        @partial_stack << event
      elsif event == closing
        @partial_stack.pop
        # Back at the key: the container that just closed was the value.
        finish_partial_document if @partial_stack[-1] == :key
      end
    end
  end

  # Leaves the parsing area once the requested value has been fully emitted.
  def finish_partial_document
    @parsing_area = false
    @stop_parsing = true
    @partial_stack.pop # drop the :key marker
  end

  # Closes the innermost container, and the document when it was the root.
  def end_container(type)
    @state = :end_value
    if @stack.pop == type
      send("notify_end_#{type}")
    else
      error("Expected end of #{type}")
    end
    if @stack.empty?
      @state = :end_document
      notify_end_document
    end
  end

  # Consumes one character of a true/false/null literal and emits +value+
  # once the whole +word+ has been read.
  def keyword(word, value, re, ch)
    if ch =~ re
      @buf << ch
    else
      error("Expected #{word} keyword")
    end
    if @buf.size == word.size
      if @buf == word
        @state = :end_value
        @buf = ""
        notify_value(value)
      else
        error("Expected #{word} keyword")
      end
    end
  end

  # Chooses the next state from the first character of a value.
  def start_value(ch)
    case ch
    when LEFT_BRACE
      @state = :start_object
      @stack.push(:object)
      notify_start_object
    when LEFT_BRACKET
      @state = :start_array
      @stack.push(:array)
      notify_start_array
    when QUOTE
      @state = :start_string
      @stack.push(:string)
    when T
      @state = :start_true
      @buf << ch
    when F
      @state = :start_false
      @buf << ch
    when N
      @state = :start_null
      @buf << ch
    when MINUS
      @state = :start_negative_number
      @buf << ch
    when ZERO
      @state = :start_zero
      @buf << ch
    when DIGIT_1_9
      @state = :start_int
      @buf << ch
    when WS
      # ignore
    else
      error("Expected value")
    end
  end

  # Raises ParserError with the zero-based character position appended.
  def error(message)
    raise ParserError, "#{message}: char #{@pos}"
  end
end
|
543
|
+
|
544
|
+
end
|
545
|
+
end
|