json-stream-path 0.0.3 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +0 -1
- data/README.md +37 -3
- data/json-stream-path.gemspec +2 -1
- data/lib/json/stream/builder.rb +0 -6
- data/lib/json/stream/parser.rb +8 -23
- data/lib/json/stream/path/version.rb +1 -1
- metadata +3 -3
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,12 +1,25 @@
|
|
1
1
|
# Json::Stream::Path
|
2
2
|
|
3
|
-
|
3
|
+
Please update to version 0.1.0. It's bug free.
|
4
|
+
|
5
|
+
It's a JSON Path-like implementation with memory optimizations, built on the JSON Stream gem (https://github.com/dgraham/json-stream).
|
6
|
+
|
7
|
+
JSON Stream Path is well suited for parsing a subset of JSON data from a huge JSON file.
|
8
|
+
|
9
|
+
Typical stream-based parsers reduce memory usage only while reading the file. They may still keep the graph (an intermediate data structure used to build the hash) of the whole JSON data in memory. The graph is then converted into a hash, which may consume even more memory. If only a subset of the JSON data has to be parsed from a huge JSON file, the graph does not need to be created for the whole JSON data, and the final hash does not need to contain the whole JSON data either.
|
10
|
+
|
11
|
+
JSON Stream Path addresses this problem. Since the path to parse in the JSON is supplied as an argument, it consumes memory to create the graph only for the requested subset of the huge JSON data. In the same way, the final outcome (a hash) is the subset of the huge JSON data, so it consumes memory only for that subset.
|
12
|
+
|
13
|
+
JSON Stream Path addresses this problem. The path to parse in the JSON file is supplied as an argument, and JSON Stream Path parses according to that path. It therefore consumes memory to create the graph only for the requested subset of the huge JSON data. In the same way, the final outcome (a hash) is the subset of the huge JSON data, so it consumes memory only for that subset.
|
14
|
+
|
15
|
+
Future releases will optimize memory usage and improve performance. Please keep up to date with the latest version.
|
16
|
+
|
4
17
|
|
5
18
|
## Installation
|
6
19
|
|
7
20
|
Add this line to your application's Gemfile:
|
8
21
|
|
9
|
-
gem 'json-stream-path'
|
22
|
+
gem 'json-stream-path', :require => 'json/stream'
|
10
23
|
|
11
24
|
And then execute:
|
12
25
|
|
@@ -18,7 +31,28 @@ Or install it yourself as:
|
|
18
31
|
|
19
32
|
## Usage
|
20
33
|
|
21
|
-
|
34
|
+
To parse a subset of data from a JSON file:
|
35
|
+
|
36
|
+
require 'json/stream'
|
37
|
+
parser = JSON::Stream::Parser.new
|
38
|
+
stream = File.open('path/to/json-file.json')
|
39
|
+
hash = parser.parse(stream, '/json/path')
|
40
|
+
|
41
|
+
|
42
|
+
To parse the whole JSON file:
|
43
|
+
|
44
|
+
require 'json/stream'
|
45
|
+
parser = JSON::Stream::Parser.new
|
46
|
+
stream = File.open('path/to/json-file.json')
|
47
|
+
hash = parser.parse(stream)
|
48
|
+
|
49
|
+
For other basic usage, refer to https://github.com/dgraham/json-stream
|
50
|
+
|
51
|
+
Thanks to David Graham for his JSON Stream gem: https://github.com/dgraham/json-stream
|
52
|
+
|
53
|
+
Future Enhancements:
|
54
|
+
Implement full JSON Path support
|
55
|
+
Option to get hash or JSON
|
22
56
|
|
23
57
|
## Contributing
|
24
58
|
|
data/json-stream-path.gemspec
CHANGED
@@ -16,9 +16,10 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.files = `git ls-files`.split($/)
|
17
17
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
-
spec.
|
19
|
+
spec.require_path = "lib"
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
22
|
spec.add_development_dependency "rake"
|
23
23
|
spec.add_development_dependency "pry"
|
24
|
+
spec.required_ruby_version = '>= 1.9.2'
|
24
25
|
end
|
data/lib/json/stream/builder.rb
CHANGED
@@ -29,17 +29,14 @@ module JSON
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def end_document
|
32
|
-
#puts "====EOD===== #{@stack.inspect} ========\n"
|
33
32
|
@result = @stack.pop.obj
|
34
33
|
end
|
35
34
|
|
36
35
|
def start_object
|
37
|
-
#puts "BUILDER: ========= #{@stack.inspect} ========\n"
|
38
36
|
@stack.push(ObjectNode.new)
|
39
37
|
end
|
40
38
|
|
41
39
|
def end_object
|
42
|
-
#puts "BUILDER: ========= #{@stack.inspect} ========\n"
|
43
40
|
unless @stack.size == 1
|
44
41
|
node = @stack.pop
|
45
42
|
@stack[-1] << node.obj
|
@@ -48,17 +45,14 @@ module JSON
|
|
48
45
|
alias :end_array :end_object
|
49
46
|
|
50
47
|
def start_array
|
51
|
-
#puts "BUILDER: ========= #{@stack.inspect} ========\n"
|
52
48
|
@stack.push(ArrayNode.new)
|
53
49
|
end
|
54
50
|
|
55
51
|
def key(key)
|
56
|
-
#puts "BUILDER: ========= #{@stack.inspect} ========\n"
|
57
52
|
@stack[-1] << key
|
58
53
|
end
|
59
54
|
|
60
55
|
def value(value)
|
61
|
-
#puts "BUILDER: ========= #{@stack.inspect} ========\n"
|
62
56
|
@stack[-1] << value
|
63
57
|
end
|
64
58
|
end
|
data/lib/json/stream/parser.rb
CHANGED
@@ -74,6 +74,7 @@ module JSON
|
|
74
74
|
@stack, @unicode, @buf, @pos = [], "", "", -1
|
75
75
|
@partial_stack = []
|
76
76
|
@jpath, @jpath_tree = nil, nil
|
77
|
+
@parsing_area = false
|
77
78
|
@stop_parsing = nil
|
78
79
|
instance_eval(&block) if block_given?
|
79
80
|
end
|
@@ -89,13 +90,13 @@ module JSON
|
|
89
90
|
@jpath_tree = JPathTree.new(jpath)
|
90
91
|
stream = json.is_a?(String) ? StringIO.new(json) : json
|
91
92
|
builder = Builder.new(self)
|
93
|
+
|
92
94
|
while (buf = stream.read(BUF_SIZE)) != nil
|
93
95
|
self << buf
|
94
96
|
end
|
95
97
|
|
96
|
-
raise ParserError, "unexpected eof" unless builder.result
|
97
|
-
result
|
98
|
-
(result && result.values.first) || nil
|
98
|
+
#raise ParserError, "unexpected eof" unless builder.result
|
99
|
+
builder.result && builder.result.values.first
|
99
100
|
ensure
|
100
101
|
stream.close unless stream.nil?
|
101
102
|
end
|
@@ -110,14 +111,8 @@ module JSON
|
|
110
111
|
define_method("notify_#{name}") do |*args|
|
111
112
|
|
112
113
|
@listeners[name].each do |block|
|
113
|
-
#puts "----------------------------------------------"
|
114
|
-
#puts "#{name} ----- #{@parsing_area.inspect} ---- args #{args.inspect} --------- stack #{@stack.inspect} -------- State #{@state.inspect} --------- Partial Stack #{@partial_stack.inspect} ------ \n\n"
|
115
|
-
#puts "----------------------------------------------\n"
|
116
114
|
|
117
|
-
|
118
|
-
|
119
|
-
if (!@jpath) # If use not used jpath, it should parse whole file
|
120
|
-
#puts "#{name} ----- #{@parsing_area.inspect} ---- args #{args.inspect} --------- stack #{@stack.inspect} -------- State #{@state.inspect} --------- Partial Stack #{@partial_stack.inspect} ------ \n\n"
|
115
|
+
if (!@jpath) # If not used jpath, it should parse whole file
|
121
116
|
block.call(*args)
|
122
117
|
else
|
123
118
|
|
@@ -132,12 +127,11 @@ module JSON
|
|
132
127
|
end
|
133
128
|
|
134
129
|
if (name == :end_document) # An :end_document call is required
|
135
|
-
#puts "#{name} ----- #{@parsing_area.inspect} ---- args #{args.inspect} --------- stack #{@stack.inspect} -------- State #{@state.inspect} --------- Partial Stack #{@partial_stack.inspect} ------ \n\n"
|
136
130
|
block.call(*args)
|
131
|
+
return
|
137
132
|
end
|
138
133
|
|
139
134
|
if (@parsing_area)
|
140
|
-
#puts "#{name} ----- #{@parsing_area.inspect} ---- args #{args.inspect} --------- stack #{@stack.inspect} -------- State #{@state.inspect} --------- Partial Stack #{@partial_stack.inspect} ------ \n\n"
|
141
135
|
block.call(*args)
|
142
136
|
|
143
137
|
if (name == :key and @partial_stack.empty?) # Reached first key in the parsable JSON area
|
@@ -147,7 +141,7 @@ module JSON
|
|
147
141
|
|
148
142
|
poped_partial_stack_value = @partial_stack[-1]
|
149
143
|
|
150
|
-
if (
|
144
|
+
if (poped_partial_stack_value == :key || poped_partial_stack_value == :start_array || poped_partial_stack_value == :start_object)
|
151
145
|
if (name == :start_array)
|
152
146
|
@partial_stack << :start_array
|
153
147
|
elsif (name == :start_object)
|
@@ -156,10 +150,6 @@ module JSON
|
|
156
150
|
end
|
157
151
|
|
158
152
|
if (poped_partial_stack_value == :key)
|
159
|
-
#if (name == :start_array) # this commented code in all the elsif block moved up to reduce the code
|
160
|
-
# @partial_stack << :start_array
|
161
|
-
#elsif (name == :start_object)
|
162
|
-
# @partial_stack << :start_object
|
163
153
|
if (name == :value)
|
164
154
|
@parsing_area = false
|
165
155
|
@stop_parsing = true
|
@@ -196,15 +186,10 @@ module JSON
|
|
196
186
|
# generate callback events. This is well suited for an EventMachine
|
197
187
|
# receive_data loop.
|
198
188
|
def <<(data)
|
189
|
+
|
199
190
|
(@utf8 << data).each_char do |ch|
|
200
191
|
|
201
192
|
if (@stop_parsing)
|
202
|
-
#notify_end_object
|
203
|
-
#notify_end_document
|
204
|
-
#end_container(:object)
|
205
|
-
#puts "========== #{@stack.inspect} ========="
|
206
|
-
#print ch
|
207
|
-
#break
|
208
193
|
end
|
209
194
|
|
210
195
|
@pos += 1
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json-stream-path
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -97,7 +97,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
97
97
|
requirements:
|
98
98
|
- - ! '>='
|
99
99
|
- !ruby/object:Gem::Version
|
100
|
-
version:
|
100
|
+
version: 1.9.2
|
101
101
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
102
|
none: false
|
103
103
|
requirements:
|