json-streamer 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +35 -4
- data/json-streamer.gemspec +1 -1
- data/lib/json/streamer.rb +47 -29
- data/lib/json/streamer/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 14eacb500abba80f385b5f334b44801820cadc61
|
4
|
+
data.tar.gz: 5e6c6fd650b13ca12a8f541d4a80f34bc73a7e1d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c868d1324dd546e77eeb798df4716dc8cecea27f200643de75fe8009f4599c49c59c0a9744dbfb6f9c8e1ada67d52c46df3cc081eacb5faa8f2ddb32db0daa17
|
7
|
+
data.tar.gz: c762bbb47ed0f1bd0a6c8a3ea0a3b2265db28ab8f0676714a643a645e35804423a9358949994fb4c6973e7ae1a0acd33ac002f0c2bf75267854d0bb4c8527cd2
|
data/README.md
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# Json::Streamer
|
2
2
|
|
3
|
-
Utility to support JSON streaming allowing you to get
|
4
|
-
Useful for e.g. streaming objects from a JSON array.
|
3
|
+
Utility to support JSON streaming allowing you to get data based on various criteria (key, nesting level, etc).
|
5
4
|
|
6
5
|
This gem will basically spare you the need to define your own callbacks when parsing a JSON stream.
|
7
6
|
Streaming is useful for
|
@@ -9,7 +8,8 @@ Streaming is useful for
|
|
9
8
|
- files read in chunks (e.g. arriving over network)
|
10
9
|
- cases where you expect some issue with the file (e.g. losing connection to source, invalid data at some point) but would like to get as much data as possible anyway
|
11
10
|
|
12
|
-
|
11
|
+
Performance:
|
12
|
+
|
13
13
|
The gem uses JSON::Stream's events in the background. It was chosen because it's a pure Ruby parser.
|
14
14
|
A similar implementation can be done using the ~10 times faster Yajl::FFI gem that is dependent on the native YAJL library.
|
15
15
|
I did not measure the performance of my implementation on top of these libraries.
|
@@ -45,7 +45,9 @@ streamer = Json::Streamer::JsonStreamer.new(file_stream, 500)
|
|
45
45
|
```ruby
|
46
46
|
# Get objects based on nesting level
|
47
47
|
# First level will give you the full JSON, second level will give you objects within full JSON object, etc.
|
48
|
-
streamer.
|
48
|
+
streamer.get(nesting_level:2).each do |object|
|
49
|
+
p object
|
50
|
+
end
|
49
51
|
```
|
50
52
|
|
51
53
|
Getting second level objects on the JSON below will yield you 2 empty objects
|
@@ -55,6 +57,35 @@ Getting second level objects on the JSON below will yield you 2 empty objects
|
|
55
57
|
"object1": {},
|
56
58
|
"object2": {}
|
57
59
|
}
|
60
|
+
=>
|
61
|
+
{}
|
62
|
+
{}
|
63
|
+
```
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
# Get data based on key
|
67
|
+
streamer.get(key:'key').each do |object|
|
68
|
+
p object
|
69
|
+
end
|
70
|
+
```
|
71
|
+
|
72
|
+
```json
|
73
|
+
{
|
74
|
+
"obj1" : {
|
75
|
+
"key" : "value"
|
76
|
+
},
|
77
|
+
"key" : "value",
|
78
|
+
"obj2" : {
|
79
|
+
"key" : {
|
80
|
+
"key" : "value"
|
81
|
+
}
|
82
|
+
}
|
83
|
+
}
|
84
|
+
=>
|
85
|
+
"value"
|
86
|
+
"value"
|
87
|
+
"value"
|
88
|
+
{"key"=>"value"}
|
58
89
|
```
|
59
90
|
|
60
91
|
Check the unit tests for more examples.
|
data/json-streamer.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Csaba Apagyi"]
|
10
10
|
spec.email = ["csapagyi@users.noreply.github.com"]
|
11
11
|
|
12
|
-
spec.summary = %q{Utility to support JSON streaming allowing you to get
|
12
|
+
spec.summary = %q{Utility to support JSON streaming allowing you to get data based on various criteria (key, nesting level, etc)}
|
13
13
|
spec.homepage = "https://github.com/csapagyi/json-streamer"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
data/lib/json/streamer.rb
CHANGED
@@ -4,13 +4,16 @@ require "json/stream"
|
|
4
4
|
module Json
|
5
5
|
module Streamer
|
6
6
|
class JsonStreamer
|
7
|
+
|
8
|
+
attr_reader :aggregator
|
9
|
+
|
7
10
|
def initialize(file_io, chunk_size = 1000)
|
8
11
|
@parser = JSON::Stream::Parser.new
|
9
12
|
|
10
13
|
@file_io = file_io
|
11
14
|
@chunk_size = chunk_size
|
12
15
|
|
13
|
-
@
|
16
|
+
@current_nesting_level = -1
|
14
17
|
@current_key = nil
|
15
18
|
@aggregator = {}
|
16
19
|
@temp_aggregator_keys = {}
|
@@ -18,36 +21,47 @@ module Json
|
|
18
21
|
@parser.start_object {start_object}
|
19
22
|
@parser.start_array {start_array}
|
20
23
|
@parser.key {|k| key(k)}
|
21
|
-
@parser.value {|v| value(v)}
|
22
24
|
|
23
25
|
end
|
24
26
|
|
25
|
-
def
|
26
|
-
@yield_nesting_level =
|
27
|
+
def get(nesting_level:-1, key:nil)
|
28
|
+
@yield_nesting_level = nesting_level
|
29
|
+
@wanted_key = key
|
30
|
+
|
31
|
+
@parser.value do |v|
|
32
|
+
if @aggregator[@current_nesting_level].kind_of? Array
|
33
|
+
@aggregator[@current_nesting_level] << v
|
34
|
+
else
|
35
|
+
@aggregator[@current_nesting_level][@current_key] = v
|
36
|
+
if yield_value?
|
37
|
+
yield v
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
27
41
|
|
28
42
|
# Callback containing yield has to be defined in the method called via block
|
29
43
|
@parser.end_object do
|
30
|
-
if
|
31
|
-
yield @aggregator[@
|
44
|
+
if yield_object?
|
45
|
+
yield @aggregator[@current_nesting_level].clone
|
32
46
|
# TODO probably can be faster than reject!{true}
|
33
|
-
@aggregator[@
|
47
|
+
@aggregator[@current_nesting_level].reject!{true}
|
34
48
|
else
|
35
49
|
merge_up
|
36
50
|
end
|
37
51
|
|
38
|
-
@
|
52
|
+
@current_nesting_level -= 1
|
39
53
|
end
|
40
54
|
|
41
55
|
@parser.end_array do
|
42
|
-
if
|
43
|
-
yield @aggregator[@
|
56
|
+
if yield_object?
|
57
|
+
yield @aggregator[@current_nesting_level].clone
|
44
58
|
# TODO probably can be faster than reject!{true}
|
45
|
-
@aggregator[@
|
59
|
+
@aggregator[@current_nesting_level].reject!{true}
|
46
60
|
else
|
47
61
|
merge_up
|
48
62
|
end
|
49
63
|
|
50
|
-
@
|
64
|
+
@current_nesting_level -= 1
|
51
65
|
end
|
52
66
|
|
53
67
|
@file_io.each(@chunk_size) do |chunk|
|
@@ -55,16 +69,24 @@ module Json
|
|
55
69
|
end
|
56
70
|
end
|
57
71
|
|
72
|
+
def yield_object?
|
73
|
+
@current_nesting_level.eql? @yield_nesting_level or (not @wanted_key.nil? and @wanted_key == @temp_aggregator_keys[@current_nesting_level-1])
|
74
|
+
end
|
75
|
+
|
76
|
+
def yield_value?
|
77
|
+
@wanted_key == @current_key
|
78
|
+
end
|
79
|
+
|
58
80
|
def start_object
|
59
|
-
@temp_aggregator_keys[@
|
60
|
-
@
|
61
|
-
@aggregator[@
|
81
|
+
@temp_aggregator_keys[@current_nesting_level] = @current_key
|
82
|
+
@current_nesting_level += 1
|
83
|
+
@aggregator[@current_nesting_level] = {}
|
62
84
|
end
|
63
85
|
|
64
86
|
def start_array
|
65
|
-
@temp_aggregator_keys[@
|
66
|
-
@
|
67
|
-
@aggregator[@
|
87
|
+
@temp_aggregator_keys[@current_nesting_level] = @current_key
|
88
|
+
@current_nesting_level += 1
|
89
|
+
@aggregator[@current_nesting_level] = []
|
68
90
|
end
|
69
91
|
|
70
92
|
def key k
|
@@ -72,23 +94,19 @@ module Json
|
|
72
94
|
end
|
73
95
|
|
74
96
|
def value v
|
75
|
-
|
76
|
-
@aggregator[@object_nesting_level] << v
|
77
|
-
else
|
78
|
-
@aggregator[@object_nesting_level][@current_key] = v
|
79
|
-
end
|
97
|
+
|
80
98
|
end
|
81
99
|
|
82
100
|
def merge_up
|
83
|
-
return if @
|
84
|
-
|
85
|
-
if @aggregator[
|
86
|
-
@aggregator[
|
101
|
+
return if @current_nesting_level == 0
|
102
|
+
previous_nesting_level = @current_nesting_level - 1
|
103
|
+
if @aggregator[previous_nesting_level].kind_of? Array
|
104
|
+
@aggregator[previous_nesting_level] << @aggregator[@current_nesting_level]
|
87
105
|
else
|
88
|
-
@aggregator[
|
106
|
+
@aggregator[previous_nesting_level][@temp_aggregator_keys[previous_nesting_level]] = @aggregator[@current_nesting_level]
|
89
107
|
end
|
90
108
|
|
91
|
-
@aggregator.delete(@
|
109
|
+
@aggregator.delete(@current_nesting_level)
|
92
110
|
@aggregator
|
93
111
|
end
|
94
112
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json-streamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Csaba Apagyi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -121,6 +121,6 @@ rubyforge_project:
|
|
121
121
|
rubygems_version: 2.5.1
|
122
122
|
signing_key:
|
123
123
|
specification_version: 4
|
124
|
-
summary: Utility to support JSON streaming allowing you to get
|
125
|
-
criteria
|
124
|
+
summary: Utility to support JSON streaming allowing you to get data based on various
|
125
|
+
criteria (key, nesting level, etc)
|
126
126
|
test_files: []
|