json-streamer 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a0dc6d53af41106de2afd3c73edbe29626230388
4
- data.tar.gz: 93bebbd4e33fb2eb273f849a4ffb2a1d75d9808e
3
+ metadata.gz: 14eacb500abba80f385b5f334b44801820cadc61
4
+ data.tar.gz: 5e6c6fd650b13ca12a8f541d4a80f34bc73a7e1d
5
5
  SHA512:
6
- metadata.gz: a2d38100ad39aee46704ca2eb171609eea8b126c0a0f7b2cbf694b35220c479954e8a80dbba71a3136a0657e2119e0d4fe18df12975c8daa02d5ff6bcb43f8dc
7
- data.tar.gz: 24b2c90609308bab883997514fc3a7ed74b1ac980a6c5ca536bc4c573bf4c3781047693ff347b63c2b7bf296549bb7deec88362e07bda27a67da56693cc61ca8
6
+ metadata.gz: c868d1324dd546e77eeb798df4716dc8cecea27f200643de75fe8009f4599c49c59c0a9744dbfb6f9c8e1ada67d52c46df3cc081eacb5faa8f2ddb32db0daa17
7
+ data.tar.gz: c762bbb47ed0f1bd0a6c8a3ea0a3b2265db28ab8f0676714a643a645e35804423a9358949994fb4c6973e7ae1a0acd33ac002f0c2bf75267854d0bb4c8527cd2
data/README.md CHANGED
@@ -1,7 +1,6 @@
1
1
  # Json::Streamer
2
2
 
3
- Utility to support JSON streaming allowing you to get objects based on various criteria.
4
- Useful for e.g. streaming objects from a JSON array.
3
+ Utility to support JSON streaming allowing you to get data based on various criteria (key, nesting level, etc).
5
4
 
6
5
  This gem will basically spare you the need to define your own callbacks when parsing a JSON stream.
7
6
  Streaming is useful for
@@ -9,7 +8,8 @@ Streaming is useful for
9
8
  - files read in chunks (e.g. arriving over network)
10
9
  - cases where you expect some issue with the file (e.g. losing connection to source, invalid data at some point) but would like to get as much data as possible anyway
11
10
 
12
- Regarding performance:
11
+ Performance:
12
+
13
13
  The gem uses JSON::Stream's events in the background. It was chosen because it's a pure Ruby parser.
14
14
  A similar implementation can be done using the ~10 times faster Yajl::FFI gem that is dependent on the native YAJL library.
15
15
  I did not measure the performance of my implementation on top of these libraries.
@@ -45,7 +45,9 @@ streamer = Json::Streamer::JsonStreamer.new(file_stream, 500)
45
45
  ```ruby
46
46
  # Get objects based on nesting level
47
47
  # First level will give you the full JSON, second level will give you objects within the full JSON object, etc.
48
- streamer.get_objects_from_level(2)
48
+ streamer.get(nesting_level:2).each do |object|
49
+ p object
50
+ end
49
51
  ```
50
52
 
51
53
  Getting second level objects on the JSON below will yield you 2 empty objects
@@ -55,6 +57,35 @@ Getting second level objects on the JSON below will yield you 2 empty objects
55
57
  "object1": {},
56
58
  "object2": {}
57
59
  }
60
+ =>
61
+ {}
62
+ {}
63
+ ```
64
+
65
+ ```ruby
66
+ # Get data based on key
67
+ streamer.get(key:'key').each do |object|
68
+ p object
69
+ end
70
+ ```
71
+
72
+ ```json
73
+ {
74
+ "obj1" : {
75
+ "key" : "value"
76
+ },
77
+ "key" : "value",
78
+ "obj2" : {
79
+ "key" : {
80
+ "key" : "value"
81
+ }
82
+ }
83
+ }
84
+ =>
85
+ "value"
86
+ "value"
87
+ "value"
88
+ {"key"=>"value"}
58
89
  ```
59
90
 
60
91
  Check the unit tests for more examples.
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Csaba Apagyi"]
10
10
  spec.email = ["csapagyi@users.noreply.github.com"]
11
11
 
12
- spec.summary = %q{Utility to support JSON streaming allowing you to get objects based on various criteria. Useful for e.g. streaming objects from a JSON array.}
12
+ spec.summary = %q{Utility to support JSON streaming allowing you to get data based on various criteria (key, nesting level, etc)}
13
13
  spec.homepage = "https://github.com/csapagyi/json-streamer"
14
14
  spec.license = "MIT"
15
15
 
@@ -4,13 +4,16 @@ require "json/stream"
4
4
  module Json
5
5
  module Streamer
6
6
  class JsonStreamer
7
+
8
+ attr_reader :aggregator
9
+
7
10
  def initialize(file_io, chunk_size = 1000)
8
11
  @parser = JSON::Stream::Parser.new
9
12
 
10
13
  @file_io = file_io
11
14
  @chunk_size = chunk_size
12
15
 
13
- @object_nesting_level = 0
16
+ @current_nesting_level = -1
14
17
  @current_key = nil
15
18
  @aggregator = {}
16
19
  @temp_aggregator_keys = {}
@@ -18,36 +21,47 @@ module Json
18
21
  @parser.start_object {start_object}
19
22
  @parser.start_array {start_array}
20
23
  @parser.key {|k| key(k)}
21
- @parser.value {|v| value(v)}
22
24
 
23
25
  end
24
26
 
25
- def get_objects_from_level(yield_nesting_level)
26
- @yield_nesting_level = yield_nesting_level
27
+ def get(nesting_level:-1, key:nil)
28
+ @yield_nesting_level = nesting_level
29
+ @wanted_key = key
30
+
31
+ @parser.value do |v|
32
+ if @aggregator[@current_nesting_level].kind_of? Array
33
+ @aggregator[@current_nesting_level] << v
34
+ else
35
+ @aggregator[@current_nesting_level][@current_key] = v
36
+ if yield_value?
37
+ yield v
38
+ end
39
+ end
40
+ end
27
41
 
28
42
  # Callback containing yield has to be defined in the method called via block
29
43
  @parser.end_object do
30
- if @object_nesting_level.eql? @yield_nesting_level
31
- yield @aggregator[@object_nesting_level].clone
44
+ if yield_object?
45
+ yield @aggregator[@current_nesting_level].clone
32
46
  # TODO probably can be faster than reject!{true}
33
- @aggregator[@object_nesting_level].reject!{true}
47
+ @aggregator[@current_nesting_level].reject!{true}
34
48
  else
35
49
  merge_up
36
50
  end
37
51
 
38
- @object_nesting_level -= 1
52
+ @current_nesting_level -= 1
39
53
  end
40
54
 
41
55
  @parser.end_array do
42
- if @object_nesting_level.eql? @yield_nesting_level
43
- yield @aggregator[@object_nesting_level].clone
56
+ if yield_object?
57
+ yield @aggregator[@current_nesting_level].clone
44
58
  # TODO probably can be faster than reject!{true}
45
- @aggregator[@object_nesting_level].reject!{true}
59
+ @aggregator[@current_nesting_level].reject!{true}
46
60
  else
47
61
  merge_up
48
62
  end
49
63
 
50
- @object_nesting_level -= 1
64
+ @current_nesting_level -= 1
51
65
  end
52
66
 
53
67
  @file_io.each(@chunk_size) do |chunk|
@@ -55,16 +69,24 @@ module Json
55
69
  end
56
70
  end
57
71
 
72
+ def yield_object?
73
+ @current_nesting_level.eql? @yield_nesting_level or (not @wanted_key.nil? and @wanted_key == @temp_aggregator_keys[@current_nesting_level-1])
74
+ end
75
+
76
+ def yield_value?
77
+ @wanted_key == @current_key
78
+ end
79
+
58
80
  def start_object
59
- @temp_aggregator_keys[@object_nesting_level] = @current_key
60
- @object_nesting_level += 1
61
- @aggregator[@object_nesting_level] = {}
81
+ @temp_aggregator_keys[@current_nesting_level] = @current_key
82
+ @current_nesting_level += 1
83
+ @aggregator[@current_nesting_level] = {}
62
84
  end
63
85
 
64
86
  def start_array
65
- @temp_aggregator_keys[@object_nesting_level] = @current_key
66
- @object_nesting_level += 1
67
- @aggregator[@object_nesting_level] = []
87
+ @temp_aggregator_keys[@current_nesting_level] = @current_key
88
+ @current_nesting_level += 1
89
+ @aggregator[@current_nesting_level] = []
68
90
  end
69
91
 
70
92
  def key k
@@ -72,23 +94,19 @@ module Json
72
94
  end
73
95
 
74
96
  def value v
75
- if @aggregator[@object_nesting_level].kind_of? Array
76
- @aggregator[@object_nesting_level] << v
77
- else
78
- @aggregator[@object_nesting_level][@current_key] = v
79
- end
97
+
80
98
  end
81
99
 
82
100
  def merge_up
83
- return if @object_nesting_level == 1
84
- previous_object_nesting_level = @object_nesting_level - 1
85
- if @aggregator[previous_object_nesting_level].kind_of? Array
86
- @aggregator[previous_object_nesting_level] << @aggregator[@object_nesting_level]
101
+ return if @current_nesting_level == 0
102
+ previous_nesting_level = @current_nesting_level - 1
103
+ if @aggregator[previous_nesting_level].kind_of? Array
104
+ @aggregator[previous_nesting_level] << @aggregator[@current_nesting_level]
87
105
  else
88
- @aggregator[previous_object_nesting_level][@temp_aggregator_keys[previous_object_nesting_level]] = @aggregator[@object_nesting_level]
106
+ @aggregator[previous_nesting_level][@temp_aggregator_keys[previous_nesting_level]] = @aggregator[@current_nesting_level]
89
107
  end
90
108
 
91
- @aggregator.delete(@object_nesting_level)
109
+ @aggregator.delete(@current_nesting_level)
92
110
  @aggregator
93
111
  end
94
112
  end
@@ -1,5 +1,5 @@
1
1
  module Json
2
2
  module Streamer
3
- VERSION = "0.3.0"
3
+ VERSION = "0.4.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json-streamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Csaba Apagyi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-28 00:00:00.000000000 Z
11
+ date: 2016-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -121,6 +121,6 @@ rubyforge_project:
121
121
  rubygems_version: 2.5.1
122
122
  signing_key:
123
123
  specification_version: 4
124
- summary: Utility to support JSON streaming allowing you to get objects based on various
125
- criteria. Useful for e.g. streaming objects from a JSON array.
124
+ summary: Utility to support JSON streaming allowing you to get data based on various
125
+ criteria (key, nesting level, etc)
126
126
  test_files: []