json-emitter 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +111 -0
- data/lib/json-emitter/buffered_stream.rb +68 -0
- data/lib/json-emitter/emitter.rb +72 -0
- data/lib/json-emitter/stream.rb +55 -0
- data/lib/json-emitter/version.rb +4 -0
- data/lib/json-emitter.rb +100 -0
- metadata +63 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 82b399ea5929c610e5f113a35c72b9df9f6cf9957292dc8e2dde5837472484c3
|
4
|
+
data.tar.gz: 9925436be580fa8b772a18e4bad918ffcb3938f638cf0e633db4788c4e1df52c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 85048433a36d8d985d57099c2d0dacc0f5ddb31eb128ef8f030c8136224bd37d101db41ed14aff3f8a59b9c5958aa4e391b4b28598242ff1978d6c65d114f206
|
7
|
+
data.tar.gz: e058dd052d8604acbf8c8516a887dd1672a0018faf4d25c874feacf273655fbd83f10c587982fa028c98b16dd67bccd32d51421ba5a8bb49395096672df74026
|
data/README.md
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
## JsonEmitter
|
2
|
+
|
3
|
+
JsonEmitter is a library for efficiently generating very large bits of JSON in Ruby. Need to generate a JSON array of 10,000 database records without eating up all your RAM? No problem! Objects? Nested structures? JsonEmitter has you covered.
|
4
|
+
|
5
|
+
Use JsonEmitter in your Rack/Rails/Sinatra/Grape API to stream large JSON responses without worrying about RAM or HTTP timeouts. Use it to write large JSON objects to your filesystem, S3, or ~~a 3D printer~~ anywhere else!
|
6
|
+
|
7
|
+
**Stream a JSON array from ActiveRecord**
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
order_query = Order.limit(10_000).find_each(batch_size: 500)
|
11
|
+
stream = JsonEmitter.array(order_query) { |order|
|
12
|
+
{
|
13
|
+
number: order.id,
|
14
|
+
desc: order.description,
|
15
|
+
...
|
16
|
+
}
|
17
|
+
}
|
18
|
+
```
|
19
|
+
|
20
|
+
**Stream a JSON object**
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
order_query = Order.limit(10_000).find_each(batch_size: 500)
|
24
|
+
stream = JsonEmitter.object({
|
25
|
+
tuesday: false,
|
26
|
+
|
27
|
+
orders: order_query.lazy.map { |order|
|
28
|
+
{id: order.id, desc: order.description}
|
29
|
+
},
|
30
|
+
|
31
|
+
big_text_1: ->() {
|
32
|
+
load_tons_of_text
|
33
|
+
},
|
34
|
+
|
35
|
+
big_text_2: ->() {
|
36
|
+
load_tons_of_text
|
37
|
+
},
|
38
|
+
})
|
39
|
+
```
|
40
|
+
|
41
|
+
**Generate the JSON and put it somewhere**
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
# write to a file or any IO
|
45
|
+
File.open("/tmp/foo.json", "w+") { |file|
|
46
|
+
stream.write file
|
47
|
+
}
|
48
|
+
|
49
|
+
# get chunks and do something with them
|
50
|
+
stream.each { |json_chunk|
|
51
|
+
...
|
52
|
+
}
|
53
|
+
|
54
|
+
# this will buffer the JSON into roughly 8k chunks
|
55
|
+
stream.buffered(8).each { |json_8k_chunk|
|
56
|
+
...
|
57
|
+
}
|
58
|
+
```
|
59
|
+
|
60
|
+
# HTTP Chunked Transfer (a.k.a streaming)
|
61
|
+
|
62
|
+
In HTTP 1.0 the entire response is normally sent all at once. Usually this is fine, but it can cause problems when very large responses must be generated and sent. These problems usually manifest as spikes in memory usage and/or responses that take so long to send that the client (or an in-between proxy) times out the request.
|
63
|
+
|
64
|
+
The solution to this in HTTP 1.1 is chunked transfer encoding. The response body can be split up and sent in a series of separate "chunks" for the client to receive and automatically put back together. Ruby's Rack specification supports chunking, as do most frameworks based on it (e.g. Rails, Sinatra, Grape, etc).
|
65
|
+
|
66
|
+
The following examples all show the same streaming API in various Rack-based frameworks. Without streaming, the examples could eat up tons of memory, take too long, and time out on the client. With streaming, the following improvements are possible without your client-side code needing any changes:
|
67
|
+
|
68
|
+
1. Only 500 orders will ever be in memory at once.
|
69
|
+
2. Only one `ApiV1::Entities::Order` will ever be in memory at once.
|
70
|
+
3. Only 16kb (roughly) of JSON will ever be in memory at once.
|
71
|
+
4. That 16kb of JSON will be sent to the client while the next 16kb of JSON is generating.
|
72
|
+
|
73
|
+
**IMPORTANT** Not every Ruby application server supports HTTP chunking. Puma definitely supports it and WEBrick definitely does not. Phusion Passenger claims to but I have not tried it.
|
74
|
+
|
75
|
+
## Rails
|
76
|
+
|
77
|
+
TODO
|
78
|
+
|
79
|
+
## Sinatra
|
80
|
+
|
81
|
+
TODO
|
82
|
+
|
83
|
+
## Grape
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
get :orders do
|
87
|
+
enumerator = Order.
|
88
|
+
where("created_at >= ?", 1.year.ago).
|
89
|
+
find_each(batch_size: 500)
|
90
|
+
|
91
|
+
stream JsonEmitter.array(enumerator) { |order|
|
92
|
+
ApiV1::Entities::Order.new(order)
|
93
|
+
}.buffered(16)
|
94
|
+
end
|
95
|
+
```
|
96
|
+
|
97
|
+
## Rack
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
app = ->(env) {
|
101
|
+
enumerator = Order.
|
102
|
+
where("created_at >= ?", 1.year.ago).
|
103
|
+
find_each(batch_size: 500)
|
104
|
+
|
105
|
+
stream = JsonEmitter.array(enumerator) { |order|
|
106
|
+
order.to_h
|
107
|
+
}.buffered(16)
|
108
|
+
|
109
|
+
[200, {"Content-Type" => "application/json"}, stream]
|
110
|
+
}
|
111
|
+
```
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module JsonEmitter
  #
  # Represents a stream of JSON to be generated and yielded. It can be treated like any Enumerable.
  # Unlike JsonEmitter::Stream, the yielded output is buffered into (roughly) equally sized chunks.
  #
  class BufferedStream
    include Enumerable

    # Multipliers for converting a buffer size in the given unit into bytes.
    BYTE_MULTIPLIERS = {bytes: 1, kb: 1024, mb: 1024 * 1024}.freeze

    #
    # Initialize a new buffered stream.
    #
    # @param enum [Enumerator] An enumerator that yields pieces of JSON.
    # @param buffer_size [Integer] The buffer size in the given unit. This is a size *hint*, not a hard limit.
    # @param unit [Symbol] :bytes | :kb (default) | :mb
    #
    def initialize(enum, buffer_size, unit: :kb)
      @enum = enum
      multiplier = BYTE_MULTIPLIERS.fetch(unit) {
        raise ArgumentError, "unknown buffer size unit ':#{unit}'"
      }
      @buffer_size = buffer_size * multiplier
    end

    #
    # Write the stream to the specified IO object.
    #
    # @param io [IO]
    #
    def write(io)
      buffer.each do |chunk|
        io << chunk
      end
    end

    #
    # If a block is given, each chunk of JSON is yielded to it. If no block is given, an Enumerator is returned.
    #
    # @return [Enumerator]
    #
    def each(&block)
      return buffer unless block
      buffer.each(&block)
    end

    private

    # Lazily concatenates the pieces yielded by @enum into chunks of at least @buffer_size
    # bytes (the final chunk may be smaller) and yields each chunk.
    def buffer
      Enumerator.new do |yielder|
        pending = ""
        @enum.each do |piece|
          pending << piece
          next if pending.bytesize < @buffer_size
          yielder << pending
          pending = ""
        end
        yielder << pending unless pending.empty?
      end
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module JsonEmitter
  #
  # Builds Enumerators that yield JSON from Ruby Arrays or Hashes.
  #
  class Emitter
    #
    # Generates an Enumerator that will stream out a JSON array.
    #
    # @param enum [Enumerable] Something that can be enumerated over, like an Array or Enumerator. Each element should be something that can be rendered as JSON (e.g. a number, string, boolean, Array, or Hash).
    # @yield If a block is given, it will be yielded each value in the array. The return value from the block will be converted to JSON instead of the original value.
    # @return [Enumerator]
    #
    def array(enum, &transform)
      Enumerator.new do |yielder|
        yielder << "[".freeze

        enum.each_with_index do |val, idx|
          yielder << ",".freeze if idx > 0

          val = transform.call(val) if transform
          json_values(val).each do |json_fragment|
            yielder << json_fragment
          end
        end

        yielder << "]".freeze
      end
    end

    #
    # Generates an Enumerator that will stream out a JSON object.
    #
    # @param hash [Hash] Keys should be Strings or Symbols and values should be any JSON-compatible value like a number, string, boolean, Array, or Hash.
    # @return [Enumerator]
    #
    def object(hash)
      Enumerator.new do |yielder|
        yielder << "{".freeze

        hash.each_with_index do |(key, val), idx|
          yielder << ",".freeze if idx > 0

          yielder << "#{MultiJson.dump(key.to_s)}:"
          json_values(val).each do |json_fragment|
            yielder << json_fragment
          end
        end

        yielder << "}".freeze
      end
    end

    private

    # Returns an Enumerator (or Array) of JSON fragments for any supported value:
    # Hashes and Enumerables are streamed recursively, Procs are called and their
    # result serialized, and everything else is dumped with MultiJson.
    def json_values(value)
      case value
      when Hash then object(value)
      when Enumerable then array(value)
      when Proc then json_values(value.call)
      else [MultiJson.dump(value)]
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module JsonEmitter
  #
  # Represents a stream of JSON to be generated and yielded. It can be treated like any Enumerable.
  # The size of the yielded strings can vary from 1 to 1000's. If that's a problem, call #buffered.
  #
  class Stream
    include Enumerable

    #
    # Initialize a new stream.
    #
    # @param enum [Enumerator] An enumerator that yields pieces of JSON.
    #
    def initialize(enum)
      @enum = enum
    end

    #
    # Returns a new stream that will buffer the output. You can perform the same "write" or "each"
    # operations on the new stream, but the chunks of output will be (roughly) uniform in size.
    #
    # @param buffer_size [Integer] The buffer size in the given unit. This is a size *hint*, not a hard limit.
    # @param unit [Symbol] :bytes | :kb (default) | :mb
    # @return [JsonEmitter::BufferedStream]
    #
    def buffered(buffer_size = 16, unit: :kb)
      BufferedStream.new(@enum, buffer_size, unit: unit)
    end

    #
    # Write the stream to the specified IO object.
    #
    # @param io [IO]
    #
    def write(io)
      each do |chunk|
        io << chunk
      end
    end

    #
    # If a block is given, each chunk of JSON is yielded to it. If no block is given, an Enumerator is returned.
    #
    # @return [Enumerator]
    #
    def each(&block)
      return @enum unless block
      @enum.each(&block)
    end
  end
end
|
data/lib/json-emitter.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'multi_json'
|
2
|
+
|
3
|
+
require 'json-emitter/version'
|
4
|
+
require 'json-emitter/emitter'
|
5
|
+
require 'json-emitter/stream'
|
6
|
+
require 'json-emitter/buffered_stream'
|
7
|
+
|
8
|
+
#
|
9
|
+
# Efficiently generate very large strings of JSON from Ruby objects.
|
10
|
+
#
|
11
|
+
# Complex values like arrays and objects may be as large and nested as you need without compromising efficiency.
|
12
|
+
# Primitive values will be serialized to JSON using MultiJson.dump. MultiJson finds and uses the most efficient
|
13
|
+
# JSON generator you have on your system (e.g. oj) and falls back to the stdlib JSON library.
|
14
|
+
#
|
15
|
+
# The emitter can be used to output to anything (files, network sockets, etc), and the output can optionally be
|
16
|
+
# buffered. This works very well with so-called "HTTP chunked responses" in Rack/Rails/Sinatra/Grape/etc.
|
17
|
+
#
|
18
|
+
module JsonEmitter
  #
  # Generates a stream that will output a JSON array. The input can be any Enumerable, such as an
  # Array or an Enumerator.
  #
  # The following example uses minimum memory to generate a very large JSON array string from an
  # ActiveRecord query. Only 500 Order records will ever be in memory at once. The JSON will be
  # generated in small chunks so that the whole string is never in memory all at once.
  #
  #   enumerator = Order.limit(10_000).find_each(batch_size: 500)
  #   stream = JsonEmitter.array(enumerator) { |order|
  #     {
  #       number: order.id,
  #       desc: order.description,
  #       ...
  #     }
  #   }
  #
  #   # generate the JSON in chunks and write them to STDOUT
  #   stream.write($stdout)
  #
  #   # generate chunks of JSON and do something with them
  #   stream.each do |json_chunk|
  #     # do something with each json chunk
  #   end
  #
  #   # if you need the outputted chunks to be (roughly) equal in size, call "buffered"
  #   # and pass in the buffer size in kb.
  #   buffered_stream = stream.buffered(16)
  #
  # @param enum [Enumerable] Something that can be enumerated over, like an Array or Enumerator. Each element should be something that can be rendered as JSON (e.g. a number, string, boolean, Array, or Hash).
  # @yield If a block is given, it will be yielded each value in the array. The return value from the block will be converted to JSON instead of the original value.
  # @return [JsonEmitter::Stream]
  #
  def self.array(enum, &mapper)
    Stream.new(Emitter.new.array(enum, &mapper))
  end

  #
  # Generates a stream that will output a JSON object.
  #
  # If some of the values will be large arrays, use Enumerators or lazy Enumerators to build each
  # element on demand (to potentially save lots of RAM).
  #
  # You can also use Procs to generate large arrays, objects, blocks of text, etc. They'll only be
  # used one at a time, which can potentially save lots of RAM.
  #
  # The following example generates a very large JSON object string from several components.
  #
  #   stream = JsonEmitter.object({
  #     time: Time.now.iso8601,
  #     is_true: true,
  #     orders: Order.limit(10_000).find_each(batch_size: 500).lazy.map { |order|
  #       {number: order.id, desc: order.description}
  #     },
  #     high_mem_thing_1: ->() {
  #       get_high_mem_thing1()
  #     },
  #     high_mem_thing_2: ->() {
  #       get_high_mem_thing2()
  #     },
  #   })
  #
  #   # generate the JSON in chunks and write them to STDOUT
  #   stream.write($stdout)
  #
  #   # generate chunks of JSON and do something with them
  #   stream.each do |json_chunk|
  #     # do something with each json chunk
  #   end
  #
  #   # if you need the outputted chunks to be (roughly) equal in size, call "buffered"
  #   # and pass in the buffer size in kb.
  #   buffered_stream = stream.buffered(16)
  #
  # @param hash [Hash] Keys should be Strings or Symbols and values should be any JSON-compatible value like a number, string, boolean, Array, or Hash.
  # @return [JsonEmitter::Stream]
  #
  def self.object(hash)
    Stream.new(Emitter.new.object(hash))
  end
end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: json-emitter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jordan Hollinger
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-01-29 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: multi_json
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
27
|
+
description: Generates and outputs JSON in well-sized chunks
|
28
|
+
email: jordan.hollinger@gmail.com
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- README.md
|
34
|
+
- lib/json-emitter.rb
|
35
|
+
- lib/json-emitter/buffered_stream.rb
|
36
|
+
- lib/json-emitter/emitter.rb
|
37
|
+
- lib/json-emitter/stream.rb
|
38
|
+
- lib/json-emitter/version.rb
|
39
|
+
homepage: https://jhollinger.github.io/json-emitter/
|
40
|
+
licenses:
|
41
|
+
- MIT
|
42
|
+
metadata: {}
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: 2.3.0
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
requirements: []
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 2.7.6
|
60
|
+
signing_key:
|
61
|
+
specification_version: 4
|
62
|
+
summary: Efficiently generate tons of JSON
|
63
|
+
test_files: []
|