mongo_batch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +102 -0
- data/lib/mongo_batch.rb +43 -0
- data/spec/factories/posts.rb +5 -0
- data/spec/mongo_batch_spec.rb +126 -0
- data/spec/spec_helper.rb +18 -0
- metadata +139 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9343db89a622725ac3532e5e7b16ff128b5d04b2
|
4
|
+
data.tar.gz: 5a2a6233dbc88941e86f53659020a8d4d1d4d516
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7fc6a20a065fe187c8980584c50dcaed5597b7ffae396c6c3d091249e7fe5ee6a38f7537005a8b0044c247f25477c996f9cc0f5b4543f70873c75d08c1d66b63
|
7
|
+
data.tar.gz: 17ed373f8e1f0cf2cddf265e3604df9ea42c207da69169346c2eefed1e30781a7140278271a0585e04360c6acb7e1cef7d5173a4228676056ce55118bcc84de1
|
data/README.md
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
# MongoBatch [Build Status](https://travis-ci.org/altmetric/mongo_batch)
|
2
|
+
|
3
|
+
A Ruby library to run Mongoid queries on large collections in batches.
|
4
|
+
|
5
|
+
**Supported Ruby versions:** 2.1, 2.2
|
6
|
+
|
7
|
+
**Supported Mongoid versions:** 4.0, 5.0
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
```
|
12
|
+
gem install mongo_batch -v '~> 0.1'
|
13
|
+
```
|
14
|
+
|
15
|
+
Or, in your `Gemfile`:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
gem 'mongo_batch', '~> 0.1'
|
19
|
+
```
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
Extend your Mongoid models with `MongoBatch` to be able to call
|
24
|
+
`find_in_batches` on your models. The method will yield each batch of
|
25
|
+
records to the given block.
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
require 'mongo_batch'
|
29
|
+
|
30
|
+
class Post
|
31
|
+
extend MongoBatch
|
32
|
+
end
|
33
|
+
|
34
|
+
Post.find_in_batches do |batch|
|
35
|
+
batch.each do |post|
|
36
|
+
post.update(body: 'Hello world!')
|
37
|
+
end
|
38
|
+
end
|
39
|
+
```
|
40
|
+
|
41
|
+
If you do not pass a block to `find_in_batches`, the method will
|
42
|
+
return an [Enumerator](http://ruby-doc.org/core-2.2.2/Enumerator.html).
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
Post.find_in_batches.with_index.each do |batch, index|
|
46
|
+
batch.each do |post|
|
47
|
+
post.update(body: "Hello world! #{index}")
|
48
|
+
end
|
49
|
+
end
|
50
|
+
```
|
51
|
+
|
52
|
+
The default batch size is 1,000 records, but `find_in_batches` accepts
|
53
|
+
an option to configure a different batch size, as well as options to
|
54
|
+
limit the records to process, sorting criteria and an initial offset.
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
Post
|
58
|
+
.find_in_batches(batch_size: 500, to: 2_000, offset: 100, order_by: { _id: :desc })
|
59
|
+
.each do |batch|
|
60
|
+
batch.each do |post|
|
61
|
+
post.update(body: "Hola mundo!")
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
```
|
66
|
+
|
67
|
+
If you have more complex queries or prefer not to extend your models
|
68
|
+
with `MongoBatch`,
|
69
|
+
you can use `MongoBatch.in_batches` and supply the query you want to batch.
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
MongoBatch
|
73
|
+
.in_batches(Post.where(:body.exists => true).no_timeout)
|
74
|
+
.each do |batch|
|
75
|
+
batch.each do |post|
|
76
|
+
post.update(body: 'Hello world!')
|
77
|
+
end
|
78
|
+
end
|
79
|
+
```
|
80
|
+
|
81
|
+
`MongoBatch.in_batches` also accepts values to configure the batch size, limit of
|
82
|
+
records to process, sorting criteria or an initial offset.
|
83
|
+
|
84
|
+
```ruby
|
85
|
+
MongoBatch
|
86
|
+
.in_batches(Post.where(:body.exists => true).no_timeout,
|
87
|
+
to: 2_000, offset: 100, order_by: { _id: :desc })
|
88
|
+
.each do |batch|
|
89
|
+
batch.each do |post|
|
90
|
+
post.update(body: 'Hi mum!')
|
91
|
+
end
|
92
|
+
end
|
93
|
+
```
|
94
|
+
|
95
|
+
## License
|
96
|
+
|
97
|
+
Copyright © 2015 Altmetric LLP
|
98
|
+
|
99
|
+
Distributed under the MIT License.
|
100
|
+
|
101
|
+
[URI]: http://ruby-doc.org/stdlib/libdoc/uri/rdoc/URI.html
|
102
|
+
|
data/lib/mongo_batch.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Helpers for walking a query in fixed-size batches.
#
# Call MongoBatch.in_batches with any query-like object, or `extend MongoBatch`
# in a class to gain `find_in_batches` on that class.
module MongoBatch
  # Splits a query into successive limit/skip windows.
  class Batcher
    attr_reader :query, :batch_size, :to, :offset, :order_by

    # @param query [Object] object to batch; it must respond to `order_by`,
    #   whose result must respond to `limit` and then `skip`. `count` is only
    #   called on it when the `:to` option is omitted.
    # @param options [Hash]
    # @option options [Integer] :batch_size (1_000) maximum records per batch
    # @option options [Integer] :to (query.count) position (exclusive) at
    #   which batching stops
    # @option options [Integer] :offset (0) position of the first record
    # @option options [Object] :order_by ({ _id: :asc }) value handed to
    #   `query.order_by`
    # @raise [ArgumentError] if the batch size is not a positive integer
    def initialize(query, options = {})
      @query = query
      @batch_size = options.fetch(:batch_size) { 1_000 }
      # Fail early with a clear message instead of a lazy "step can't be 0"
      # error (or a single oversized batch for negative sizes) on iteration.
      unless @batch_size.is_a?(Integer) && @batch_size > 0
        raise ArgumentError, 'batch_size must be a positive integer'
      end

      # Block form so `query.count` is skipped when the caller supplies :to.
      @to = options.fetch(:to) { query.count }
      @offset = options.fetch(:offset) { 0 }
      @order_by = options.fetch(:order_by) { { _id: :asc } }
    end

    # Builds an enumerator yielding one ordered, limited and skipped query per
    # batch. The last batch is clamped so no window reaches past `to`.
    #
    # @return [Enumerator] enumerator whose `size` is the number of batches
    def batches
      Enumerator.new(batch_count) do |yielder|
        offset.step(by: batch_size, to: to - 1) do |batch_offset|
          limit = [batch_size, to - batch_offset].min
          yielder << query.order_by(order_by).limit(limit).skip(batch_offset)
        end
      end
    end

    private

    # Number of batches needed to cover the positions offset...to.
    def batch_count
      remaining = to - offset
      return 0 unless remaining > 0

      # Integer ceiling division.
      (remaining + batch_size - 1) / batch_size
    end
  end

  # Batches an arbitrary query.
  #
  # @param query [Object] see Batcher#initialize
  # @param options [Hash] see Batcher#initialize
  # @return [Enumerator] one query per batch
  def self.in_batches(query, options = {})
    Batcher.new(query, options).batches
  end

  # Batches the receiver (the object extended with this module).
  #
  # @param options [Hash] see Batcher#initialize
  # @yield [batch] each batch query, when a block is given
  # @return [Enumerator] the batch enumerator when no block is given
  def find_in_batches(options = {}, &block)
    enumerator = Batcher.new(self, options).batches
    return enumerator unless block

    enumerator.each(&block)
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'mongo_batch'
|
3
|
+
|
4
|
+
# Specs for MongoBatch, covering both the `find_in_batches` class-level helper
# and the standalone `MongoBatch.in_batches` entry point.
describe MongoBatch do
  # Minimal Mongoid model used as the batching target in every example.
  class Post
    include Mongoid::Document
    include Mongoid::Timestamps
    extend MongoBatch

    field :body, type: String
  end

  describe '#find_in_batches' do
    it 'yields to the given block for each batch of records' do
      posts = FactoryGirl.create_list(:post, 10).sort_by(&:id)

      # Pass the block to find_in_batches itself so the block-given branch is
      # exercised rather than the returned enumerator.
      expect do |block|
        Post.find_in_batches(batch_size: 2, &block)
      end.to yield_successive_args(posts[(0..1)],
                                   posts[(2..3)],
                                   posts[(4..5)],
                                   posts[(6..7)],
                                   posts[(8..9)])
    end

    it 'returns an enumerator if a block is not given' do
      expect(Post.find_in_batches(batch_size: 2)).to be_an(Enumerator)
    end
  end

  describe '.in_batches' do
    it 'returns an enumerator if a block is not given' do
      # The query is the first argument; options follow it.
      expect(described_class.in_batches(Post, batch_size: 2)).to be_an(Enumerator)
    end

    it 'starts from the first record if "offset" is not specified' do
      posts = FactoryGirl.create_list(:post, 4).sort_by(&:id)

      batches = described_class.in_batches(Post).map(&:to_a)

      expect(batches.first).to start_with(posts[0])
    end

    it 'skips the number of records indicated in "offset"' do
      posts = FactoryGirl.create_list(:post, 4)

      posts_in_batches = described_class.in_batches(Post, batch_size: 2, offset: 3).map(&:to_a)

      expect(posts_in_batches).to eq([[posts[3]]])
    end

    it 'calculates the count of records if "to" is not specified' do
      posts = FactoryGirl.create_list(:post, 4)

      posts_in_batches = described_class.in_batches(Post, batch_size: 2).map(&:to_a)

      expect(posts_in_batches).to eq([posts[(0..1)], posts[(2..3)]])
    end

    it 'does not calculate the count of records if "to" is specified' do
      FactoryGirl.create_list(:post, 4)

      expect(Post).not_to receive(:count)

      described_class.in_batches(Post, to: 1).map(&:to_a)
    end

    it 'orders the elements ascending by _id by default' do
      posts = FactoryGirl.create_list(:post, 10).sort_by(&:id)

      ids = Post.find_in_batches(batch_size: 2).map(&:to_a).flatten.map(&:id)

      expect(ids).to eq(posts.map(&:id))
    end

    it 'allows to specify a custom sorting order' do
      posts = FactoryGirl.create_list(:post, 10).sort_by(&:id)

      ids = described_class
            .in_batches(Post, batch_size: 2, order_by: { id: 'desc' })
            .map(&:to_a)
            .flatten
            .map(&:id)

      expect(ids).to eq(posts.map(&:id).reverse)
    end

    it 'preserves any scopes previously applied' do
      posts = FactoryGirl.create_list(:post, 5, body: 'Hello world!')
      FactoryGirl.create_list(:post, 1)

      posts_in_batches = described_class
                         .in_batches(Post.where(:body.exists => true), batch_size: 2)
                         .map(&:to_a)

      # Only five posts match the scope, so the last batch holds one record.
      expect(posts_in_batches).to eq([posts[(0..1)], posts[(2..3)], posts[(4..4)]])
    end

    context 'when "to" is a multiple of the batch size' do
      it 'splits the query into uniform batches covering all the records in the query' do
        posts = FactoryGirl.create_list(:post, 10)

        posts_in_batches = described_class.in_batches(Post, batch_size: 2, to: 10).map(&:to_a)

        expect(posts_in_batches).to eq([posts[(0..1)],
                                        posts[(2..3)],
                                        posts[(4..5)],
                                        posts[(6..7)],
                                        posts[(8..9)]])
      end
    end

    context 'when "to" is not a multiple of the batch size' do
      it 'splits the query into batches covering all the records in the query' do
        posts = FactoryGirl.create_list(:post, 10)

        posts_in_batches = described_class.in_batches(Post, batch_size: 3, to: 10).map(&:to_a)

        expect(posts_in_batches).to eq([posts[(0..2)],
                                        posts[(3..5)],
                                        posts[(6..8)],
                                        posts[(9..9)]])
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'mongoid'
|
2
|
+
require 'database_cleaner'
|
3
|
+
require 'factory_girl'
|
4
|
+
require 'factories/posts'
|
5
|
+
|
6
|
+
# Load the Mongoid configuration that lives in ../config relative to this
# file's directory, using the "test" environment.
mongoid_config = File.expand_path('../config/mongoid.yml', File.dirname(__FILE__))
Mongoid.load!(mongoid_config, 'test')

RSpec.configure do |config|
  # Select the truncation strategy once, before any example runs.
  config.before(:suite) { DatabaseCleaner[:mongoid].strategy = :truncation }

  # Run every example inside a DatabaseCleaner cleaning block.
  config.around(:each) do |example|
    DatabaseCleaner.cleaning { example.run }
  end
end
|
metadata
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mongo_batch
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Oliver Martell
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-11-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.10'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.10'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: mongoid
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '4.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '4.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: database_cleaner
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.5'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.5'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: factory_girl
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '4.5'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '4.5'
|
97
|
+
description: |2
|
98
|
+
A library to iterate over entire Mongo collections or large queries
|
99
|
+
exposing an API to control things like batch size, order and limit.
|
100
|
+
email:
|
101
|
+
- support@altmetric.com
|
102
|
+
executables: []
|
103
|
+
extensions: []
|
104
|
+
extra_rdoc_files: []
|
105
|
+
files:
|
106
|
+
- README.md
|
107
|
+
- lib/mongo_batch.rb
|
108
|
+
- spec/factories/posts.rb
|
109
|
+
- spec/mongo_batch_spec.rb
|
110
|
+
- spec/spec_helper.rb
|
111
|
+
homepage: https://github.com/altmetric/mongo_batch
|
112
|
+
licenses:
|
113
|
+
- MIT
|
114
|
+
metadata: {}
|
115
|
+
post_install_message:
|
116
|
+
rdoc_options: []
|
117
|
+
require_paths:
|
118
|
+
- lib
|
119
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0'
|
129
|
+
requirements: []
|
130
|
+
rubyforge_project:
|
131
|
+
rubygems_version: 2.4.5
|
132
|
+
signing_key:
|
133
|
+
specification_version: 4
|
134
|
+
summary: A library to batch Mongo queries
|
135
|
+
test_files:
|
136
|
+
- spec/factories/posts.rb
|
137
|
+
- spec/mongo_batch_spec.rb
|
138
|
+
- spec/spec_helper.rb
|
139
|
+
has_rdoc:
|