mongo_batch 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9343db89a622725ac3532e5e7b16ff128b5d04b2
4
+ data.tar.gz: 5a2a6233dbc88941e86f53659020a8d4d1d4d516
5
+ SHA512:
6
+ metadata.gz: 7fc6a20a065fe187c8980584c50dcaed5597b7ffae396c6c3d091249e7fe5ee6a38f7537005a8b0044c247f25477c996f9cc0f5b4543f70873c75d08c1d66b63
7
+ data.tar.gz: 17ed373f8e1f0cf2cddf265e3604df9ea42c207da69169346c2eefed1e30781a7140278271a0585e04360c6acb7e1cef7d5173a4228676056ce55118bcc84de1
data/README.md ADDED
@@ -0,0 +1,102 @@
1
+ # MongoBatch [![Build Status](https://travis-ci.org/altmetric/mongo_batch.svg?branch=master)](https://travis-ci.org/altmetric/mongo_batch)
2
+
3
+ A Ruby library to run Mongoid queries on large collections in batches.
4
+
5
+ **Supported Ruby versions:** 2.1, 2.2
6
+
7
+ **Supported Mongoid versions:** 4.0, 5.0
8
+
9
+ ## Installation
10
+
11
+ ```
12
+ gem install mongo_batch -v '~> 0.1'
13
+ ```
14
+
15
+ Or, in your `Gemfile`:
16
+
17
+ ```ruby
18
+ gem 'mongo_batch', '~> 0.1'
19
+ ```
20
+
21
+ ## Usage
22
+
23
+ Extend your Mongoid models with `MongoBatch` to be able to call
24
+ `find_in_batches` on your models. The method will yield each batch of
25
+ records to the given block.
26
+
27
+ ```ruby
28
+ require 'mongo_batch'
29
+
30
+ class Post
31
+ extend MongoBatch
32
+ end
33
+
34
+ Post.find_in_batches do |batch|
35
+ batch.each do |post|
36
+ post.update(body: 'Hello world!')
37
+ end
38
+ end
39
+ ```
40
+
41
+ If you do not pass a block to `find_in_batches`, the method will
42
+ return an [Enumerator](http://ruby-doc.org/core-2.2.2/Enumerator.html).
43
+
44
+ ```ruby
45
+ Post.find_in_batches.with_index.each do |batch, index|
46
+ batch.each do |post|
47
+ post.update(body: "Hello world! #{index}")
48
+ end
49
+ end
50
+ ```
51
+
52
+ The default batch size is 1,000 records. `find_in_batches` accepts a
53
+ `batch_size` option to change it, as well as `to` (the position at which to
54
+ stop processing), `order_by` (the sorting criteria) and `offset` (the number of records to skip initially).
55
+
56
+ ```ruby
57
+ Post
58
+ .find_in_batches(batch_size: 500, to: 2_000, offset: 100, order_by: { _id: :desc })
59
+ .each do |batch|
60
+ batch.each do |post|
61
+ post.update(body: "Hola mundo!")
62
+ end
63
+ end
64
+
65
+ ```
66
+
67
+ If you have more complex queries or prefer not to extend your models
68
+ with `MongoBatch`,
69
+ you can use `MongoBatch.in_batches` and supply the query you want to batch.
70
+
71
+ ```ruby
72
+ MongoBatch
73
+ .in_batches(Post.where(:body.exists => true).no_timeout)
74
+ .each do |batch|
75
+ batch.each do |post|
76
+ post.update(body: 'Hello world!')
77
+ end
78
+ end
79
+ ```
80
+
81
+ `MongoBatch.in_batches` also accepts values to configure the batch size, limit of
82
+ records to process, sorting criteria or an initial offset.
83
+
84
+ ```ruby
85
+ MongoBatch
86
+ .in_batches(Post.where(:body.exists => true).no_timeout,
87
+ to: 2_000, offset: 100, order_by: { _id: :desc })
88
+ .each do |batch|
89
+ batch.each do |post|
90
+ post.update(body: 'Hi mum!')
91
+ end
92
+ end
93
+ ```
94
+
95
+ ## License
96
+
97
+ Copyright © 2015 Altmetric LLP
98
+
99
+ Distributed under the MIT License.
100
+
101
+ [URI]: http://ruby-doc.org/stdlib/libdoc/uri/rdoc/URI.html
102
+
@@ -0,0 +1,43 @@
1
# Helpers for walking a large query in fixed-size batches.
#
# Call +MongoBatch.in_batches(query, options)+ directly, or +extend+ a class
# with this module to gain +find_in_batches+.
module MongoBatch
  # Splits a query into successive limit/skip windows.
  #
  # The query object only needs to respond to +count+ (used when +:to+ is not
  # given), +order_by+, +limit+ and +skip+.
  class Batcher
    attr_reader :query, :batch_size, :to, :offset, :order_by

    # @param query [#order_by] the query to split into batches
    # @param options [Hash]
    # @option options [Numeric] :batch_size records per batch (default 1_000)
    # @option options [Integer] :to position at which to stop, counted from
    #   the start of the query (default: +query.count+, only evaluated when
    #   the option is absent)
    # @option options [Integer] :offset records to skip before the first
    #   batch (default 0)
    # @option options [Hash] :order_by sort criteria (default +{ _id: :asc }+)
    # @raise [ArgumentError] if +:batch_size+ is not a positive number
    def initialize(query, options = {})
      @query = query
      @batch_size = options.fetch(:batch_size) { 1_000 }
      @to = options.fetch(:to) { query.count }
      @offset = options.fetch(:offset) { 0 }
      @order_by = options.fetch(:order_by) { { _id: :asc } }

      # Fail fast: a zero or negative step can never make progress.
      return if batch_size.is_a?(Numeric) && batch_size > 0

      raise ArgumentError, "batch_size must be a positive number, got #{batch_size.inspect}"
    end

    # Builds a lazy enumerator over the batches.
    #
    # Each element is the original query narrowed with +order_by+, +limit+
    # and +skip+; nothing is sent to the query until the enumerator is
    # iterated. The enumerator's +size+ is the number of batches.
    #
    # @return [Enumerator]
    def batches
      Enumerator.new(batch_count) do |yielder|
        (offset...to).step(batch_size) do |start|
          # The final window is clamped so it never reaches past +to+.
          window = [batch_size, to - start].min
          yielder << query.order_by(order_by).limit(window).skip(start)
        end
      end
    end

    private

    # Number of batches needed to cover the records between +offset+ and +to+.
    def batch_count
      remaining = to - offset
      return 0 unless remaining > 0

      (remaining / batch_size.to_f).ceil
    end
  end

  # Splits +query+ into batches.
  #
  # @param query [#order_by] the query to batch
  # @param options [Hash] see {Batcher#initialize}
  # @return [Enumerator] one element per batch
  def self.in_batches(query, options = {})
    Batcher.new(query, options).batches
  end

  # Batches the receiver (the object extended with this module).
  #
  # @param options [Hash] see {Batcher#initialize}
  # @yield [batch] each batch in turn, when a block is given
  # @return [Enumerator] when no block is given
  def find_in_batches(options = {}, &block)
    batcher = Batcher.new(self, options)

    if block
      batcher.batches.each(&block)
    else
      batcher.batches
    end
  end
end
@@ -0,0 +1,5 @@
1
# Factory for the Post documents used by the specs: each generated post is
# given the next value of a sequence as its id.
FactoryGirl.define do
  factory(:post) { sequence(:id) }
end
@@ -0,0 +1,126 @@
1
require 'spec_helper'
require 'mongo_batch'

describe MongoBatch do
  # Minimal Mongoid model extended with MongoBatch for the examples below.
  class Post
    include Mongoid::Document
    include Mongoid::Timestamps
    extend MongoBatch

    field :body, type: String
  end

  describe '#find_in_batches' do
    it 'yields to the given block for each batch of records' do
      posts = FactoryGirl.create_list(:post, 10).sort_by(&:id)

      expect do |block|
        Post.find_in_batches(batch_size: 2).each(&block)
      end.to yield_successive_args(posts[(0..1)],
                                   posts[(2..3)],
                                   posts[(4..5)],
                                   posts[(6..7)],
                                   posts[(8..9)])
    end

    it 'returns an enumerator if a block is not given' do
      expect(Post.find_in_batches(batch_size: 2)).to be_an(Enumerator)
    end
  end

  describe '.in_batches' do
    it 'returns an enumerator if a block is not given' do
      # The query is the first argument; options follow it.
      expect(described_class.in_batches(Post, batch_size: 2)).to be_an(Enumerator)
    end

    it 'starts from the first record if "offset" is not specified' do
      posts = FactoryGirl.create_list(:post, 4).sort_by(&:id)

      batches = described_class.in_batches(Post).map(&:to_a)

      expect(batches.first).to start_with(posts[0])
    end

    it 'skips the number of records indicated in "offset"' do
      posts = FactoryGirl.create_list(:post, 4)

      posts_in_batches = described_class.in_batches(Post, batch_size: 2, offset: 3).map(&:to_a)

      expect(posts_in_batches).to eq([[posts[3]]])
    end

    it 'calculates the count of records if "to" is not specified' do
      posts = FactoryGirl.create_list(:post, 4)

      posts_in_batches = described_class.in_batches(Post, batch_size: 2).map(&:to_a)

      expect(posts_in_batches).to eq([posts[(0..1)], posts[(2..3)]])
    end

    it 'does not calculate the count of records if "to" is specified' do
      FactoryGirl.create_list(:post, 4)

      expect(Post).not_to receive(:count)

      described_class.in_batches(Post, to: 1).map(&:to_a)
    end

    it 'orders the elements ascending by _id by default' do
      posts = FactoryGirl.create_list(:post, 10).sort_by(&:id)

      ids = Post.find_in_batches(batch_size: 2).map(&:to_a).flatten.map(&:id)

      expect(ids).to eq(posts.map(&:id))
    end

    it 'allows to specify a custom sorting order' do
      posts = FactoryGirl.create_list(:post, 10).sort_by(&:id)

      ids = described_class
            .in_batches(Post, batch_size: 2, order_by: { id: 'desc' })
            .map(&:to_a)
            .flatten
            .map(&:id)

      expect(ids).to eq(posts.map(&:id).reverse)
    end

    it 'preserves any scopes previously applied' do
      posts = FactoryGirl.create_list(:post, 5, body: 'Hello world!')
      FactoryGirl.create_list(:post, 1)

      posts_in_batches = described_class
                         .in_batches(Post.where(:body.exists => true), batch_size: 2)
                         .map(&:to_a)

      # Five matching posts in batches of two: the last batch holds one post.
      expect(posts_in_batches).to eq([posts[(0..1)], posts[(2..3)], posts[(4..4)]])
    end

    context 'when "to" is a multiple of the batch size' do
      it 'splits the query into uniform batches covering all the records in the query' do
        posts = FactoryGirl.create_list(:post, 10)

        posts_in_batches = described_class.in_batches(Post, batch_size: 2, to: 10).map(&:to_a)

        expect(posts_in_batches).to eq([posts[(0..1)],
                                        posts[(2..3)],
                                        posts[(4..5)],
                                        posts[(6..7)],
                                        posts[(8..9)]])
      end
    end

    context 'when "to" is not a multiple of the batch size' do
      it 'splits the query into batches covering all the records in the query' do
        posts = FactoryGirl.create_list(:post, 10)

        posts_in_batches = described_class.in_batches(Post, batch_size: 3, to: 10).map(&:to_a)

        expect(posts_in_batches).to eq([posts[(0..2)],
                                        posts[(3..5)],
                                        posts[(6..8)],
                                        posts[(9..9)]])
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
require 'mongoid'
require 'database_cleaner'
require 'factory_girl'
require 'factories/posts'

# Load the Mongoid settings for the 'test' environment from config/mongoid.yml,
# resolved relative to this file.
mongoid_settings = File.expand_path('../../config/mongoid.yml', __FILE__)
Mongoid.load!(mongoid_settings, 'test')

RSpec.configure do |config|
  # Pick the truncation strategy once, before the suite starts.
  config.before(:suite) { DatabaseCleaner[:mongoid].strategy = :truncation }

  # Run every example inside a DatabaseCleaner cleaning cycle.
  config.around(:each) do |example|
    DatabaseCleaner.cleaning { example.run }
  end
end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mongo_batch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Oliver Martell
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-11-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mongoid
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '4.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '4.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: database_cleaner
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.5'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.5'
83
+ - !ruby/object:Gem::Dependency
84
+ name: factory_girl
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '4.5'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '4.5'
97
+ description: |2
98
+ A library to iterate over entire Mongo collections or large queries
99
+ exposing an API to control things like batch size, order and limit.
100
+ email:
101
+ - support@altmetric.com
102
+ executables: []
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - README.md
107
+ - lib/mongo_batch.rb
108
+ - spec/factories/posts.rb
109
+ - spec/mongo_batch_spec.rb
110
+ - spec/spec_helper.rb
111
+ homepage: https://github.com/altmetric/mongo_batch
112
+ licenses:
113
+ - MIT
114
+ metadata: {}
115
+ post_install_message:
116
+ rdoc_options: []
117
+ require_paths:
118
+ - lib
119
+ required_ruby_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ required_rubygems_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ requirements: []
130
+ rubyforge_project:
131
+ rubygems_version: 2.4.5
132
+ signing_key:
133
+ specification_version: 4
134
+ summary: A library to batch Mongo queries
135
+ test_files:
136
+ - spec/factories/posts.rb
137
+ - spec/mongo_batch_spec.rb
138
+ - spec/spec_helper.rb
139
+ has_rdoc: