batcher 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain-http source can be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in batcher.gemspec
gemspec
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2011 Harold Gimenez and Stattleship
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included
11
+ in all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
19
+ DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,31 @@
1
+ Batcher
2
+ ======
3
+
4
+ Processing large amounts of ActiveRecord data at once is impractical because the memory footprint of that many ActiveRecord objects is too large and unmanageable. ActiveRecord models provide a `find_each` method for batch processing of data; unfortunately it only allows you to slice the data by primary key.
5
+
6
+ Batcher is a tiny gem that allows you to batch process data and slice it by whatever you require.
7
+
8
+ Example
9
+ -------
10
+
11
+ ```ruby
12
+ class User < ActiveRecord::Base
13
+ end
14
+
15
+ Batcher(User.order('influence desc')).each do |user|
16
+ user.do_something
17
+ end
18
+ ```
19
+
20
+ The default batch size is 1000, but you can pass in a different batch size to tweak accordingly:
21
+
22
+ ```ruby
23
+ Batcher(User.order('influence desc'), batch_size: 2000).each do |user|
24
+ user.do_something
25
+ end
26
+ ```
27
+
28
+ License
29
+ -------
30
+
31
+ Batcher is copyright 2011 Harold Gimenez and Stattleship. It is free software, and may be distributed under the terms specified in the LICENSE file.
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,25 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for batcher. The version is read from lib/batcher/version.rb,
# so lib/ is put on the load path before requiring it.
$:.push File.expand_path("../lib", __FILE__)
require "batcher/version"

Gem::Specification.new do |s|
  s.name        = "batcher"
  s.version     = Batcher::VERSION
  s.authors     = ["Harold Giménez"]
  s.email       = ["hgimenez@thoughtbot.com"]
  # TODO: point this at the project's public page; it is intentionally left
  # unchanged here because the URL is not known from this file.
  s.homepage    = ""
  s.summary     = %q{Data batch processing for ActiveRecord}
  s.description = %q{Allows you to process many records in batches. Similar to #find_each in ActiveRecord, but allows you to specify a slicing attribute that is not the table's primary key}
  # The bundled LICENSE file carries the MIT license text; declare it so
  # tooling that inspects gem metadata can see it.
  s.licenses    = ["MIT"]

  s.rubyforge_project = "batcher"

  # The file lists are derived from what git tracks, so the gem must be built
  # from a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency "activerecord"

  s.add_development_dependency "rspec"
  s.add_development_dependency "pg"
end
@@ -0,0 +1,6 @@
1
+ require "batcher/version"
2
+ require "batcher/process"
3
+
4
+ def Batcher(scope, opts = {})
5
+ Batcher::Process.new(scope, opts)
6
+ end
@@ -0,0 +1,29 @@
1
module Batcher
  # Walks every record of a scope in fixed-size batches, fetching each batch
  # with `limit`/`offset` so that only one batch is loaded at a time.
  #
  # The scope only needs to respond to `count`, `limit(n)` and `offset(n)`,
  # and the object returned by `limit(n).offset(m)` must respond to `each`.
  #
  # NOTE(review): paging by offset presumably needs a deterministic order on
  # the scope to avoid skipped or repeated rows between batches - confirm for
  # the scopes you pass in.
  class Process
    include Enumerable

    # Batch size used when the caller does not supply :batch_size.
    DEFAULT_BATCH_SIZE = 1000

    # Number of records fetched per query.
    attr_accessor :batch_size

    # @param scope [#count, #limit, #offset] the relation to iterate over
    # @param opts [Hash] options; :batch_size overrides DEFAULT_BATCH_SIZE
    def initialize(scope, opts = {})
      @scope = scope
      @batch_size = opts[:batch_size] || DEFAULT_BATCH_SIZE
    end

    # Yields each record of the scope, one batch at a time.
    #
    # @yield [record] every record in the scope
    # @return [Enumerator] when called without a block
    # @raise [ArgumentError] if batch_size is not a positive integer
    def each(&block)
      return iterator unless block_given?

      iterator.each(&block)
    end

    # Builds the enumerator that performs the batched fetching. Nothing is
    # queried until the enumerator is actually consumed.
    #
    # @return [Enumerator]
    def iterator
      Enumerator.new do |yielder|
        # Snapshot the size so changing batch_size mid-iteration cannot skew
        # the offsets of the remaining batches.
        size = batch_size
        unless size.is_a?(Integer) && size > 0
          raise ArgumentError, "batch_size must be a positive integer, got #{size.inspect}"
        end

        total = @scope.count
        offset = 0
        # Keep fetching while there are rows past the current offset. The
        # final batch may be shorter than `size`; it must still be fetched.
        while offset < total
          @scope.limit(size).offset(offset).each do |record|
            yielder << record
          end
          offset += size
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Batcher
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,11 @@
1
require 'spec_helper'
# Load the library under test explicitly so this spec also works when it is
# the first (or only) spec file loaded.
require 'batcher'

# Minimal model backed by the users table created by the spec helper.
class User < ActiveRecord::Base
end

describe Batcher do
  it 'provides an interface to Process' do
    batcher = Batcher(User.where(name: 'foo'))
    batcher.should be_kind_of(Batcher::Process)
  end
end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+ require 'batcher'
3
+
4
+ class User < ActiveRecord::Base
5
+ end
6
+
7
+ describe Batcher::Process do
8
+ it 'can be set a batch size' do
9
+ batcher = Batcher::Process.new(stub, batch_size: 42)
10
+ batcher.batch_size.should be == 42
11
+ end
12
+
13
+ it 'defaults to a batch size of 1000' do
14
+ batcher = Batcher::Process.new(stub)
15
+ batcher.batch_size.should be == 1000
16
+ end
17
+
18
+ it 'yields each object in the provided scope' do
19
+ jimis = 3.times.map { User.create(name: 'Jimi') }
20
+ 3.times { User.create(name: 'Eric') }
21
+
22
+ batcher = Batcher::Process.new(User.where(name: 'Jimi'), batch_size: 2)
23
+
24
+ yielded_objects = []
25
+ batcher.each do |object|
26
+ yielded_objects << object
27
+ end
28
+
29
+ yielded_objects.should have(3).objects
30
+ yielded_objects.should =~ jimis
31
+ end
32
+ end
@@ -0,0 +1,33 @@
1
+ require 'rspec'
2
+ require 'active_record'
3
+
4
# Helpers that prepare the PostgreSQL database used by the specs.
module DBHelpers
  # Ensures the test database exists, connects to it, and (re)creates the
  # users table. `force: true` drops any existing users table first.
  def build_and_migrate_database
    create_db
    ActiveRecord::Schema.define do
      create_table :users, force: true do |t|
        t.string :name
      end
    end
  end

  # Creates the batcher_test database if it is missing and leaves
  # ActiveRecord connected to it.
  def create_db
    # Connect to template1 first: it is only used to look up and, if needed,
    # create the test database.
    connect_to_database('template1')
    result = ActiveRecord::Base.connection.select_all %{SELECT * FROM pg_catalog.pg_database WHERE datname = 'batcher_test'}
    if result.to_a.empty?
      ActiveRecord::Base.connection.execute 'CREATE DATABASE batcher_test;'
    end
    # Switch to the test database so the schema and spec data are not written
    # into template1.
    connect_to_database('batcher_test')
  end

  private

  # Points ActiveRecord at the given PostgreSQL database.
  def connect_to_database(name)
    ActiveRecord::Base.establish_connection(
      :adapter  => 'postgresql',
      :encoding => 'unicode',
      :database => name
    )
  end
end
26
+
27
+ RSpec.configure do |config|
28
+ config.include DBHelpers
29
+
30
+ config.before(:all) do
31
+ build_and_migrate_database
32
+ end
33
+ end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: batcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Harold Giménez
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-16 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: activerecord
16
+ requirement: &2157706000 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2157706000
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &2157705580 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2157705580
36
+ - !ruby/object:Gem::Dependency
37
+ name: pg
38
+ requirement: &2157705040 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2157705040
47
+ description: ! 'Allows you to process many records in batches. Similar to #find_each
48
+ in ActiveRecord, but allows you to specify a slicing attribute that is not the table''s
49
+ primary key'
50
+ email:
51
+ - hgimenez@thoughtbot.com
52
+ executables: []
53
+ extensions: []
54
+ extra_rdoc_files: []
55
+ files:
56
+ - .gitignore
57
+ - Gemfile
58
+ - LICENSE
59
+ - README.md
60
+ - Rakefile
61
+ - batcher.gemspec
62
+ - lib/batcher.rb
63
+ - lib/batcher/process.rb
64
+ - lib/batcher/version.rb
65
+ - spec/batcher_spec.rb
66
+ - spec/process_spec.rb
67
+ - spec/spec_helper.rb
68
+ homepage: ''
69
+ licenses: []
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ! '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ requirements: []
87
+ rubyforge_project: batcher
88
+ rubygems_version: 1.8.6
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: Data batch processing for ActiveRecord
92
+ test_files:
93
+ - spec/batcher_spec.rb
94
+ - spec/process_spec.rb
95
+ - spec/spec_helper.rb