model_iterator 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +10 -0
- data/LICENSE.md +20 -0
- data/README.md +35 -0
- data/Rakefile +134 -0
- data/lib/model_iterator.rb +213 -0
- data/model_iterator.gemspec +58 -0
- data/test/helper.rb +40 -0
- data/test/init_test.rb +80 -0
- data/test/iterate_test.rb +83 -0
- metadata +91 -0
data/Gemfile
ADDED
data/LICENSE.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012-* rick olson
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# ModelIterator
|
2
|
+
|
3
|
+
Basic library for iterating through large ActiveRecord datasets. For instance,
|
4
|
+
let's say you add a new feature, and you need to backfill data for existing
|
5
|
+
records:
|
6
|
+
|
7
|
+
iter = ModelIterator.new(User, :redis => $redis)
|
8
|
+
iter.each do |user|
|
9
|
+
backfill(user)
|
10
|
+
end
|
11
|
+
|
12
|
+
ModelIterator selects the records in batches (100 by default), and loops
|
13
|
+
through the table filtering based on the ID.
|
14
|
+
|
15
|
+
SELECT * FROM users WHERE id > 0 LIMIT 100
|
16
|
+
SELECT * FROM users WHERE id > 100 LIMIT 100
|
17
|
+
SELECT * FROM users WHERE id > 200 LIMIT 100
|
18
|
+
|
19
|
+
The last processed record's ID is saved to Redis after every batch or error. If
|
20
|
+
jobs crash, you can fix code, and re-run from where you left off.
|
21
|
+
|
22
|
+
This code was ported from GitHub, where it's been frequently used for nearly
|
23
|
+
two years.
|
24
|
+
|
25
|
+
## Note on Patches/Pull Requests
|
26
|
+
|
27
|
+
1. Fork the project.
|
28
|
+
2. Make your feature addition or bug fix.
|
29
|
+
3. Add tests for it. This is important so I don't break it in a future version
|
30
|
+
unintentionally.
|
31
|
+
4. Commit, do not mess with rakefile, version, or history. (if you want to have
|
32
|
+
your own version, that is fine but bump version in a commit by itself I can
|
33
|
+
ignore when I pull)
|
34
|
+
5. Send me a pull request. Bonus points for topic branches.
|
35
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
#############################################################################
|
6
|
+
#
|
7
|
+
# Helper functions
|
8
|
+
#
|
9
|
+
#############################################################################
|
10
|
+
|
11
|
+
def name
|
12
|
+
@name ||= Dir['*.gemspec'].first.split('.').first
|
13
|
+
end
|
14
|
+
|
15
|
+
def version
|
16
|
+
line = File.read("lib/#{name}.rb")[/^\s*VERSION\s*=\s*.*/]
|
17
|
+
line.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
|
18
|
+
end
|
19
|
+
|
20
|
+
def date
|
21
|
+
Date.today.to_s
|
22
|
+
end
|
23
|
+
|
24
|
+
def rubyforge_project
|
25
|
+
name
|
26
|
+
end
|
27
|
+
|
28
|
+
def gemspec_file
|
29
|
+
"#{name}.gemspec"
|
30
|
+
end
|
31
|
+
|
32
|
+
def gem_file
|
33
|
+
"#{name}-#{version}.gem"
|
34
|
+
end
|
35
|
+
|
36
|
+
def replace_header(head, header_name)
|
37
|
+
head.sub!(/(\.#{header_name}\s*= ').*'/) { "#{$1}#{send(header_name)}'"}
|
38
|
+
end
|
39
|
+
|
40
|
+
#############################################################################
|
41
|
+
#
|
42
|
+
# Standard tasks
|
43
|
+
#
|
44
|
+
#############################################################################
|
45
|
+
|
46
|
+
task :default => :test
|
47
|
+
|
48
|
+
require 'rake/testtask'
|
49
|
+
Rake::TestTask.new(:test) do |test|
|
50
|
+
test.libs << 'lib' << 'test'
|
51
|
+
test.pattern = 'test/**/*_test.rb'
|
52
|
+
test.verbose = true
|
53
|
+
end
|
54
|
+
|
55
|
+
desc "Open an irb session preloaded with this library"
task :console do
  # The old `-rubygems` switch is really `-r ubygems`; ubygems.rb was removed
  # from the standard library in Ruby 2.5, so that switch now fails with a
  # LoadError. RubyGems is loaded automatically, so only the library itself
  # needs to be required.
  sh "irb -r ./lib/#{name}.rb"
end
|
59
|
+
|
60
|
+
#############################################################################
|
61
|
+
#
|
62
|
+
# Custom tasks (add your own tasks here)
|
63
|
+
#
|
64
|
+
#############################################################################
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
#############################################################################
|
69
|
+
#
|
70
|
+
# Packaging tasks
|
71
|
+
#
|
72
|
+
#############################################################################
|
73
|
+
|
74
|
+
desc "Create tag v#{version} and build and push #{gem_file} to Rubygems"
|
75
|
+
task :release => :build do
|
76
|
+
unless `git branch` =~ /^\* master$/
|
77
|
+
puts "You must be on the master branch to release!"
|
78
|
+
exit!
|
79
|
+
end
|
80
|
+
sh "git commit --allow-empty -a -m 'Release #{version}'"
|
81
|
+
sh "git tag v#{version}"
|
82
|
+
sh "git push origin master"
|
83
|
+
sh "git push origin v#{version}"
|
84
|
+
sh "gem push pkg/#{gem_file}"
|
85
|
+
end
|
86
|
+
|
87
|
+
desc "Build #{gem_file} into the pkg directory"
|
88
|
+
task :build => :gemspec do
|
89
|
+
sh "mkdir -p pkg"
|
90
|
+
sh "gem build #{gemspec_file}"
|
91
|
+
sh "mv #{gem_file} pkg"
|
92
|
+
end
|
93
|
+
|
94
|
+
desc "Generate #{gemspec_file}"
|
95
|
+
task :gemspec => :validate do
|
96
|
+
# read spec file and split out manifest section
|
97
|
+
spec = File.read(gemspec_file)
|
98
|
+
head, manifest, tail = spec.split(" # = MANIFEST =\n")
|
99
|
+
|
100
|
+
# replace name version and date
|
101
|
+
replace_header(head, :name)
|
102
|
+
replace_header(head, :version)
|
103
|
+
replace_header(head, :date)
|
104
|
+
#comment this out if your rubyforge_project has a different name
|
105
|
+
replace_header(head, :rubyforge_project)
|
106
|
+
|
107
|
+
# determine file list from git ls-files
|
108
|
+
files = `git ls-files`.
|
109
|
+
split("\n").
|
110
|
+
sort.
|
111
|
+
reject { |file| file =~ /^\./ }.
|
112
|
+
reject { |file| file =~ /^(rdoc|pkg)/ }.
|
113
|
+
map { |file| " #{file}" }.
|
114
|
+
join("\n")
|
115
|
+
|
116
|
+
# piece file back together and write
|
117
|
+
manifest = " s.files = %w[\n#{files}\n ]\n"
|
118
|
+
spec = [head, manifest, tail].join(" # = MANIFEST =\n")
|
119
|
+
File.open(gemspec_file, 'w') { |io| io.write(spec) }
|
120
|
+
puts "Updated #{gemspec_file}"
|
121
|
+
end
|
122
|
+
|
123
|
+
desc "Validate #{gemspec_file}"
|
124
|
+
task :validate do
|
125
|
+
libfiles = Dir['lib/*'] - ["lib/#{name}.rb", "lib/#{name}"]
|
126
|
+
unless libfiles.empty?
|
127
|
+
puts "Directory `lib` should only contain a `#{name}.rb` file and `#{name}` dir."
|
128
|
+
exit!
|
129
|
+
end
|
130
|
+
unless Dir['VERSION*'].empty?
|
131
|
+
puts "A `VERSION` file at root level violates Gem best practices."
|
132
|
+
exit!
|
133
|
+
end
|
134
|
+
end
|
@@ -0,0 +1,213 @@
|
|
1
|
+
# Iterates over large models, storing state in Redis.
#
# Records are fetched in batches ordered by the ID column. The ID of the last
# processed record is written to Redis after every batch (and when a batch
# raises), so a new iterator using the same Redis key resumes where the
# previous one stopped.
class ModelIterator
  VERSION = "1.0.0"

  # Raised by #each_set once the configured :max number of batches has been
  # processed.
  class MaxIterations < StandardError
    # Gets the ModelIterator that hit its maximum.
    attr_reader :iterator

    # iter - The ModelIterator that reached its :max.
    def initialize(iter)
      @iterator = iter
      super "Hit the max (#{iter.max}), stopping at id #{iter.current_id}."
    end
  end

  # Gets a reference to the ActiveRecord::Base class that is iterated.
  #
  # Returns a Class.
  attr_reader :klass

  # Gets or sets the number of records that are returned in each database
  # query.
  #
  # Returns an Integer.
  attr_accessor :limit

  # Gets a String SQL Where clause fragment.  Use `?` for variable
  # substitution.
  #
  # Returns a String.
  attr_reader :clause

  # Gets an Array of values to be sql-escaped and joined with the clause.
  #
  # Returns an Array of unescaped sql values.
  attr_reader :clause_args

  # Gets an Integer value of the maximum iterations to run, or 0.
  attr_reader :max

  # Gets the String name of the ID field.
  attr_reader :id_field

  # Gets the String fully qualified ID field (with the table name).
  attr_reader :id_clause

  # Gets the :joins value for ActiveRecord::Base.find.
  attr_reader :joins

  # Gets or sets a Proc that is called with each model instance while
  # iterating.  This is set automatically by #each.
  attr_accessor :job

  # Gets or Sets the Redis client object.
  attr_accessor :redis

  # Public: Sets the latest processed Integer ID.
  attr_writer :current_id

  # Initializes a ModelIterator instance.
  #
  # klass   - ActiveRecord::Base class to iterate.
  # clause  - String SQL WHERE clause, with '?' placeholders for values.
  # *values - Optional array of values to be added to a custom SQL WHERE
  #           clause.
  # options - Optional Hash options.
  #           :redis     - A Redis object for storing the state.
  #           :order     - Symbol specifying the order to iterate.  :asc or
  #                        :desc.  Default: :asc
  #           :id_field  - String name of the ID column.  Default: the
  #                        model's primary key.
  #           :id_clause - String name of the fully qualified ID column.
  #                        Prepends the model's table name to the front of
  #                        the ID field.  Default: "table_name.id"
  #           :start_id  - Integer to start iterating from.  Default: the
  #                        ID stored in Redis, or 0 if there is none.
  #           :prefix    - Custom String prefix for redis keys.
  #           :select    - Optional String of the columns to retrieve.
  #           :joins     - Optional Symbol or Hash :joins option for
  #                        ActiveRecord::Base.find.
  #           :max       - Optional Integer of the maximum number of
  #                        iterations.  Use max * limit to process a known
  #                        number of records at a time.
  #           :limit     - Integer limit of objects to fetch from the db.
  #                        Default: 100
  #
  #   ModelIterator.new(Repository, :start_id => 5000)
  #   ModelIterator.new(Repository, 'public=?', true, :start_id => 1000)
  #
  def initialize(klass, *args)
    @klass = klass

    # Only a Hash-like trailing argument is the options.  Arrays respond to
    # #fetch too, so testing for #fetch would swallow a trailing Array bind
    # value such as ('id IN (?)', [1, 2, 3]).
    @options = args.last.respond_to?(:to_hash) ? args.pop : {}

    @redis     = @options[:redis]
    @id_field  = @options[:id_field] || klass.primary_key
    @id_clause = @options[:id_clause] || "#{klass.table_name}.#{@id_field}"
    @order     = @options[:order] == :desc ? :desc : :asc
    @max       = @options[:max].to_i
    @joins     = @options[:joins]
    @limit     = @options[:limit] || 100

    # Ascending iteration asks for IDs above the current one, descending
    # iteration for IDs below it.
    op = @order == :asc ? '>' : '<'
    @clause = args.empty? ?
      "#{@id_clause} #{op} ?" :
      "#{@id_clause} #{op} ? AND (#{args.shift})"
    @clause_args = args

    @current_id = @options[:start_id]
    @job = @prefix = @key = nil
  end

  # Public: Points to the latest record that was yielded, by database ID.
  # Falls back to the value stored in Redis (0 if nothing is stored) when no
  # ID is cached on the instance.
  #
  # refresh - Boolean that determines if the instance variable cache should
  #           be reset first.  Default: false.
  #
  # Returns an Integer.
  def current_id(refresh = false)
    @current_id = nil if refresh
    @current_id ||= @redis.get(key).to_i
  end

  # Public: Iterates through the whole dataset, yielding individual records as
  # they are received.  This calls #records multiple times, updating
  # #current_id after each record.  The ID is written to Redis after each
  # batch, so if an exception is raised the iteration can be restarted from
  # the last processed record.  The Redis key is removed once the whole
  # dataset has been processed.
  #
  # &block - Block that gets called with each ActiveRecord::Base instance.
  #          Defaults to the previously stored #job.
  #
  # Returns nothing.
  # Raises ArgumentError if no block is given and no #job has been set.
  def each(&block)
    # Capture the block explicitly: a block-less Proc.new raises on Ruby 3.
    @job = block ||= @job
    raise ArgumentError, "no block given and no job set" unless block

    each_set do |records|
      records.each do |record|
        block.call(record)
        @current_id = record.send(@id_field)
      end
    end
    cleanup
  end

  # Public: Iterates through the whole dataset in batches.  This calls
  # #records multiple times, but does not set the #current_id after each
  # record; the block has to advance #current_id itself, otherwise the same
  # batch is queried again.  The current ID is written to Redis after every
  # batch, even when the block raises.
  #
  # &block - Block that gets called with each Array of ActiveRecord::Base
  #          instances.
  #
  # Returns nothing.
  # Raises MaxIterations once :max batches have been processed (if :max > 0).
  def each_set(&block)
    loops = 0
    while (batch = records)
      begin
        block.call(batch)
        loops += 1
        raise MaxIterations, self if @max > 0 && loops >= @max
      ensure
        @redis.set(key, @current_id) if @current_id
      end
    end
  end

  # Public: Simple alias for #each with no block.  Useful if the job errors,
  # and you want to retry it again from where it left off.
  alias run each

  # Public: Cleans up any redis keys.
  #
  # Returns nothing.
  def cleanup
    @redis.del(key)
    @current_id = nil
  end

  # Public: Gets the String used to prefix the redis keys used by this
  # object: the optional :prefix option, this class's name and the iterated
  # model's name, joined with ":".
  #
  # Returns a String.
  def prefix
    @prefix ||= [@options[:prefix], self.class.name, @klass.name].
      compact.join(":")
  end

  # Public: Gets the redis key that stores the current ID.
  #
  # Returns a String.
  def key
    @key ||= "#{prefix}:current"
  end

  # Public: Gets an ActiveRecord :conditions value, ready for
  # ActiveRecord::Base.all.
  #
  # Returns an Array with a String query clause, and unescaped db values.
  def conditions
    [@clause, current_id, *@clause_args]
  end

  # Public: Queries the database for the next page of records.
  #
  # Returns an Array of ActiveRecord::Base instances if any results are
  # returned, or nil.
  def records
    arr = @klass.all(find_options)
    arr.empty? ? nil : arr
  end

  # Public: Builds the ActiveRecord::Base.find options for a single query.
  #
  # Returns a Hash.
  def find_options
    opt = {:conditions => conditions, :limit => @limit, :order => "#{@id_clause} #{@order}"}
    if columns = @options[:select]
      opt[:select] = columns
    end
    opt[:joins] = @joins if @joins
    opt
  end
end
|
212
|
+
|
213
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
## This is the rakegem gemspec template. Make sure you read and understand
|
2
|
+
## all of the comments. Some sections require modification, and others can
|
3
|
+
## be deleted if you don't need them. Once you understand the contents of
|
4
|
+
## this file, feel free to delete any comments that begin with two hash marks.
|
5
|
+
## You can find comprehensive Gem::Specification documentation, at
|
6
|
+
## http://docs.rubygems.org/read/chapter/20
|
7
|
+
Gem::Specification.new do |s|
|
8
|
+
s.specification_version = 2 if s.respond_to? :specification_version=
|
9
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.3.5") if s.respond_to? :required_rubygems_version=
|
10
|
+
|
11
|
+
## Leave these as is; they will be modified for you by the rake gemspec task.
|
12
|
+
## If your rubyforge_project name is different, then edit it and comment out
|
13
|
+
## the sub! line in the Rakefile
|
14
|
+
s.name = 'model_iterator'
|
15
|
+
s.version = '1.0.0'
|
16
|
+
s.date = '2012-09-07'
|
17
|
+
s.rubyforge_project = 'model_iterator'
|
18
|
+
|
19
|
+
## Make sure your summary is short. The description may be as long
|
20
|
+
## as you like.
|
21
|
+
s.summary = "Iterate through large ActiveRecord datasets"
|
22
|
+
s.description = "Iterate through large ActiveRecord datasets"
|
23
|
+
|
24
|
+
## List the primary authors. If there are a bunch of authors, it's probably
|
25
|
+
## better to set the email to an email list or something. If you don't have
|
26
|
+
## a custom homepage, consider using your GitHub URL or the like.
|
27
|
+
s.authors = ["Rick Olson"]
|
28
|
+
s.email = 'technoweenie@gmail.com'
|
29
|
+
s.homepage = 'http://github.com/technoweenie/model_iterator'
|
30
|
+
|
31
|
+
## This gets added to the $LOAD_PATH so that 'lib/NAME.rb' can be required as
|
32
|
+
## require 'NAME.rb' or '/lib/NAME/file.rb' can be as require 'NAME/file.rb'
|
33
|
+
s.require_paths = %w[lib]
|
34
|
+
|
35
|
+
s.add_development_dependency 'rake'
|
36
|
+
s.add_development_dependency 'test-unit'
|
37
|
+
|
38
|
+
## Leave this section as-is. It will be automatically generated from the
|
39
|
+
## contents of your Git repository via the gemspec task. DO NOT REMOVE
|
40
|
+
## THE MANIFEST COMMENTS, they are used as delimiters by the task.
|
41
|
+
# = MANIFEST =
|
42
|
+
s.files = %w[
|
43
|
+
Gemfile
|
44
|
+
LICENSE.md
|
45
|
+
README.md
|
46
|
+
Rakefile
|
47
|
+
lib/model_iterator.rb
|
48
|
+
model_iterator.gemspec
|
49
|
+
test/helper.rb
|
50
|
+
test/init_test.rb
|
51
|
+
test/iterate_test.rb
|
52
|
+
]
|
53
|
+
# = MANIFEST =
|
54
|
+
|
55
|
+
## Test files will be grabbed from the file list. Make sure the path glob
|
56
|
+
## matches what you actually use.
|
57
|
+
s.test_files = s.files.select { |path| path =~ %r{^test/*/.+\.rb} }
|
58
|
+
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'active_record'
|
4
|
+
require File.expand_path("../../lib/model_iterator", __FILE__)
|
5
|
+
|
6
|
+
class ModelIterator::TestCase < Test::Unit::TestCase
|
7
|
+
class Model < ActiveRecord::Base
|
8
|
+
establish_connection :adapter => 'sqlite3', :database => ':memory:'
|
9
|
+
connection.create_table table_name do |c|
|
10
|
+
c.column :name, :string
|
11
|
+
end
|
12
|
+
|
13
|
+
%w(a b c).each do |s|
|
14
|
+
create!(:name => s)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class RedisClient
|
19
|
+
def initialize(hash = nil)
|
20
|
+
@hash = hash || {}
|
21
|
+
end
|
22
|
+
|
23
|
+
def [](key)
|
24
|
+
@hash[key]
|
25
|
+
end
|
26
|
+
alias get []
|
27
|
+
|
28
|
+
def []=(key, value)
|
29
|
+
@hash[key] = value
|
30
|
+
end
|
31
|
+
alias set []=
|
32
|
+
|
33
|
+
def delete(key)
|
34
|
+
@hash.delete(key)
|
35
|
+
end
|
36
|
+
|
37
|
+
alias del delete
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
data/test/init_test.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require File.expand_path("../helper", __FILE__)
|
2
|
+
|
3
|
+
class InitializationTest < ModelIterator::TestCase
|
4
|
+
def setup
|
5
|
+
@iter = ModelIterator.new(Model, :redis => RedisClient.new)
|
6
|
+
end
|
7
|
+
|
8
|
+
def test_sets_klass
|
9
|
+
assert_equal Model, @iter.klass
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_sets_current_id
|
13
|
+
assert_equal 0, @iter.current_id
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_sets_conditions
|
17
|
+
assert_equal ['models.id > ?', @iter.current_id], @iter.conditions
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_sets_limit
|
21
|
+
assert_equal 100, @iter.limit
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_sets_redis_prefix
|
25
|
+
assert_equal 'ModelIterator:ModelIterator::TestCase::Model', @iter.prefix
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class InitializationTestWithOptions < ModelIterator::TestCase
|
30
|
+
def setup
|
31
|
+
@iter = ModelIterator.new Model, :redis => RedisClient.new,
|
32
|
+
:start_id => 5, :limit => 10, :prefix => 'foo'
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_sets_klass
|
36
|
+
assert_equal Model, @iter.klass
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_sets_current_id
|
40
|
+
assert_equal 5, @iter.current_id
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_sets_conditions
|
44
|
+
assert_equal ['models.id > ?', @iter.current_id], @iter.conditions
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_sets_limit
|
48
|
+
assert_equal 10, @iter.limit
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_sets_redis_prefix
|
52
|
+
assert_equal 'foo:ModelIterator:ModelIterator::TestCase::Model', @iter.prefix
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
class InitializationTestWithCustomWhereClause < ModelIterator::TestCase
|
57
|
+
def setup
|
58
|
+
@iter = ModelIterator.new Model,
|
59
|
+
'public = ?',
|
60
|
+
true,
|
61
|
+
:redis => RedisClient.new, :start_id => 5, :limit => 10
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_sets_klass
|
65
|
+
assert_equal Model, @iter.klass
|
66
|
+
end
|
67
|
+
|
68
|
+
def test_sets_current_id
|
69
|
+
assert_equal 5, @iter.current_id
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_sets_conditions
|
73
|
+
assert_equal ['models.id > ? AND (public = ?)', @iter.current_id, true], @iter.conditions
|
74
|
+
end
|
75
|
+
|
76
|
+
def test_sets_limit
|
77
|
+
assert_equal 10, @iter.limit
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require File.expand_path("../helper", __FILE__)
|
2
|
+
|
3
|
+
class IterateTest < ModelIterator::TestCase
|
4
|
+
def test_finds_current_iteration_of_records
|
5
|
+
iter = ModelIterator.new Model, :redis => RedisClient.new
|
6
|
+
assert_equal %w(a b c), iter.records.map(&:name)
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_loops_through_all_records
|
10
|
+
names = []
|
11
|
+
iter = ModelIterator.new Model, :redis => RedisClient.new, :limit => 1
|
12
|
+
iter.each do |m|
|
13
|
+
names << m.name
|
14
|
+
end
|
15
|
+
|
16
|
+
assert_equal %w(a b c), names
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_loops_through_filtered_records
|
20
|
+
names = []
|
21
|
+
iter = ModelIterator.new Model, 'name != ?', 'a',
|
22
|
+
:redis => RedisClient.new, :limit => 1
|
23
|
+
iter.each do |m|
|
24
|
+
names << m.name
|
25
|
+
end
|
26
|
+
|
27
|
+
assert_equal %w(b c), names
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_loops_through_records_in_reverse
|
31
|
+
names = []
|
32
|
+
iter = ModelIterator.new Model, :redis => RedisClient.new, :limit => 1,
|
33
|
+
:start_id => 100000, :order => :desc
|
34
|
+
iter.each do |m|
|
35
|
+
names << m.name
|
36
|
+
end
|
37
|
+
|
38
|
+
assert_equal %w(c b a), names
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_loops_through_known_number_of_records
|
42
|
+
names = []
|
43
|
+
iter = ModelIterator.new Model, :redis => RedisClient.new,
|
44
|
+
:limit => 1, :start_id => 0, :max => 2
|
45
|
+
|
46
|
+
assert_raises ModelIterator::MaxIterations do
|
47
|
+
iter.each do |m|
|
48
|
+
names << m.name
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
assert_equal %w(a b), names
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_allows_restart_of_records_after_error
|
56
|
+
redis = RedisClient.new
|
57
|
+
names = []
|
58
|
+
iter = ModelIterator.new Model, :redis => redis, :start_id => 0
|
59
|
+
badjob = lambda do |m|
|
60
|
+
raise(ExpectedError) if m.id != 1
|
61
|
+
names << m.name
|
62
|
+
end
|
63
|
+
|
64
|
+
2.times do
|
65
|
+
assert_raises ExpectedError do
|
66
|
+
iter.each(&badjob)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
assert_equal badjob, iter.job
|
71
|
+
assert_equal %w(a), names
|
72
|
+
|
73
|
+
iter = ModelIterator.new Model, :redis => redis, :limit => 1
|
74
|
+
assert_equal 1, iter.current_id
|
75
|
+
iter.job = lambda { |m| names << m.name }
|
76
|
+
iter.run
|
77
|
+
|
78
|
+
assert_equal %w(a b c), names
|
79
|
+
end
|
80
|
+
|
81
|
+
class ExpectedError < StandardError; end
|
82
|
+
end
|
83
|
+
|
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: model_iterator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Rick Olson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-09-07 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: test-unit
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
description: Iterate through large ActiveRecord datasets
|
47
|
+
email: technoweenie@gmail.com
|
48
|
+
executables: []
|
49
|
+
extensions: []
|
50
|
+
extra_rdoc_files: []
|
51
|
+
files:
|
52
|
+
- Gemfile
|
53
|
+
- LICENSE.md
|
54
|
+
- README.md
|
55
|
+
- Rakefile
|
56
|
+
- lib/model_iterator.rb
|
57
|
+
- model_iterator.gemspec
|
58
|
+
- test/helper.rb
|
59
|
+
- test/init_test.rb
|
60
|
+
- test/iterate_test.rb
|
61
|
+
homepage: http://github.com/technoweenie/model_iterator
|
62
|
+
licenses: []
|
63
|
+
post_install_message:
|
64
|
+
rdoc_options: []
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ! '>='
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0'
|
73
|
+
segments:
|
74
|
+
- 0
|
75
|
+
hash: 3722461387104018248
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
none: false
|
78
|
+
requirements:
|
79
|
+
- - ! '>='
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: 1.3.5
|
82
|
+
requirements: []
|
83
|
+
rubyforge_project: model_iterator
|
84
|
+
rubygems_version: 1.8.23
|
85
|
+
signing_key:
|
86
|
+
specification_version: 2
|
87
|
+
summary: Iterate through large ActiveRecord datasets
|
88
|
+
test_files:
|
89
|
+
- test/helper.rb
|
90
|
+
- test/init_test.rb
|
91
|
+
- test/iterate_test.rb
|