parallel_batch 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -1,4 +1,45 @@
1
- ParallelBatch runs concurrent batches on tables with huge amount of rows.
1
+ Parallel batch distributes your batches across servers.
2
+
3
+ It is based on ActiveRecord and relies on the database to ensure records are processed only once.
4
+
5
+ To get started, add parallel_batch to your Gemfile and run bundle install.
6
+
7
+ gem 'parallel_batch'
8
+
9
+ Then run the following migration to create the parallel_batches table.
10
+
11
+ class CreateParallelBatches < ActiveRecord::Migration
12
+ def self.up
13
+ create_table :parallel_batches do |t|
14
+ t.string :type
15
+ t.string :offset # It works either with integer or string IDs.
16
+ t.timestamps
17
+ end
18
+
19
+ add_index :parallel_batches, :type, unique: true
20
+ end
21
+
22
+ def self.down
23
+ drop_table :parallel_batches
24
+ end
25
+ end
26
+
27
+ Your parallel batch must be a subclass of ParallelBatch. It must implement the scope and perform methods. The perform method is called for each record selected by the scope you define.
28
+
29
+ # File: app/models/track_batch.rb
30
+ class TrackBatch < ParallelBatch
31
+ def scope
32
+ Track.not_encoded
33
+ end
34
+
35
+ def perform(track)
36
+ track.encode_mp3
37
+ end
38
+ end
39
+
40
+ To start your batches, SSH into your servers and run the following command.
41
+
42
+ TrackBatch.start(5) # Create 5 workers in their own process.
2
43
 
3
44
  To Contribute:
4
45
 
@@ -2,36 +2,31 @@
2
2
 
3
3
  class ParallelBatch < ActiveRecord::Base
4
4
 
5
- #################
6
- ### Constants ###
7
- #################
8
-
9
- VERSION = "0.0.1"
10
-
11
5
  #####################
12
6
  ### Class methods ###
13
7
  #####################
14
8
 
15
9
  def self.find_or_create!
16
- first || create!(offset: 0)
10
+ first || create!
17
11
  # When starting many batches at the same time we are pretty sure to get a MySQL
18
12
  # error reporting a duplicated entry. That's why we are retrying one time only.
19
13
  rescue ActiveRecord::StatementInvalid
20
- first || create!(offset: 0)
14
+ first || create!
21
15
  end
22
16
 
23
17
  def self.start(concurrency = 1)
24
- concurrency.times { fork { start_fork } }
18
+ concurrency.times { Process.detach(fork { start_fork }) }
25
19
  end
26
20
 
27
21
  def self.start_fork
28
22
  puts "#{self} has started with pid #{Process.pid}"
29
23
  ActiveRecord::Base.connection.reconnect!
24
+ Process.daemon(false)
30
25
  find_or_create!.run
31
26
  end
32
27
 
33
28
  def self.reset
34
- find_or_create!.update_attributes!(offset: null)
29
+ find_or_create!.update_attributes!(offset: nil)
35
30
  end
36
31
 
37
32
  ########################
@@ -53,7 +48,7 @@ class ParallelBatch < ActiveRecord::Base
53
48
 
54
49
  def run
55
50
  while records = next_batch
56
- records.each { |record| perform(record) }
51
+ records.each { |record| perform(record) rescue nil }
57
52
  end
58
53
  end
59
54
 
@@ -66,7 +61,7 @@ class ParallelBatch < ActiveRecord::Base
66
61
  end
67
62
 
68
63
  def batch_size
69
- 1000
64
+ 100
70
65
  end
71
66
 
72
67
  end
@@ -0,0 +1,5 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ class ParallelBatch
4
+ VERSION = '0.0.2'.freeze
5
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parallel_batch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-17 00:00:00.000000000 Z
12
+ date: 2012-11-02 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Run safely concurent batches
15
15
  email:
@@ -24,6 +24,7 @@ files:
24
24
  - README
25
25
  - Rakefile
26
26
  - lib/parallel_batch.rb
27
+ - lib/parallel_batch/version.rb
27
28
  - parallel_batch.gemspec
28
29
  - test/parallel_batch_test.rb
29
30
  homepage: https://github.com/officialfm/parallel_batch