massive 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +22 -0
- data/.rspec +3 -0
- data/.rvmrc +1 -0
- data/.travis.yml +7 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +141 -0
- data/Guardfile +9 -0
- data/LICENSE.txt +22 -0
- data/README.md +196 -0
- data/Rakefile +8 -0
- data/lib/massive.rb +63 -0
- data/lib/massive/cancelling.rb +20 -0
- data/lib/massive/file.rb +80 -0
- data/lib/massive/file_job.rb +9 -0
- data/lib/massive/file_process.rb +7 -0
- data/lib/massive/file_step.rb +7 -0
- data/lib/massive/job.rb +115 -0
- data/lib/massive/locking.rb +27 -0
- data/lib/massive/memory_consumption.rb +15 -0
- data/lib/massive/notifications.rb +40 -0
- data/lib/massive/notifiers.rb +6 -0
- data/lib/massive/notifiers/base.rb +32 -0
- data/lib/massive/notifiers/pusher.rb +17 -0
- data/lib/massive/process.rb +69 -0
- data/lib/massive/process_serializer.rb +12 -0
- data/lib/massive/retry.rb +49 -0
- data/lib/massive/status.rb +59 -0
- data/lib/massive/step.rb +143 -0
- data/lib/massive/step_serializer.rb +12 -0
- data/lib/massive/timing_support.rb +10 -0
- data/lib/massive/version.rb +3 -0
- data/massive.gemspec +23 -0
- data/spec/fixtures/custom_job.rb +4 -0
- data/spec/fixtures/custom_step.rb +19 -0
- data/spec/models/massive/cancelling_spec.rb +83 -0
- data/spec/models/massive/file_job_spec.rb +24 -0
- data/spec/models/massive/file_spec.rb +209 -0
- data/spec/models/massive/file_step_spec.rb +22 -0
- data/spec/models/massive/job_spec.rb +319 -0
- data/spec/models/massive/locking_spec.rb +52 -0
- data/spec/models/massive/memory_consumption_spec.rb +24 -0
- data/spec/models/massive/notifications_spec.rb +107 -0
- data/spec/models/massive/notifiers/base_spec.rb +48 -0
- data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
- data/spec/models/massive/process_serializer_spec.rb +38 -0
- data/spec/models/massive/process_spec.rb +235 -0
- data/spec/models/massive/status_spec.rb +104 -0
- data/spec/models/massive/step_serializer_spec.rb +40 -0
- data/spec/models/massive/step_spec.rb +490 -0
- data/spec/models/massive/timing_support_spec.rb +55 -0
- data/spec/shared/step_context.rb +25 -0
- data/spec/spec_helper.rb +42 -0
- data/spec/support/mongoid.yml +78 -0
- metadata +175 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
module Massive
  module Notifiers
    # Notifier that delivers notifications by calling +trigger+ on a client
    # object (::Pusher unless another client is supplied through the options).
    #
    # NOTE(review): +id+ and +options+ are not defined in this class; they are
    # presumably provided by Base -- confirm there.
    class Pusher < Base
      protected

      # Sends +message+ through the client.
      #
      # message - first payload argument handed to +client.trigger+ after +id+.
      # data    - payload used when no block is given.
      # block   - optional; when given, its return value replaces +data+.
      #
      # Returns whatever +client.trigger+ returns.
      def send_notification(message, data, &block)
        payload = block_given? ? block.call : data

        client.trigger(id, message, payload)
      end

      # Memoized client: the one configured under options[:client] when it is
      # truthy, otherwise the top-level ::Pusher constant.
      def client
        @client ||= begin
          configured = options[:client]
          configured || ::Pusher
        end
      end
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module Massive
  # Mongoid document that embeds a list of Massive::Step documents and exposes
  # helpers to enqueue the next pending step, report weighted progress and
  # cancel the whole process.
  #
  # Cancellation is recorded twice: in the +cancelled_at+ field and as a Redis
  # key (see #cancelled_key) that expires after one day.
  class Process
    include Mongoid::Document
    include Mongoid::Timestamps

    field :cancelled_at, type: Time

    embeds_many :steps, class_name: 'Massive::Step'

    # Finds the process with +process_id+ and then the step with +step_id+
    # among its embedded steps.
    def self.find_step(process_id, step_id)
      find(process_id).steps.find(step_id)
    end

    # Finds the job with +job_id+ inside the step located by .find_step.
    def self.find_job(process_id, step_id, job_id)
      find_step(process_id, step_id).jobs.find(job_id)
    end

    # Enqueues the next pending step, if there is one.
    def enqueue_next
      next_step.try(:enqueue)
    end

    # Returns the first step that is neither completed nor started, or nil
    # when there is no such step or when that step reports itself as already
    # enqueued.
    def next_step
      step = steps.not_completed.not_started.first
      step.try(:enqueued?) ? nil : step
    end

    # Progress of the whole process: the sum of each step's
    # processed_percentage multiplied by its weight, divided by the total
    # weight. Returns 0 when the total weight is not positive.
    def processed_percentage
      weight = total_weight
      weight > 0 ? total_steps_processed_percentage.to_f / weight : 0
    end

    # True when no step is left in the +not_completed+ scope.
    def completed?
      steps.not_completed.none?
    end

    # True when +cancelled_at+ is set or the Redis cancellation key exists.
    def cancelled?
      cancelled_at? || redis_key_present?(cancelled_key)
    end

    # Marks the process as cancelled: sets +cancelled_at+, writes the Redis
    # flag with a one day expiration and saves the document.
    #
    # Returns the result of +save+.
    def cancel
      self.cancelled_at = Time.now
      redis.setex(cancelled_key, 1.day, true)
      save
    end

    # Falls back to Massive::ProcessSerializer when +super+ yields nothing.
    def active_model_serializer
      super || Massive::ProcessSerializer
    end

    protected

    def redis
      Massive.redis
    end

    # Redis key used to flag this process as cancelled.
    def cancelled_key
      "#{self.class.name.underscore}:#{id}:cancelled"
    end

    # Normalises the reply of +redis.exists+ into a boolean.
    #
    # Older redis-rb versions answer true/false, newer ones answer the number
    # of existing keys. An Integer 0 is truthy in Ruby, so using the raw reply
    # as a condition would report every process as cancelled.
    def redis_key_present?(key)
      reply = redis.exists(key)
      reply.is_a?(Integer) ? reply > 0 : !!reply
    end

    private

    # Sum of the weights of all steps.
    def total_weight
      steps.map(&:weight).sum
    end

    # Sum of processed_percentage * weight over all steps.
    def total_steps_processed_percentage
      steps.inject(0) do |result, step|
        result + step.processed_percentage * step.weight
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Massive
  # Concern that adds a bounded retry loop (#retrying) plus two class-level
  # settings, +retry_interval+ and +maximum_retries+, which are copied to
  # subclasses when they are created.
  module Retry
    extend ActiveSupport::Concern

    included do
      # Defaults for the including class.
      retry_interval 2
      maximum_retries 10

      # Hands the current settings of the parent down to each new subclass.
      def self.inherited(subclass)
        super

        subclass.retry_interval retry_interval
        subclass.maximum_retries maximum_retries
      end
    end

    # Calls the block, running it again after a StandardError until the number
    # of failures reaches the class's +maximum_retries+.
    #
    # The +retries+ counter is reset to 0 on entry and incremented on every
    # failure. Between attempts the method sleeps for the class's
    # +retry_interval+. Massive::Cancelled and SignalException are re-raised
    # immediately. Once the retry budget is exhausted the last error is raised.
    def retrying(&block)
      self.retries = 0

      begin
        block.call
      rescue Massive::Cancelled, SignalException
        # Cancellation and signals are not actual errors: let them through.
        raise
      rescue StandardError => error
        self.retries = retries + 1

        raise error unless retries < self.class.maximum_retries

        Kernel.sleep(self.class.retry_interval)
        retry
      end
    end

    module ClassMethods
      # Reader and writer in one: stores +value+ when it is truthy and always
      # returns the current setting.
      def retry_interval(value=nil)
        value ? (@retry_interval = value) : @retry_interval
      end

      # Reader and writer in one: stores +value+ when it is truthy and always
      # returns the current setting.
      def maximum_retries(value=nil)
        value ? (@maximum_retries = value) : @maximum_retries
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Massive
  # Concern that adds lifecycle timestamps, an error/retry record and the
  # matching query scopes and predicates to the including document.
  module Status
    extend ActiveSupport::Concern

    included do
      # Lifecycle timestamps.
      field :started_at,   type: Time
      field :finished_at,  type: Time
      field :failed_at,    type: Time
      field :cancelled_at, type: Time

      # Failure bookkeeping.
      field :last_error, type: String
      field :retries,    type: Integer, default: 0

      scope :started,       ne(started_at: nil)
      scope :not_started,   where(started_at: nil)
      scope :completed,     ne(finished_at: nil)
      scope :not_completed, where(finished_at: nil)
      scope :failed,        ne(failed_at: nil)
      scope :cancelled,     ne(cancelled_at: nil)
    end

    # Persists the attributes returned by #attributes_to_reset.
    def start!
      update_attributes(attributes_to_reset)
    end

    # True when +started_at+ is set and the record has not failed.
    def started?
      !failed? && started_at?
    end

    # True when +finished_at+ is set and the record has not failed.
    def completed?
      !failed? && finished_at?
    end

    # True when +failed_at+ is set.
    def failed?
      failed_at?
    end

    # Compares what Resque.peek returns for this class's queue with this
    # record: true when that item is present, names this class and carries the
    # same arguments as #args_for_resque.
    def enqueued?
      queued = Resque.peek(self.class.queue)
      return false unless queued.present?

      (queued["class"] == self.class.name) && (queued["args"] == args_for_resque)
    end

    protected

    # Attributes written by #start!: a fresh +started_at+ and every other
    # status field back to its blank value.
    def attributes_to_reset
      {
        started_at:   Time.now,
        finished_at:  nil,
        failed_at:    nil,
        cancelled_at: nil,
        retries:      0,
        last_error:   nil
      }
    end

    # Returns nil here; Massive::Step overrides it with the argument list it
    # enqueues itself with.
    def args_for_resque
      nil
    end
  end
end
|
data/lib/massive/step.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
module Massive
  # Embedded document that represents one step of a Massive::Process.
  #
  # When worked, a step creates one embedded job per slice of +limit+ items,
  # and it is marked as finished once all of its jobs report completion.
  class Step
    include Mongoid::Document
    include Mongoid::Timestamps

    include Massive::Status
    include Massive::MemoryConsumption
    include Massive::TimingSupport
    include Massive::Locking
    include Massive::Notifications

    embedded_in :process, class_name: 'Massive::Process'
    embeds_many :jobs, class_name: 'Massive::Job'

    field :total_count,  type: Integer
    field :weight,       type: Integer, default: 1
    field :job_class,    type: String,  default: -> { self.class.job_class }
    field :execute_next, type: Boolean, default: false

    define_model_callbacks :work, :complete

    class << self
      # Resque entry point: loads the step and works it.
      def perform(process_id, step_id)
        Massive::Process.find_step(process_id, step_id).work
      end

      # Name of the Resque queue used for steps.
      def queue
        :massive_step
      end

      # Defines #calculate_total_count from the given block.
      def calculates_total_count_with(&block)
        define_method(:calculate_total_count, &block)
      end

      # Reader and writer in one for the hash that maps a minimum total count
      # to the per-job limit used from that count upwards. Entries are checked
      # in insertion order by #limit.
      def limit_ratio(value=nil)
        @limit_ratio = value if value
        @limit_ratio
      end

      # Reader and writer in one for the name of the job class instantiated by
      # #process_step.
      def job_class(value=nil)
        @job_class = value if value
        @job_class
      end

      # Copies the class-level settings down to each new subclass.
      def inherited(child)
        super

        child.job_class job_class
        child.limit_ratio limit_ratio
      end
    end

    # Defaults for Massive::Step itself.
    limit_ratio 3000 => 1000, 0 => 100
    job_class 'Massive::Job'

    # Pushes this step onto Resque with the ids needed by .perform.
    def enqueue
      Resque.enqueue(self.class, process.id.to_s, id.to_s)
    end

    # Resets the status attributes and then sends the :start notification.
    def start!
      super
      notify(:start)
    end

    # Starts the step, builds its jobs inside the :work callbacks and then
    # tries to complete it.
    def work
      start!

      run_callbacks(:work) { process_step }

      complete
    end

    # Replaces the embedded jobs with one new job per slice.
    def process_step
      self.jobs = Array.new(number_of_jobs) do |index|
        job_class.constantize.new(job_params(index))
      end
    end

    # Marks the step as finished when every job is completed and the
    # :complete lock is not held; afterwards asks the process to enqueue the
    # next step if +execute_next+ is set. Does nothing otherwise.
    def complete
      return if !completed_all_jobs? || locked?(:complete)

      run_callbacks :complete do
        update_attributes finished_at: Time.now, failed_at: nil, memory_consumption: current_memory_consumption
        notify(:complete)
      end

      process.enqueue_next if execute_next?
    end

    # Reloads a persisted step and checks that every job is completed.
    def completed_all_jobs?
      reload if persisted?

      jobs.all? { |job| job.completed? }
    end

    # Sum of the +processed+ value of every job.
    def processed
      jobs.map { |job| job.processed }.sum
    end

    # Fraction of +total_count+ already processed; 0 when +total_count+ is
    # missing or not positive.
    def processed_percentage
      if total_count && total_count > 0
        processed.to_f / total_count
      else
        0
      end
    end

    # Sum of the +elapsed_time+ of every job.
    def processing_time
      jobs.map { |job| job.elapsed_time }.sum
    end

    # Per-job limit: the value of the first +limit_ratio+ entry whose key is
    # not greater than +total_count+. Memoized.
    def limit
      @limit ||= begin
        entry = self.class.limit_ratio.find { |minimum, _per_job| total_count >= minimum }
        entry.last
      end
    end

    # Default total count; replaced through .calculates_total_count_with.
    def calculate_total_count
      0
    end

    # Falls back to Massive::StepSerializer when +super+ yields nothing.
    def active_model_serializer
      super || Massive::StepSerializer
    end

    protected

    # Attributes for the job that handles slice number +index+.
    def job_params(index)
      { offset: index * limit, limit: limit, step: self }
    end

    # Number of slices of +limit+ items needed to cover +total_count+.
    def number_of_jobs
      slices = total_count.to_f / limit
      slices.ceil
    end

    # Adds +total_count+ to the attributes reset on start, computing it only
    # when it is not already set.
    def attributes_to_reset
      super.merge(total_count: total_count || calculate_total_count)
    end

    # Arguments this step is enqueued with, as compared by Status#enqueued?.
    def args_for_resque
      [process.id.to_s, id.to_s]
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Massive
  # Serializer for Massive::Step: exposes the identifiers, timestamps, error
  # record and progress figures listed below.
  class StepSerializer < ActiveModel::Serializer
    attributes(
      :id,
      :created_at, :updated_at,
      :started_at, :finished_at, :failed_at,
      :last_error, :retries,
      :memory_consumption, :total_count,
      :processed, :processed_percentage,
      :processing_time, :elapsed_time,
      :notifier_id
    )

    # Serializes the id as a string instead of the raw id object.
    def id
      object.id.to_s
    end
  end
end
|
data/massive.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for massive.

# Make lib/ loadable so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'massive/version'

Gem::Specification.new do |gem|
  gem.name          = "massive"
  gem.version       = Massive::VERSION
  gem.authors       = ["Vicente Mundim"]
  gem.email         = ["vicente.mundim@gmail.com"]
  gem.description   = %q{Parallelize processing of large files and/or data using Resque, Redis and MongoDB}
  gem.summary       = %q{Parallelize processing of large files and/or data using Resque, Redis and MongoDB}

  # Package every file tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "resque"
  # RubyGems has no "x" wildcard: "3.1.x" is parsed as a prerelease version,
  # so "~> 3.1.x" really meant ">= 3.1.x (prerelease), < 4". "~> 3.1" states
  # the same range (>= 3.1, < 4) explicitly.
  gem.add_dependency "mongoid", "~> 3.1"
  gem.add_dependency "file_processor", "0.2.0"
  gem.add_dependency "active_model_serializers"
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Spec fixture: a step with its own total count, job class, limit ratio and
# job parameters.
class CustomStep < Massive::Step
  calculates_total_count_with { 100 }
  job_class 'CustomJob'

  limit_ratio(3000 => 1500, 2000 => 1000, 0 => 100)

  protected

  # Job attributes for slice +index+; carries a custom parameter and, unlike
  # Massive::Step#job_params, no :step entry.
  def job_params(index)
    { offset: index * limit, limit: limit, custom_param: "some_param" }
  end
end

# Spec fixture: a subclass that changes nothing, so it keeps the settings
# handed down by Massive::Step.
class InheritedStep < Massive::Step
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Test double that runs +work_count+ iterations, each one wrapped in
# Massive::Cancelling#cancelling, and records how far it got.
class Cancellable
  include Massive::Cancelling

  attr_accessor :cancelled, :work_count, :work_done_count, :cancelled_exception

  def initialize(work_count)
    self.work_count = work_count
  end

  # Cancelled only when +cancelled+ was set to exactly true.
  def cancelled?
    cancelled == true
  end

  # Calls the block with this object and the iteration index +work_count+
  # times, counting finished iterations in +work_done_count+. A
  # Massive::Cancelled raised on the way stops the loop and is stored in
  # +cancelled_exception+.
  def work(&block)
    begin
      self.work_done_count = 0

      work_count.times do |index|
        cancelling do
          block.call(self, index)
          self.work_done_count = work_done_count + 1
        end
      end
    rescue Massive::Cancelled => cancellation
      self.cancelled_exception = cancellation
    end
  end
end
|
29
|
+
|
30
|
+
# Exercises Massive::Cancelling through the Cancellable helper: each context
# runs the work once in a +before+ hook and the examples inspect the outcome.
describe Massive::Cancelling do
  let(:work_count) { 3 }
  subject(:cancellable) { Cancellable.new(work_count) }

  context "when it is never cancelled" do
    before { cancellable.work { |_worker| } }

    it "does not cancel the work" do
      cancellable.work_done_count.should eq(cancellable.work_count)
    end

    it "does not raises a cancelled exception" do
      cancellable.cancelled_exception.should be_nil
    end
  end

  context "when it is cancelled before actually performing any work" do
    before do
      cancellable.cancelled = true
      cancellable.work { |_worker| }
    end

    it "cancels the work before the first iteration" do
      cancellable.work_done_count.should eq(0)
    end

    it "raises a cancelled exception" do
      cancellable.cancelled_exception.should be_present
    end
  end

  context "when it is cancelled while performing some work" do
    # The flag is raised during the second-to-last iteration, so the last one
    # never runs.
    before do
      cancellable.work { |worker, iteration| worker.cancelled = (iteration == work_count - 2) }
    end

    it "cancels the work before performing the iteration" do
      cancellable.work_done_count.should eq(2)
    end

    it "raises a cancelled exception" do
      cancellable.cancelled_exception.should be_present
    end
  end

  context "when it is cancelled while performing the last iteration" do
    # The flag is raised during the final iteration, after which nothing is
    # left to cancel.
    before do
      cancellable.work { |worker, iteration| worker.cancelled = (iteration == work_count - 1) }
    end

    it "performs all the work" do
      cancellable.work_done_count.should eq(work_count)
    end

    it "does not raise a cancelled exception" do
      cancellable.cancelled_exception.should be_nil
    end
  end
end
|