massive 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +22 -0
- data/.rspec +3 -0
- data/.rvmrc +1 -0
- data/.travis.yml +7 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +141 -0
- data/Guardfile +9 -0
- data/LICENSE.txt +22 -0
- data/README.md +196 -0
- data/Rakefile +8 -0
- data/lib/massive.rb +63 -0
- data/lib/massive/cancelling.rb +20 -0
- data/lib/massive/file.rb +80 -0
- data/lib/massive/file_job.rb +9 -0
- data/lib/massive/file_process.rb +7 -0
- data/lib/massive/file_step.rb +7 -0
- data/lib/massive/job.rb +115 -0
- data/lib/massive/locking.rb +27 -0
- data/lib/massive/memory_consumption.rb +15 -0
- data/lib/massive/notifications.rb +40 -0
- data/lib/massive/notifiers.rb +6 -0
- data/lib/massive/notifiers/base.rb +32 -0
- data/lib/massive/notifiers/pusher.rb +17 -0
- data/lib/massive/process.rb +69 -0
- data/lib/massive/process_serializer.rb +12 -0
- data/lib/massive/retry.rb +49 -0
- data/lib/massive/status.rb +59 -0
- data/lib/massive/step.rb +143 -0
- data/lib/massive/step_serializer.rb +12 -0
- data/lib/massive/timing_support.rb +10 -0
- data/lib/massive/version.rb +3 -0
- data/massive.gemspec +23 -0
- data/spec/fixtures/custom_job.rb +4 -0
- data/spec/fixtures/custom_step.rb +19 -0
- data/spec/models/massive/cancelling_spec.rb +83 -0
- data/spec/models/massive/file_job_spec.rb +24 -0
- data/spec/models/massive/file_spec.rb +209 -0
- data/spec/models/massive/file_step_spec.rb +22 -0
- data/spec/models/massive/job_spec.rb +319 -0
- data/spec/models/massive/locking_spec.rb +52 -0
- data/spec/models/massive/memory_consumption_spec.rb +24 -0
- data/spec/models/massive/notifications_spec.rb +107 -0
- data/spec/models/massive/notifiers/base_spec.rb +48 -0
- data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
- data/spec/models/massive/process_serializer_spec.rb +38 -0
- data/spec/models/massive/process_spec.rb +235 -0
- data/spec/models/massive/status_spec.rb +104 -0
- data/spec/models/massive/step_serializer_spec.rb +40 -0
- data/spec/models/massive/step_spec.rb +490 -0
- data/spec/models/massive/timing_support_spec.rb +55 -0
- data/spec/shared/step_context.rb +25 -0
- data/spec/spec_helper.rb +42 -0
- data/spec/support/mongoid.yml +78 -0
- metadata +175 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
module Massive
  module Notifiers
    # Notifier that delivers each notification by calling +trigger+ on a
    # client object (the global ::Pusher constant unless one is supplied
    # through +options[:client]+).
    #
    # NOTE(review): +id+ and +options+ are not defined here; presumably they
    # are provided by Massive::Notifiers::Base -- confirm against that class.
    class Pusher < Base
      protected

      # Sends +message+ through the client.
      #
      # message - the event name handed to the client.
      # data    - the payload used when no block is given.
      # block   - optional; when given, its return value replaces +data+
      #           (the block is only evaluated here, at send time).
      def send_notification(message, data, &block)
        payload = block_given? ? block.call : data

        client.trigger(id, message, payload)
      end

      # Memoized client: an explicitly configured one wins over ::Pusher.
      def client
        @client ||= (options[:client] || ::Pusher)
      end
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module Massive
  # Mongoid document that embeds Massive::Step records, can enqueue the next
  # pending step, reports weighted progress and can be cancelled.
  class Process
    include Mongoid::Document
    include Mongoid::Timestamps

    field :cancelled_at, type: Time

    embeds_many :steps, class_name: 'Massive::Step'

    class << self
      # Loads the process with +process_id+ and returns its embedded step
      # with +step_id+.
      def find_step(process_id, step_id)
        find(process_id).steps.find(step_id)
      end

      # Returns the job +job_id+ embedded in the step located by find_step.
      def find_job(process_id, step_id, job_id)
        find_step(process_id, step_id).jobs.find(job_id)
      end
    end

    # Enqueues the next step, if there is one; returns nil otherwise.
    def enqueue_next
      next_step.try(:enqueue)
    end

    # First step that is neither completed nor started. Returns nil when
    # there is no such step or when that step reports itself as enqueued.
    def next_step
      candidate = steps.not_completed.not_started.first
      return nil if candidate.try(:enqueued?)

      candidate
    end

    # Weighted average of the steps' processed percentages; 0 when the
    # total weight is not positive.
    def processed_percentage
      weight = total_weight
      return 0 unless weight > 0

      total_steps_processed_percentage.to_f / weight
    end

    # True when no step is left in the not_completed scope.
    def completed?
      steps.not_completed.none?
    end

    # True when cancelled_at is set on this document or the cancellation
    # key exists in Redis.
    def cancelled?
      cancelled_at? || redis.exists(cancelled_key)
    end

    # Marks the process as cancelled: stamps cancelled_at, writes the Redis
    # key with a one-day expiry, then saves. Returns the result of +save+.
    def cancel
      self.cancelled_at = Time.now
      redis.setex(cancelled_key, 1.day, true)
      save
    end

    # Falls back to Massive::ProcessSerializer when +super+ returns nothing.
    def active_model_serializer
      super || Massive::ProcessSerializer
    end

    protected

    # Redis connection shared through the Massive module.
    def redis
      Massive.redis
    end

    # Redis key flagging this process as cancelled.
    def cancelled_key
      [self.class.name.underscore, id, 'cancelled'].join(':')
    end

    private

    # Sum of the weights of every step.
    def total_weight
      steps.map { |step| step.weight }.sum
    end

    # Sum of each step's processed percentage multiplied by its weight.
    def total_steps_processed_percentage
      steps.inject(0) do |accumulated, step|
        accumulated + step.processed_percentage * step.weight
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Massive
  # Concern that re-runs a block when it raises, sleeping between attempts,
  # up to a class-level maximum number of retries.
  #
  # The including class must expose a +retries+ accessor (read and written
  # by #retrying).
  module Retry
    extend ActiveSupport::Concern

    included do
      # Defaults for the including class.
      retry_interval(2)
      maximum_retries(10)

      class << self
        # Copies the parent's settings to each subclass, since they are
        # stored in class-level instance variables.
        def inherited(base)
          super

          base.retry_interval(retry_interval)
          base.maximum_retries(maximum_retries)
        end
      end
    end

    # Calls +block+, retrying on StandardError.
    #
    # Resets +retries+ to 0, then runs the block. Massive::Cancelled and
    # SignalException are re-raised immediately. Any other StandardError
    # increments +retries+; while the counter is below the class's
    # maximum_retries the method sleeps for retry_interval and tries again,
    # otherwise the last error is raised.
    def retrying(&block)
      self.retries = 0

      # The inner begin is required: `retry` restarts only this block, so the
      # counter reset above is not executed again.
      begin
        block.call
      rescue Massive::Cancelled, SignalException
        # Cancellation and signals are not failures; propagate them untouched.
        raise
      rescue StandardError => error
        self.retries += 1
        raise error unless self.retries < self.class.maximum_retries

        Kernel.sleep(self.class.retry_interval)
        retry
      end
    end

    module ClassMethods
      # Reader/writer: with a truthy +value+ stores it as the sleep time
      # handed to Kernel.sleep between attempts; always returns the
      # current setting.
      def retry_interval(value=nil)
        return @retry_interval unless value

        @retry_interval = value
      end

      # Reader/writer: with a truthy +value+ stores it as the attempt
      # limit; always returns the current setting.
      def maximum_retries(value=nil)
        return @maximum_retries unless value

        @maximum_retries = value
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Massive
  # Concern adding lifecycle timestamps, error bookkeeping fields, query
  # scopes and status predicates to the including Mongoid document.
  module Status
    extend ActiveSupport::Concern

    included do
      field :started_at,   type: Time
      field :finished_at,  type: Time
      field :failed_at,    type: Time
      field :cancelled_at, type: Time

      field :last_error, type: String
      field :retries,    type: Integer, default: 0

      scope :started,       ne(started_at: nil)
      scope :not_started,   where(started_at: nil)
      scope :completed,     ne(finished_at: nil)
      scope :not_completed, where(finished_at: nil)
      scope :failed,        ne(failed_at: nil)
      scope :cancelled,     ne(cancelled_at: nil)
    end

    # Persists a fresh "started" state (see #attributes_to_reset).
    def start!
      update_attributes(attributes_to_reset)
    end

    # True when started_at is set and the record has not failed.
    def started?
      return false if failed?

      started_at?
    end

    # True when finished_at is set and the record has not failed.
    def completed?
      return false if failed?

      finished_at?
    end

    # True when failed_at is set.
    def failed?
      failed_at?
    end

    # True when the item returned by Resque.peek for this class's queue is
    # present and matches this class name and #args_for_resque.
    #
    # NOTE(review): Resque.peek is called without start/count, so only a
    # single queue entry is compared -- confirm this is intended.
    def enqueued?
      head = Resque.peek(self.class.queue)
      return false unless head.present?

      head["class"] == self.class.name && head["args"] == args_for_resque
    end

    protected

    # Attributes written by #start!: stamps started_at with the current time
    # and clears every other status field.
    def attributes_to_reset
      {
        started_at: Time.now,
        finished_at: nil,
        failed_at: nil,
        cancelled_at: nil,
        retries: 0,
        last_error: nil
      }
    end

    # Arguments this record is enqueued with. Returns nil here; including
    # classes may override it.
    def args_for_resque
      nil
    end
  end
end
|
data/lib/massive/step.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
module Massive
  # Embedded document describing one step of a Massive::Process. When worked,
  # a step splits its total_count into jobs of at most #limit items each and
  # stores them as embedded jobs.
  #
  # NOTE(review): #notify, #locked?, #current_memory_consumption and the
  # memory_consumption attribute are not defined in this file; presumably
  # they come from the included Notifications, Locking and MemoryConsumption
  # modules -- confirm.
  class Step
    include Mongoid::Document
    include Mongoid::Timestamps

    include Massive::Status
    include Massive::MemoryConsumption
    include Massive::TimingSupport
    include Massive::Locking
    include Massive::Notifications

    embedded_in :process, class_name: 'Massive::Process'
    embeds_many :jobs, class_name: 'Massive::Job'

    field :total_count,  type: Integer
    field :weight,       type: Integer, default: 1
    field :job_class,    type: String,  default: -> { self.class.job_class }
    field :execute_next, type: Boolean, default: false

    define_model_callbacks :work, :complete

    class << self
      # Resque entry point: loads the step and works it.
      def perform(process_id, step_id)
        Massive::Process.find_step(process_id, step_id).work
      end

      # Name of the Resque queue steps are pushed to.
      def queue
        :massive_step
      end

      # Defines #calculate_total_count on this class from the given block.
      def calculates_total_count_with(&block)
        define_method(:calculate_total_count, &block)
      end

      # Reader/writer for the threshold => per-job limit table used by #limit.
      # Entries are checked in declaration order, so thresholds should be
      # listed from highest to lowest.
      def limit_ratio(value=nil)
        return @limit_ratio unless value

        @limit_ratio = value
      end

      # Reader/writer for the default job class name of new steps.
      def job_class(value=nil)
        return @job_class unless value

        @job_class = value
      end

      # Subclasses start with the parent's job class and limit table, since
      # both live in class-level instance variables.
      def inherited(child)
        super

        child.job_class(job_class)
        child.limit_ratio(limit_ratio)
      end
    end

    limit_ratio(3000 => 1000, 0 => 100)
    job_class('Massive::Job')

    # Pushes this step onto Resque with the process id and step id as strings.
    def enqueue
      Resque.enqueue(self.class, process.id.to_s, id.to_s)
    end

    # Resets the status fields (via Status#start!) and notifies :start.
    def start!
      super
      notify(:start)
    end

    # Starts the step, builds its jobs inside the :work callbacks and then
    # tries to complete it.
    def work
      start!

      run_callbacks(:work) { process_step }

      complete
    end

    # Replaces the embedded jobs with one new job per slice of the total.
    def process_step
      built_jobs = number_of_jobs.times.map { |index| job_class.constantize.new(job_params(index)) }

      self.jobs = built_jobs
    end

    # Marks the step as finished once every job is completed and the
    # :complete lock is not held, then enqueues the process's next step when
    # execute_next is set. Does nothing (returns nil) otherwise.
    def complete
      return unless completed_all_jobs? && !locked?(:complete)

      run_callbacks(:complete) do
        update_attributes finished_at: Time.now, failed_at: nil, memory_consumption: current_memory_consumption
        notify(:complete)
      end

      process.enqueue_next if execute_next?
    end

    # True when every embedded job is completed. Persisted steps are reloaded
    # first so that the check uses the stored state of the jobs.
    def completed_all_jobs?
      reload if persisted?

      jobs.all? { |job| job.completed? }
    end

    # Total number of items processed across all jobs.
    def processed
      jobs.map { |job| job.processed }.sum
    end

    # Fraction of total_count already processed; 0 when total_count is
    # missing or not positive.
    def processed_percentage
      return 0 unless total_count && total_count > 0

      processed.to_f / total_count
    end

    # Sum of the elapsed time of every job.
    def processing_time
      jobs.map { |job| job.elapsed_time }.sum
    end

    # Per-job item limit: the value of the first limit_ratio entry whose
    # threshold does not exceed total_count. Memoized per instance.
    def limit
      return @limit if @limit

      ratio = self.class.limit_ratio.find { |threshold, _size| total_count >= threshold }
      @limit = ratio.last
    end

    # Default total: 0. Overridden through calculates_total_count_with.
    def calculate_total_count
      0
    end

    # Falls back to Massive::StepSerializer when +super+ returns nothing.
    def active_model_serializer
      super || Massive::StepSerializer
    end

    protected

    # Attributes for the job at position +index+: its offset into the data,
    # its item limit and a back-reference to this step.
    def job_params(index)
      {
        offset: index * limit,
        limit: limit,
        step: self
      }
    end

    # Number of jobs needed to cover total_count at #limit items per job.
    def number_of_jobs
      jobs_needed = total_count.to_f / limit

      jobs_needed.ceil
    end

    # Adds total_count to the attributes reset on start!, calculating it
    # only when it has not been set yet.
    def attributes_to_reset
      super.merge(total_count: total_count || calculate_total_count)
    end

    # Arguments Resque stores for this step (compared by Status#enqueued?).
    def args_for_resque
      [process.id.to_s, id.to_s]
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Massive
  # Serializer listing the step attributes exposed to clients.
  class StepSerializer < ActiveModel::Serializer
    # Identity and record timestamps.
    attributes :id, :created_at, :updated_at
    # Lifecycle state.
    attributes :started_at, :finished_at, :failed_at, :last_error, :retries
    # Resource usage and progress.
    attributes :memory_consumption, :total_count,
               :processed, :processed_percentage, :processing_time, :elapsed_time
    # Notification channel identifier.
    attributes :notifier_id

    # The id is exposed in its string form.
    def id
      object.id.to_s
    end
  end
end
|
data/massive.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'massive/version'
|
5
|
+
|
6
|
+
# Gem specification for massive. The file list is taken from `git ls-files`,
# so the gem must be built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.name = "massive"
  gem.version = Massive::VERSION
  gem.authors = ["Vicente Mundim"]
  gem.email = ["vicente.mundim@gmail.com"]
  gem.description = %q{Parallelize processing of large files and/or data using Resque, Redis and MongoDB}
  gem.summary = %q{Parallelize processing of large files and/or data using Resque, Redis and MongoDB}

  gem.files = `git ls-files`.split($/)
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "resque"
  # Was "~> 3.1.x". RubyGems parses "3.1.x" as a prerelease version, which
  # turns the whole requirement into a prerelease requirement (>= 3.1.x, < 4).
  # "~> 3.1" accepts the same released versions (>= 3.1, < 4) without
  # matching prerelease builds.
  gem.add_dependency "mongoid", "~> 3.1"
  gem.add_dependency "file_processor", "0.2.0"
  gem.add_dependency "active_model_serializers"
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Spec fixture: a step with a fixed total count, its own job class, a
# three-tier limit table and customised job attributes.
class CustomStep < Massive::Step
  calculates_total_count_with { 100 }
  job_class('CustomJob')

  limit_ratio(3000 => 1500, 2000 => 1000, 0 => 100)

  protected

  # Same offset/limit slicing as Massive::Step, but passes a custom
  # parameter instead of the step reference.
  def job_params(index)
    { offset: index * limit, limit: limit, custom_param: "some_param" }
  end
end
|
17
|
+
|
18
|
+
# Spec fixture: a subclass that overrides nothing, so it only carries what
# Massive::Step hands down to subclasses.
class InheritedStep < Massive::Step; end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Minimal host for Massive::Cancelling used by the specs below: it runs a
# fixed number of iterations, each wrapped in +cancelling+, and records how
# many finished and whether a Massive::Cancelled was raised.
class Cancellable
  include Massive::Cancelling

  attr_accessor :cancelled, :work_count, :work_done_count, :cancelled_exception

  # work_count - number of iterations #work will attempt.
  def initialize(work_count)
    @work_count = work_count
  end

  # Only an explicit +true+ counts as cancelled.
  def cancelled?
    cancelled == true
  end

  # Calls +block+ with (self, iteration) for each iteration, counting the
  # completed ones. A Massive::Cancelled raised along the way stops the loop
  # and is stored in +cancelled_exception+ instead of propagating.
  def work(&block)
    self.work_done_count = 0

    begin
      work_count.times do |iteration|
        cancelling do
          block.call(self, iteration)
          self.work_done_count += 1
        end
      end
    rescue Massive::Cancelled => error
      self.cancelled_exception = error
    end
  end
end
|
29
|
+
|
30
|
+
# Exercises Massive::Cancelling through the Cancellable fixture: each context
# runs the work once in a +before+ hook and the examples inspect the outcome.
describe Massive::Cancelling do
  let(:work_count) { 3 }
  subject(:cancellable) { Cancellable.new(work_count) }

  context "when it is never cancelled" do
    before { cancellable.work { |_worker| } }

    it "does not cancel the work" do
      cancellable.work_done_count.should eq(cancellable.work_count)
    end

    it "does not raises a cancelled exception" do
      cancellable.cancelled_exception.should be_nil
    end
  end

  context "when it is cancelled before actually performing any work" do
    before do
      cancellable.cancelled = true
      cancellable.work { |_worker| }
    end

    it "cancels the work before the first iteration" do
      cancellable.work_done_count.should eq(0)
    end

    it "raises a cancelled exception" do
      cancellable.cancelled_exception.should be_present
    end
  end

  context "when it is cancelled while performing some work" do
    # Cancelling during the second-to-last iteration leaves one iteration
    # that must be refused.
    let(:cancel_on) { work_count - 2 }

    before do
      cancellable.work { |worker, iteration| worker.cancelled = (iteration == cancel_on) }
    end

    it "cancels the work before performing the iteration" do
      cancellable.work_done_count.should eq(2)
    end

    it "raises a cancelled exception" do
      cancellable.cancelled_exception.should be_present
    end
  end

  context "when it is cancelled while performing the last iteration" do
    # Nothing is left to refuse once the last iteration has already run.
    let(:cancel_on) { work_count - 1 }

    before do
      cancellable.work { |worker, iteration| worker.cancelled = (iteration == cancel_on) }
    end

    it "performs all the work" do
      cancellable.work_done_count.should eq(work_count)
    end

    it "does not raise a cancelled exception" do
      cancellable.cancelled_exception.should be_nil
    end
  end
end
|