massive 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +22 -0
  3. data/.rspec +3 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +19 -0
  7. data/Gemfile.lock +141 -0
  8. data/Guardfile +9 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +196 -0
  11. data/Rakefile +8 -0
  12. data/lib/massive.rb +63 -0
  13. data/lib/massive/cancelling.rb +20 -0
  14. data/lib/massive/file.rb +80 -0
  15. data/lib/massive/file_job.rb +9 -0
  16. data/lib/massive/file_process.rb +7 -0
  17. data/lib/massive/file_step.rb +7 -0
  18. data/lib/massive/job.rb +115 -0
  19. data/lib/massive/locking.rb +27 -0
  20. data/lib/massive/memory_consumption.rb +15 -0
  21. data/lib/massive/notifications.rb +40 -0
  22. data/lib/massive/notifiers.rb +6 -0
  23. data/lib/massive/notifiers/base.rb +32 -0
  24. data/lib/massive/notifiers/pusher.rb +17 -0
  25. data/lib/massive/process.rb +69 -0
  26. data/lib/massive/process_serializer.rb +12 -0
  27. data/lib/massive/retry.rb +49 -0
  28. data/lib/massive/status.rb +59 -0
  29. data/lib/massive/step.rb +143 -0
  30. data/lib/massive/step_serializer.rb +12 -0
  31. data/lib/massive/timing_support.rb +10 -0
  32. data/lib/massive/version.rb +3 -0
  33. data/massive.gemspec +23 -0
  34. data/spec/fixtures/custom_job.rb +4 -0
  35. data/spec/fixtures/custom_step.rb +19 -0
  36. data/spec/models/massive/cancelling_spec.rb +83 -0
  37. data/spec/models/massive/file_job_spec.rb +24 -0
  38. data/spec/models/massive/file_spec.rb +209 -0
  39. data/spec/models/massive/file_step_spec.rb +22 -0
  40. data/spec/models/massive/job_spec.rb +319 -0
  41. data/spec/models/massive/locking_spec.rb +52 -0
  42. data/spec/models/massive/memory_consumption_spec.rb +24 -0
  43. data/spec/models/massive/notifications_spec.rb +107 -0
  44. data/spec/models/massive/notifiers/base_spec.rb +48 -0
  45. data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
  46. data/spec/models/massive/process_serializer_spec.rb +38 -0
  47. data/spec/models/massive/process_spec.rb +235 -0
  48. data/spec/models/massive/status_spec.rb +104 -0
  49. data/spec/models/massive/step_serializer_spec.rb +40 -0
  50. data/spec/models/massive/step_spec.rb +490 -0
  51. data/spec/models/massive/timing_support_spec.rb +55 -0
  52. data/spec/shared/step_context.rb +25 -0
  53. data/spec/spec_helper.rb +42 -0
  54. data/spec/support/mongoid.yml +78 -0
  55. metadata +175 -0
@@ -0,0 +1,17 @@
module Massive
  module Notifiers
    # Notifier that hands notifications over to a Pusher-style client
    # (anything responding to +trigger+).
    class Pusher < Base
      protected

      # Sends +message+ through the client.
      #
      # message - first payload descriptor forwarded to +client.trigger+.
      # data    - payload used when no block is given.
      # block   - optional; when given, its return value replaces +data+.
      #
      # NOTE(review): +id+ and +options+ are not defined in this class;
      # presumably they come from Base -- confirm.
      def send_notification(message, data, &block)
        payload = block_given? ? block.call : data

        client.trigger(id, message, payload)
      end

      # Memoized delivery client: options[:client] when truthy, otherwise
      # the top-level ::Pusher constant.
      def client
        @client ||= (options[:client] || ::Pusher)
      end
    end
  end
end
@@ -0,0 +1,69 @@
module Massive
  # Mongoid document that owns a list of embedded steps and exposes
  # progress, completion and cancellation state computed from them.
  class Process
    include Mongoid::Document
    include Mongoid::Timestamps

    field :cancelled_at, type: Time

    embeds_many :steps, class_name: 'Massive::Step'

    class << self
      # Loads the process with +process_id+ and returns its embedded step
      # identified by +step_id+.
      def find_step(process_id, step_id)
        find(process_id).steps.find(step_id)
      end

      # Returns the job +job_id+ embedded in the step located by find_step.
      def find_job(process_id, step_id, job_id)
        find_step(process_id, step_id).jobs.find(job_id)
      end
    end

    # Enqueues the step returned by next_step, when there is one.
    def enqueue_next
      next_step.try(:enqueue)
    end

    # First step that is neither completed nor started; nil when there is no
    # such step or when that step reports itself as already enqueued.
    def next_step
      candidate = steps.not_completed.not_started.first
      candidate unless candidate.try(:enqueued?)
    end

    # Weighted average of the steps' processed percentages; 0 when the total
    # weight is not positive.
    def processed_percentage
      return 0 unless total_weight > 0

      total_steps_processed_percentage.to_f / total_weight
    end

    # True when no step is left in the not_completed scope.
    def completed?
      steps.not_completed.none?
    end

    # True when cancelled_at is set or the cancellation key exists in redis.
    #
    # NOTE(review): the result of redis.exists is used directly as a boolean;
    # confirm the redis client in use does not return an Integer here.
    def cancelled?
      cancelled_at? || redis.exists(cancelled_key)
    end

    # Marks the process as cancelled: sets cancelled_at, writes the redis
    # cancellation key with a one day expiry, then saves the document.
    def cancel
      self.cancelled_at = Time.now
      redis.setex(cancelled_key, 1.day, true)
      save
    end

    # Falls back to Massive::ProcessSerializer when super yields nothing.
    def active_model_serializer
      super || Massive::ProcessSerializer
    end

    protected

    # Redis connection configured on the Massive module.
    def redis
      Massive.redis
    end

    # Redis key flagging this process as cancelled.
    def cancelled_key
      "#{self.class.name.underscore}:#{id}:cancelled"
    end

    private

    # Sum of the weights of every step.
    def total_weight
      steps.map(&:weight).sum
    end

    # Sum over all steps of processed_percentage multiplied by weight.
    def total_steps_processed_percentage
      steps.inject(0) do |accumulated, step|
        accumulated + step.processed_percentage * step.weight
      end
    end
  end
end
@@ -0,0 +1,12 @@
module Massive
  # Serializer for Massive::Process: timestamps, progress, a +completed+
  # flag read from +completed?+, and the associated steps.
  class ProcessSerializer < ActiveModel::Serializer
    attributes :id,
               :created_at,
               :updated_at,
               :processed_percentage
    attribute :completed?, key: :completed

    has_many :steps

    # Serializes the identifier as a String.
    def id
      object.id.to_s
    end
  end
end
@@ -0,0 +1,49 @@
module Massive
  # Concern adding bounded retry behaviour. Including classes get
  # class-level +retry_interval+ / +maximum_retries+ settings (defaults: 2
  # and 10) which are copied to subclasses through +inherited+.
  module Retry
    extend ActiveSupport::Concern

    included do
      retry_interval 2
      maximum_retries 10

      # Propagates the parent's current settings to each subclass.
      def self.inherited(base)
        super

        base.retry_interval retry_interval
        base.maximum_retries maximum_retries
      end
    end

    # Runs the block, retrying on StandardError.
    #
    # Resets +retries+ to 0 first. Each failure increments +retries+; while
    # the counter stays below the class' maximum_retries the block is run
    # again after sleeping retry_interval, otherwise the error is raised.
    # Massive::Cancelled and SignalException are never retried.
    def retrying(&block)
      self.retries = 0

      begin
        block.call
      rescue Massive::Cancelled, SignalException
        # cancellation and signals are not actual errors: let them through
        raise
      rescue StandardError => error
        self.retries += 1
        raise error unless self.retries < self.class.maximum_retries

        Kernel.sleep self.class.retry_interval
        retry
      end
    end

    module ClassMethods
      # Reader/writer: stores +value+ when truthy, always returns the
      # currently stored interval.
      def retry_interval(value=nil)
        value ? (@retry_interval = value) : @retry_interval
      end

      # Reader/writer: stores +value+ when truthy, always returns the
      # currently stored maximum.
      def maximum_retries(value=nil)
        value ? (@maximum_retries = value) : @maximum_retries
      end
    end
  end
end
@@ -0,0 +1,59 @@
module Massive
  # Concern adding lifecycle timestamps, error bookkeeping fields, matching
  # scopes and status predicates to the including document.
  module Status
    extend ActiveSupport::Concern

    included do
      field :started_at,   type: Time
      field :finished_at,  type: Time
      field :failed_at,    type: Time
      field :cancelled_at, type: Time

      field :last_error, type: String
      field :retries,    type: Integer, default: 0

      scope :started,       ne(started_at: nil)
      scope :not_started,   where(started_at: nil)
      scope :completed,     ne(finished_at: nil)
      scope :not_completed, where(finished_at: nil)
      scope :failed,        ne(failed_at: nil)
      scope :cancelled,     ne(cancelled_at: nil)
    end

    # Persists the attributes returned by attributes_to_reset.
    def start!
      update_attributes(attributes_to_reset)
    end

    # Started means: not failed and started_at is set.
    def started?
      failed? ? false : started_at?
    end

    # Completed means: not failed and finished_at is set.
    def completed?
      failed? ? false : finished_at?
    end

    # True when failed_at is set.
    def failed?
      failed_at?
    end

    # Compares the item returned by Resque.peek for this class' queue with
    # this object's class name and args_for_resque.
    #
    # NOTE(review): peek is called without start/count, so presumably only
    # the head of the queue is examined -- confirm.
    def enqueued?
      head = Resque.peek(self.class.queue)
      return false unless head.present?

      (head["class"] == self.class.name) && (head["args"] == args_for_resque)
    end

    protected

    # Attribute values written by start!: a fresh started_at and every other
    # status field cleared.
    def attributes_to_reset
      {
        started_at:   Time.now,
        finished_at:  nil,
        failed_at:    nil,
        cancelled_at: nil,
        retries:      0,
        last_error:   nil
      }
    end

    # Arguments compared against the queued Resque item; returns nil here.
    def args_for_resque
      nil
    end
  end
end
@@ -0,0 +1,143 @@
module Massive
  # A unit of work embedded in a Massive::Process. Working a step splits
  # total_count items into jobs of +limit+ items each; once every job is
  # completed the step is marked finished.
  class Step
    include Mongoid::Document
    include Mongoid::Timestamps

    include Massive::Status
    include Massive::MemoryConsumption
    include Massive::TimingSupport
    include Massive::Locking
    include Massive::Notifications

    embedded_in :process, class_name: 'Massive::Process'
    embeds_many :jobs, class_name: 'Massive::Job'

    field :total_count, type: Integer
    field :weight, type: Integer, default: 1
    field :job_class, type: String, default: -> { self.class.job_class }
    field :execute_next, type: Boolean, default: false

    define_model_callbacks :work
    define_model_callbacks :complete

    # Resque entry point: finds the step and works it.
    def self.perform(process_id, step_id)
      Massive::Process.find_step(process_id, step_id).work
    end

    # Name of the Resque queue used for steps.
    def self.queue
      :massive_step
    end

    # Defines calculate_total_count on the class from the given block.
    def self.calculates_total_count_with(&block)
      define_method(:calculate_total_count, &block)
    end

    # Reader/writer for the threshold => job size mapping used by #limit.
    # Stores +value+ when truthy and returns the stored mapping.
    def self.limit_ratio(value=nil)
      @limit_ratio = value if value
      @limit_ratio
    end

    # Reader/writer for the default job class name of new steps.
    def self.job_class(value=nil)
      @job_class = value if value
      @job_class
    end

    # Copies the parent's job_class and limit_ratio settings to subclasses.
    def self.inherited(child)
      super

      child.job_class self.job_class
      child.limit_ratio self.limit_ratio
    end

    limit_ratio 3000 => 1000, 0 => 100
    job_class 'Massive::Job'

    # Enqueues this step on Resque with the process and step ids as strings.
    def enqueue
      Resque.enqueue(self.class, process.id.to_s, id.to_s)
    end

    # Resets the status attributes (see Massive::Status#start!) and sends the
    # :start notification.
    def start!
      super
      notify(:start)
    end

    # Starts the step, builds its jobs inside the :work callbacks and then
    # attempts to complete it.
    def work
      start!

      run_callbacks :work do
        process_step
      end

      complete
    end

    # Replaces the step's jobs with number_of_jobs new instances of
    # job_class, each built from job_params(index).
    def process_step
      self.jobs = number_of_jobs.times.map do |index|
        job_class.constantize.new(job_params(index))
      end
    end

    # When every job is completed and locked?(:complete) is false, runs the
    # :complete callbacks around persisting finished_at, clearing failed_at,
    # recording memory consumption and notifying :complete. Afterwards asks
    # the process to enqueue its next step if execute_next? is set.
    def complete
      if completed_all_jobs? && !locked?(:complete)
        run_callbacks :complete do
          update_attributes finished_at: Time.now, failed_at: nil, memory_consumption: current_memory_consumption
          notify(:complete)
        end

        process.enqueue_next if execute_next?
      end
    end

    # Reloads the step when persisted, then reports whether all of its jobs
    # are completed.
    def completed_all_jobs?
      reload if persisted?

      jobs.all?(&:completed?)
    end

    # Sum of the jobs' processed counters.
    def processed
      jobs.map(&:processed).sum
    end

    # processed / total_count as a Float; 0 when total_count is nil or not
    # positive.
    def processed_percentage
      total_count && total_count > 0 ? processed.to_f / total_count : 0
    end

    # Sum of the jobs' elapsed times.
    def processing_time
      jobs.map(&:elapsed_time).sum
    end

    # Number of items handled by each job (memoized).
    #
    # Picks the value of the first limit_ratio entry, in declaration order,
    # whose threshold is <= total_count. A nil total_count is treated as 0 so
    # that this method agrees with number_of_jobs, which already coerces nil
    # through to_f, instead of raising NoMethodError on the comparison.
    def limit
      @limit ||= begin
        count = total_count.to_i
        self.class.limit_ratio.find { |threshold, _size| count >= threshold }.last
      end
    end

    # Default total count; replaced by calculates_total_count_with.
    def calculate_total_count
      0
    end

    # Falls back to Massive::StepSerializer when super yields nothing.
    def active_model_serializer
      super || Massive::StepSerializer
    end

    protected

    # Attributes for the job at position +index+.
    def job_params(index)
      {
        offset: index * limit,
        limit: limit,
        step: self
      }
    end

    # How many jobs of +limit+ items are needed to cover total_count.
    def number_of_jobs
      (total_count.to_f / limit).ceil
    end

    # Adds total_count to the reset attributes, computing it only when it is
    # not already set.
    def attributes_to_reset
      super.merge(total_count: total_count || calculate_total_count)
    end

    # Arguments this step is enqueued with (see #enqueue).
    def args_for_resque
      [process.id.to_s, id.to_s]
    end
  end
end
@@ -0,0 +1,12 @@
module Massive
  # Serializer for Massive::Step exposing its timestamps, error/retry
  # bookkeeping, counters, timings and notifier id.
  class StepSerializer < ActiveModel::Serializer
    attributes :id,
               :created_at, :updated_at,
               :started_at, :finished_at, :failed_at,
               :last_error, :retries,
               :memory_consumption, :total_count,
               :processed, :processed_percentage,
               :processing_time, :elapsed_time,
               :notifier_id

    # Serializes the identifier as a String.
    def id
      object.id.to_s
    end
  end
end
@@ -0,0 +1,10 @@
module Massive
  # Mixin computing elapsed time from the includer's started_at and
  # finished_at readers and its started_at? predicate.
  module TimingSupport
    # Returns 0 when started_at? is false. Otherwise returns the difference
    # between finished_at (or the current time when it is not set) and
    # started_at.
    def elapsed_time
      return 0 unless started_at?

      ending    = finished_at || Time.now
      beginning = started_at || 0

      ending - beginning
    end
  end
end
@@ -0,0 +1,3 @@
module Massive
  # Version of the massive gem; read by the gemspec.
  VERSION = '0.1.0'
end
@@ -0,0 +1,23 @@
# -*- encoding: utf-8 -*-
# Gem specification for massive. The version is read from
# lib/massive/version.rb, so lib/ is put on the load path first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'massive/version'

Gem::Specification.new do |gem|
  gem.name          = "massive"
  gem.version       = Massive::VERSION
  gem.authors       = ["Vicente Mundim"]
  gem.email         = ["vicente.mundim@gmail.com"]
  gem.description   = %q{Parallelize processing of large files and/or data using Resque, Redis and MongoDB}
  gem.summary       = %q{Parallelize processing of large files and/or data using Resque, Redis and MongoDB}

  # Package every file tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "resque"
  # "~> 3.1.x" is parsed by RubyGems as the prerelease version 3.1.x, which
  # makes the pessimistic constraint admit everything below 4.0. Pin to the
  # 3.1 series explicitly instead.
  gem.add_dependency "mongoid", "~> 3.1.0"
  gem.add_dependency "file_processor", "0.2.0"
  gem.add_dependency "active_model_serializers"
end
@@ -0,0 +1,4 @@
# Spec fixture: a Massive::Job subclass declaring its own retry settings.
class CustomJob < Massive::Job
  retry_interval(5)
  maximum_retries(20)
end
@@ -0,0 +1,19 @@
# Spec fixture: a step with a fixed total count, its own job class, a
# three-tier limit ratio and customised job parameters.
class CustomStep < Massive::Step
  calculates_total_count_with { 100 }
  job_class('CustomJob')

  limit_ratio(3000 => 1500, 2000 => 1000, 0 => 100)

  protected

  # Job attributes for the job at +index+; adds :custom_param and does not
  # pass the step itself.
  def job_params(index)
    { offset: index * limit, limit: limit, custom_param: "some_param" }
  end
end

# Spec fixture: a step subclass that declares nothing of its own.
class InheritedStep < Massive::Step; end
@@ -0,0 +1,83 @@
1
+ require "spec_helper"
2
+
# Spec helper: runs a configurable number of work iterations, each wrapped
# in Massive::Cancelling#cancelling, and records how far it got.
class Cancellable
  include Massive::Cancelling

  attr_accessor :cancelled, :work_count, :work_done_count, :cancelled_exception

  # work_count - number of iterations #work will attempt.
  def initialize(work_count)
    self.work_count = work_count
  end

  # Cancelled only when the flag is exactly true.
  def cancelled?
    cancelled == true
  end

  # Calls the block once per iteration with (self, iteration), counting the
  # iterations that finished. A Massive::Cancelled raised along the way
  # stops the loop and is stored in cancelled_exception.
  def work(&block)
    self.work_done_count = 0

    begin
      work_count.times do |iteration|
        cancelling do
          block.call(self, iteration)
          self.work_done_count += 1
        end
      end
    rescue Massive::Cancelled => cancellation
      self.cancelled_exception = cancellation
    end
  end
end
29
+
# Exercises Massive::Cancelling through the Cancellable helper: the
# cancellation check happens before each iteration, so a flag raised during
# the last iteration never interrupts the work.
describe Massive::Cancelling do
  let(:work_count) { 3 }
  subject(:cancellable) { Cancellable.new(work_count) }

  context "when it is never cancelled" do
    it "does not cancel the work" do
      cancellable.work { |cancellable| }
      cancellable.work_done_count.should eq(cancellable.work_count)
    end

    it "does not raise a cancelled exception" do
      cancellable.work { |cancellable| }
      cancellable.cancelled_exception.should be_nil
    end
  end

  context "when it is cancelled before actually performing any work" do
    before { cancellable.cancelled = true }

    it "cancels the work before the first iteration" do
      cancellable.work { |cancellable| }
      cancellable.work_done_count.should eq(0)
    end

    it "raises a cancelled exception" do
      cancellable.work { |cancellable| }
      cancellable.cancelled_exception.should be_present
    end
  end

  context "when it is cancelled while performing some work" do
    it "cancels the work before performing the iteration" do
      cancellable.work { |cancellable, iteration| cancellable.cancelled = (iteration == work_count - 2) }
      cancellable.work_done_count.should eq(2)
    end

    it "raises a cancelled exception" do
      cancellable.work { |cancellable, iteration| cancellable.cancelled = (iteration == work_count - 2) }
      cancellable.cancelled_exception.should be_present
    end
  end

  context "when it is cancelled while performing the last iteration" do
    it "performs all the work" do
      cancellable.work { |cancellable, iteration| cancellable.cancelled = (iteration == work_count - 1) }
      cancellable.work_done_count.should eq(work_count)
    end

    it "does not raise a cancelled exception" do
      cancellable.work { |cancellable, iteration| cancellable.cancelled = (iteration == work_count - 1) }
      cancellable.cancelled_exception.should be_nil
    end
  end
end