massive 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +22 -0
- data/.rspec +3 -0
- data/.rvmrc +1 -0
- data/.travis.yml +7 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +141 -0
- data/Guardfile +9 -0
- data/LICENSE.txt +22 -0
- data/README.md +196 -0
- data/Rakefile +8 -0
- data/lib/massive.rb +63 -0
- data/lib/massive/cancelling.rb +20 -0
- data/lib/massive/file.rb +80 -0
- data/lib/massive/file_job.rb +9 -0
- data/lib/massive/file_process.rb +7 -0
- data/lib/massive/file_step.rb +7 -0
- data/lib/massive/job.rb +115 -0
- data/lib/massive/locking.rb +27 -0
- data/lib/massive/memory_consumption.rb +15 -0
- data/lib/massive/notifications.rb +40 -0
- data/lib/massive/notifiers.rb +6 -0
- data/lib/massive/notifiers/base.rb +32 -0
- data/lib/massive/notifiers/pusher.rb +17 -0
- data/lib/massive/process.rb +69 -0
- data/lib/massive/process_serializer.rb +12 -0
- data/lib/massive/retry.rb +49 -0
- data/lib/massive/status.rb +59 -0
- data/lib/massive/step.rb +143 -0
- data/lib/massive/step_serializer.rb +12 -0
- data/lib/massive/timing_support.rb +10 -0
- data/lib/massive/version.rb +3 -0
- data/massive.gemspec +23 -0
- data/spec/fixtures/custom_job.rb +4 -0
- data/spec/fixtures/custom_step.rb +19 -0
- data/spec/models/massive/cancelling_spec.rb +83 -0
- data/spec/models/massive/file_job_spec.rb +24 -0
- data/spec/models/massive/file_spec.rb +209 -0
- data/spec/models/massive/file_step_spec.rb +22 -0
- data/spec/models/massive/job_spec.rb +319 -0
- data/spec/models/massive/locking_spec.rb +52 -0
- data/spec/models/massive/memory_consumption_spec.rb +24 -0
- data/spec/models/massive/notifications_spec.rb +107 -0
- data/spec/models/massive/notifiers/base_spec.rb +48 -0
- data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
- data/spec/models/massive/process_serializer_spec.rb +38 -0
- data/spec/models/massive/process_spec.rb +235 -0
- data/spec/models/massive/status_spec.rb +104 -0
- data/spec/models/massive/step_serializer_spec.rb +40 -0
- data/spec/models/massive/step_spec.rb +490 -0
- data/spec/models/massive/timing_support_spec.rb +55 -0
- data/spec/shared/step_context.rb +25 -0
- data/spec/spec_helper.rb +42 -0
- data/spec/support/mongoid.yml +78 -0
- metadata +175 -0
data/Rakefile
ADDED
data/lib/massive.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require "massive/version"
|
2
|
+
|
3
|
+
# Top-level namespace of the gem. It registers autoloads for every component
# and holds gem-wide configuration: the Redis connection and the Fog storage
# settings read by Massive::File.
module Massive
  autoload :MemoryConsumption, 'massive/memory_consumption'
  autoload :TimingSupport,     'massive/timing_support'
  autoload :Status,            'massive/status'
  autoload :Locking,           'massive/locking'
  autoload :Retry,             'massive/retry'
  autoload :Cancelling,        'massive/cancelling'

  autoload :Process,           'massive/process'
  autoload :Step,              'massive/step'
  autoload :Job,               'massive/job'

  autoload :File,              'massive/file'
  autoload :FileProcess,       'massive/file_process'
  autoload :FileStep,          'massive/file_step'
  autoload :FileJob,           'massive/file_job'

  autoload :Notifications,     'massive/notifications'
  autoload :Notifiers,         'massive/notifiers'

  autoload :ProcessSerializer, 'massive/process_serializer'
  autoload :StepSerializer,    'massive/step_serializer'

  # Raised to abort work that has been cancelled.
  class Cancelled < StandardError; end

  class << self
    # fog_credentials:                  value handed to Fog::Storage.new (nil until configured).
    # fog_authenticated_url_expiration: number added to the current epoch seconds
    #                                   when building an authenticated file URL.
    # fog_directory:                    name of the Fog directory files are read from.
    #
    # attr_reader is used instead of hand-written readers; it returns nil for a
    # setting that was never assigned without triggering "instance variable not
    # initialized" warnings on older Rubies.
    attr_reader :fog_credentials, :fog_authenticated_url_expiration, :fog_directory

    # Returns the Redis connection used by the gem, memoized from Resque.redis
    # on first use.
    def redis
      @redis ||= Resque.redis
    end

    # The writers keep their optional argument (defaulting to nil) so that any
    # caller invoking them without a value keeps working.

    def fog_credentials=(values = nil)
      @fog_credentials = values
    end

    def fog_authenticated_url_expiration=(value = nil)
      @fog_authenticated_url_expiration = value
    end

    def fog_directory=(directory = nil)
      @fog_directory = directory
    end
  end

  # Defaults: directory "massive", authenticated URLs valid for 3600 (1 * 60 * 60).
  self.fog_directory = 'massive'
  self.fog_authenticated_url_expiration = 1 * 60 * 60
end
|
59
|
+
|
60
|
+
require "resque"
|
61
|
+
require "mongoid"
|
62
|
+
require "active_model_serializers"
|
63
|
+
require "file_processor"
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Massive
  # Mixin that lets a unit of work abort itself once it has been cancelled.
  module Cancelling
    extend ActiveSupport::Concern

    # Hook for including classes: return a truthy value when the work should
    # stop. This default returns nil, so nothing is ever cancelled.
    def cancelled?
    end

    # Calls the given block unless #cancelled? is truthy; in that case
    # Massive::Cancelled is raised before the block is invoked.
    def cancelling(&block)
      if cancelled?
        raise Massive::Cancelled.new(cancelled_exception_message)
      end

      block.call
    end

    private

    # Message carried by the Massive::Cancelled exception. The id part is left
    # blank when the receiver does not respond to #id.
    def cancelled_exception_message
      identifier = respond_to?(:id) ? self.id : nil

      "Cancelled #{self.class.name} - #{identifier}"
    end
  end
end
|
data/lib/massive/file.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
module Massive
  # Mongoid document, embedded in a process, describing a CSV file: where it
  # lives (an explicit URL, or a filename resolved through Fog) and the
  # metadata gathered from it by #gather_info!.
  class File
    include Mongoid::Document
    include Mongoid::Timestamps

    embedded_in :process

    field :url,         type: String
    field :filename,    type: String

    field :encoding,    type: String
    field :col_sep,     type: String
    field :total_count, type: Integer
    field :use_headers, type: Boolean, default: true

    field :headers,     type: Array, default: -> { [] }
    field :sample_data, type: Array, default: -> { [] }

    # Memoized FileProcessor::CSV built from #url and #processor_options.
    def processor
      @processor ||= FileProcessor::CSV.new(url, processor_options)
    end

    # Resets the gathered metadata, re-reads it from the processor (encoding,
    # column separator, total count, headers when enabled and up to three
    # sample rows via process_range(limit: 3)) and saves the document.
    # Returns the result of #save.
    def gather_info!
      clear_info

      self.encoding    = processor.detected_encoding
      self.col_sep     = processor.col_sep
      self.total_count = processor.total_count

      if use_headers?
        self.headers = processor.shift && processor.headers
      end

      processor.process_range(limit: 3) do |row|
        sample = use_headers? ? row.fields : row
        self.sample_data << sample
      end

      save
    end

    # The stored url attribute when present, otherwise a URL obtained through
    # Fog (nil when Fog cannot be used).
    def url
      stored = read_attribute(:url).presence

      stored || authenticated_url
    end

    private

    # Blanks every gathered attribute and empties the sample rows.
    def clear_info
      [:encoding, :col_sep, :total_count, :headers].each do |attr|
        self[attr] = nil
      end

      sample_data.clear
    end

    # Options handed to the processor constructor.
    def processor_options
      { headers: use_headers?, encoding: encoding, col_sep: col_sep }
    end

    # URL of the Fog file: an expiring URL when the file object responds to
    # #url, its public_url otherwise. Returns nil when Fog cannot be used.
    def authenticated_url
      return unless can_use_fog?

      if fog_file.respond_to?(:url)
        fog_file.url(Time.current.to_i + Massive.fog_authenticated_url_expiration)
      else
        fog_file.public_url
      end
    end

    # Fog is usable only with a filename and configured credentials.
    def can_use_fog?
      filename && Massive.fog_credentials.present?
    end

    def fog_connection
      @fog_connection ||= Fog::Storage.new(Massive.fog_credentials)
    end

    def fog_directory
      @fog_directory ||= fog_connection.directories.get(Massive.fog_directory)
    end

    def fog_file
      @fog_file ||= fog_directory.files.get(filename)
    end
  end
end
|
data/lib/massive/job.rb
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
module Massive
  # Mongoid document embedded in a Massive::Step representing one unit of
  # background work. Creating a job enqueues it on Resque; the worker then
  # calls .perform, which looks the job up and runs #work.
  class Job
    include Mongoid::Document
    include Mongoid::Timestamps

    include Massive::Status
    include Massive::MemoryConsumption
    include Massive::TimingSupport
    include Massive::Retry
    include Massive::Cancelling

    embedded_in :step, class_name: 'Massive::Step'

    field :processed, type: Integer, default: 0
    field :offset,    type: Integer, default: 0
    field :limit,     type: Integer, default: -1

    delegate :process, :notify, to: :step

    define_model_callbacks :work

    after_create :enqueue

    # Resque entry point: finds the job through its process and step ids and
    # runs it.
    def self.perform(process_id, step_id, job_id)
      job = Massive::Process.find_job(process_id, step_id, job_id)
      job.work
    end

    # Name of the Resque queue jobs are pushed to.
    def self.queue
      :massive_job
    end

    # Pushes this job to Resque with the stringified process, step and job ids
    # (the same three arguments .perform receives).
    def enqueue
      ids = [process, step, self].map { |document| document.id.to_s }

      Resque.enqueue(self.class, *ids)
    end

    # Runs the job: starts it, processes every item yielded by #each_item
    # inside the :work callbacks (each item wrapped in retrying/cancelling,
    # followed by a processed increment and a :progress notification), then
    # finishes it. Cancellation and failures are handled by #handle_errors.
    def work
      handle_errors do
        cancelling do
          start!

          run_callbacks(:work) do
            each_item do |item, index|
              retrying do
                cancelling do
                  process_each(item, index)
                  increment_processed
                  notify(:progress)
                end
              end
            end
          end

          finish!
        end
      end
    end

    # Records the finish time and current memory consumption, then asks the
    # step to complete.
    def finish!
      update_attributes(
        finished_at: Time.now,
        memory_consumption: current_memory_consumption
      )

      step.complete
    end

    # Template method, empty here: subclasses iterate through each item within
    # the offset/limit range, yielding item and index.
    def each_item(&block)
    end

    # Template method, empty here: subclasses process a single item.
    def process_each(item, index)
    end

    protected

    def attributes_to_reset
      super.merge(processed: 0)
    end

    # A job counts as cancelled when its process is.
    def cancelled?
      process.cancelled?
    end

    private

    # Runs the block, translating outcomes into persisted state:
    # * Massive::Cancelled - stamps cancelled_at on job and step, sends a
    #   :cancelled notification and swallows the exception.
    # * StandardError / SignalException - stamps failed_at on step and job,
    #   stores the error message, resets processed, saves the step, sends a
    #   :failed notification and re-raises.
    def handle_errors
      yield
    rescue Massive::Cancelled
      assign_attributes(cancelled_at: Time.now)
      step.update_attributes(cancelled_at: Time.now)

      notify(:cancelled)
    rescue StandardError, SignalException => error
      step.failed_at = Time.now

      assign_attributes(
        last_error: error.message,
        failed_at:  Time.now,
        processed:  0,
        retries:    retries
      )

      step.save
      notify(:failed)

      raise error
    end

    def increment_processed
      inc(:processed, 1)
    end

    def args_for_resque
      [process.id.to_s, step.id.to_s, id.to_s]
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Massive
  # Redis-backed, self-expiring lock helper. Including classes must respond to
  # #id (it is part of the lock key).
  module Locking
    # Tries to take the lock for +key+ and reports whether it was ALREADY held.
    #
    # key       - name of the lock, scoped to this object by #lock_key_for.
    # expire_in - lifetime of the lock in milliseconds (default: 60 seconds).
    #
    # Returns false when the lock was free (it is now taken and set to expire),
    # true when somebody else holds it.
    #
    # NOTE(review): SETNX and the expiration are two separate commands; if the
    # process dies in between, the key is left without a TTL.
    def locked?(key, expire_in=60 * 1000)
      lock_key = lock_key_for(key)

      acquired = redis.setnx(lock_key, Time.now.to_i + expire_in / 1000)
      expire(lock_key, expire_in) if acquired

      !acquired
    end

    protected

    # Redis key for +key+: "<underscored class name>:<id>:<key>".
    def lock_key_for(key)
      "#{self.class.name.underscore}:#{id}:#{key}"
    end

    # Sets the key's time to live, in milliseconds via PEXPIRE. When Redis
    # rejects that command, falls back to EXPIRE with whole seconds.
    def expire(lock_key, expire_in)
      redis.pexpire(lock_key, expire_in)
    rescue Redis::CommandError
      # A sub-second expiration truncates to 0 seconds, and EXPIRE with a
      # non-positive timeout deletes the key, which would release the lock
      # immediately. Keep it for at least one second instead.
      seconds = [(expire_in / 1000).to_i, 1].max

      redis.expire(lock_key, seconds)
    end

    # Memoized gem-wide Redis connection.
    def redis
      @redis ||= Massive.redis
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Massive
  # Concern that adds a memory_consumption field to the including document and
  # a helper to sample the current process' memory usage.
  module MemoryConsumption
    extend ActiveSupport::Concern

    included do
      field :memory_consumption, type: Integer, default: 0
    end

    # Resident set size of the current Ruby process as reported by
    # `ps -o rss=` (typically kilobytes; confirm for the target platform).
    # Returns 0 when the command cannot be run or raises a StandardError.
    def current_memory_consumption
      command = "ps -o rss= -p #{::Process.pid}"

      IO.popen(command) { |ps| ps.gets.to_i }
    rescue StandardError
      0
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Massive
  # Concern that lets a document publish notifications about itself through a
  # configurable notifier class (Massive::Notifiers::Base by default).
  module Notifications
    extend ActiveSupport::Concern

    included do
      notifier :base
    end

    # Sends +message+ through the notifier. The payload is built lazily, inside
    # the block, as the serializer wrapped around the reloaded document.
    # Does nothing (returns nil) when there is no active_model_serializer.
    def notify(message)
      return unless active_model_serializer

      notifier.notify(message) { active_model_serializer.new(reload) }
    end

    # Memoized notifier instance built from the class-level configuration.
    def notifier
      @notifier ||= begin
        configuration = self.class
        configuration.notifier_class.new(notifier_id, configuration.notifier_options)
      end
    end

    # Identifier handed to the notifier: the underscored class name with "/"
    # replaced by "-", followed by "-<id>".
    def notifier_id
      prefix = self.class.name.underscore.gsub('/', '-')

      "#{prefix}-#{id}"
    end

    module ClassMethods
      # Configures the notifier for this class.
      #
      # name    - a notifier Class, or a name resolved to the constant under
      #           Massive::Notifiers (e.g. :base).
      # options - passed to the notifier constructor.
      def notifier(name, options={})
        @notifier_class =
          if name.is_a?(Class)
            name
          else
            "massive/notifiers/#{name}".camelize.constantize
          end

        @notifier_options = options
      end

      # Configured notifier class, Massive::Notifiers::Base when none is set.
      def notifier_class
        @notifier_class || Massive::Notifiers::Base
      end

      # Options given to .notifier (nil when it was never called on this class).
      def notifier_options
        @notifier_options
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Massive
  module Notifiers
    # Base notifier. It only remembers the last notification it sent (in
    # #last), and skips a message while a lock for that message is held.
    class Base
      include Massive::Locking

      attr_accessor :id, :last, :options

      # id      - identifier of the notifier (also part of its lock keys).
      # options - settings hash; nil is treated as an empty hash.
      def initialize(id, options={})
        self.id      = id
        self.last    = {}
        self.options = options || {}
      end

      # Sends +message+ unless a lock for it is currently held. The payload is
      # +data+, or the result of the block when one is given; the block is not
      # called when the notification is skipped.
      def notify(message, data=nil, &block)
        return if locked?(message, expiration)

        send_notification(message, data, &block)
      end

      protected

      # Records the notification in #last. The block, when given, takes
      # precedence over +data+.
      def send_notification(message, data, &block)
        payload = block_given? ? block.call : data

        self.last = { message: message, data: payload }
      end

      # Lock lifetime in milliseconds between notifications of the same
      # message: options[:expiration], or 1000 (1 second) by default.
      def expiration
        options[:expiration] || 1000
      end
    end
  end
end
|