massive 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +22 -0
- data/.rspec +3 -0
- data/.rvmrc +1 -0
- data/.travis.yml +7 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +141 -0
- data/Guardfile +9 -0
- data/LICENSE.txt +22 -0
- data/README.md +196 -0
- data/Rakefile +8 -0
- data/lib/massive.rb +63 -0
- data/lib/massive/cancelling.rb +20 -0
- data/lib/massive/file.rb +80 -0
- data/lib/massive/file_job.rb +9 -0
- data/lib/massive/file_process.rb +7 -0
- data/lib/massive/file_step.rb +7 -0
- data/lib/massive/job.rb +115 -0
- data/lib/massive/locking.rb +27 -0
- data/lib/massive/memory_consumption.rb +15 -0
- data/lib/massive/notifications.rb +40 -0
- data/lib/massive/notifiers.rb +6 -0
- data/lib/massive/notifiers/base.rb +32 -0
- data/lib/massive/notifiers/pusher.rb +17 -0
- data/lib/massive/process.rb +69 -0
- data/lib/massive/process_serializer.rb +12 -0
- data/lib/massive/retry.rb +49 -0
- data/lib/massive/status.rb +59 -0
- data/lib/massive/step.rb +143 -0
- data/lib/massive/step_serializer.rb +12 -0
- data/lib/massive/timing_support.rb +10 -0
- data/lib/massive/version.rb +3 -0
- data/massive.gemspec +23 -0
- data/spec/fixtures/custom_job.rb +4 -0
- data/spec/fixtures/custom_step.rb +19 -0
- data/spec/models/massive/cancelling_spec.rb +83 -0
- data/spec/models/massive/file_job_spec.rb +24 -0
- data/spec/models/massive/file_spec.rb +209 -0
- data/spec/models/massive/file_step_spec.rb +22 -0
- data/spec/models/massive/job_spec.rb +319 -0
- data/spec/models/massive/locking_spec.rb +52 -0
- data/spec/models/massive/memory_consumption_spec.rb +24 -0
- data/spec/models/massive/notifications_spec.rb +107 -0
- data/spec/models/massive/notifiers/base_spec.rb +48 -0
- data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
- data/spec/models/massive/process_serializer_spec.rb +38 -0
- data/spec/models/massive/process_spec.rb +235 -0
- data/spec/models/massive/status_spec.rb +104 -0
- data/spec/models/massive/step_serializer_spec.rb +40 -0
- data/spec/models/massive/step_spec.rb +490 -0
- data/spec/models/massive/timing_support_spec.rb +55 -0
- data/spec/shared/step_context.rb +25 -0
- data/spec/spec_helper.rb +42 -0
- data/spec/support/mongoid.yml +78 -0
- metadata +175 -0
data/Rakefile
ADDED
data/lib/massive.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require "massive/version"
|
2
|
+
|
3
|
+
# Root namespace of the gem. It registers the lazily loaded constants and
# keeps the module-wide settings: the Redis connection and the Fog storage
# configuration.
module Massive
  # Behaviour modules
  autoload :MemoryConsumption, 'massive/memory_consumption'
  autoload :TimingSupport,     'massive/timing_support'
  autoload :Status,            'massive/status'
  autoload :Locking,           'massive/locking'
  autoload :Retry,             'massive/retry'
  autoload :Cancelling,        'massive/cancelling'

  # Core documents
  autoload :Process, 'massive/process'
  autoload :Step,    'massive/step'
  autoload :Job,     'massive/job'

  # File based variants
  autoload :File,        'massive/file'
  autoload :FileProcess, 'massive/file_process'
  autoload :FileStep,    'massive/file_step'
  autoload :FileJob,     'massive/file_job'

  # Notification support
  autoload :Notifications, 'massive/notifications'
  autoload :Notifiers,     'massive/notifiers'

  # Serializers
  autoload :ProcessSerializer, 'massive/process_serializer'
  autoload :StepSerializer,    'massive/step_serializer'

  # Raised by Massive::Cancelling#cancelling when the receiver is cancelled.
  class Cancelled < StandardError; end

  # Defaults: directory name 'massive' and an authenticated URL lifetime of
  # one hour, expressed in seconds.
  @fog_directory = 'massive'
  @fog_authenticated_url_expiration = 60 * 60

  class << self
    # Readers for the Fog settings; fog_credentials is nil until assigned.
    attr_reader :fog_credentials, :fog_authenticated_url_expiration, :fog_directory

    # Redis connection used by the gem; taken from Resque on first use and
    # memoized afterwards.
    def redis
      @redis ||= Resque.redis
    end

    # Sets the credentials later handed to Fog::Storage.new.
    def fog_credentials=(values=nil)
      @fog_credentials = values
    end

    # Sets how long (in seconds) an authenticated file URL stays valid.
    def fog_authenticated_url_expiration=(value=nil)
      @fog_authenticated_url_expiration = value
    end

    # Sets the name of the Fog directory files are looked up in.
    def fog_directory=(directory=nil)
      @fog_directory = directory
    end
  end
end
|
59
|
+
|
60
|
+
require "resque"
|
61
|
+
require "mongoid"
|
62
|
+
require "active_model_serializers"
|
63
|
+
require "file_processor"
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Massive
  # Mixin that lets an object refuse to run a unit of work once it has been
  # cancelled, by raising Massive::Cancelled instead of calling the block.
  module Cancelling
    extend ActiveSupport::Concern

    # Hook for includers: return a truthy value when work must stop.
    # This default implementation returns nil, i.e. "not cancelled".
    def cancelled?
    end

    # Runs the given block, unless #cancelled? is truthy.
    #
    # Raises Massive::Cancelled (without calling the block) when cancelled.
    # Returns whatever the block returns otherwise.
    def cancelling(&block)
      if cancelled?
        raise Massive::Cancelled, cancelled_exception_message
      end

      block.call
    end

    private

    # Message carried by the Cancelled exception: the receiver's class name
    # followed by its id, when the receiver responds to #id.
    def cancelled_exception_message
      identifier = respond_to?(:id) ? id : nil

      "Cancelled #{self.class.name} - #{identifier}"
    end
  end
end
|
data/lib/massive/file.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
module Massive
  # Mongoid document, embedded in a process, describing a CSV file: where it
  # lives (explicit URL or a Fog-stored filename) plus the information
  # gathered about it by FileProcessor::CSV.
  class File
    include Mongoid::Document
    include Mongoid::Timestamps

    embedded_in :process

    # Location: either an explicit URL or a filename resolved through Fog.
    field :url,      type: String
    field :filename, type: String

    # Parsing details, filled in by #gather_info!.
    field :encoding,    type: String
    field :col_sep,     type: String
    field :total_count, type: Integer
    field :use_headers, type: Boolean, default: true

    field :headers,     type: Array, default: -> { [] }
    field :sample_data, type: Array, default: -> { [] }

    # Memoized CSV processor built from #url and the current parsing options.
    def processor
      @processor ||= FileProcessor::CSV.new(url, processor_options)
    end

    # Resets the gathered information, re-reads it from the processor
    # (encoding, column separator, total count, headers when enabled and the
    # rows yielded by process_range(limit: 3)) and saves the document.
    #
    # Returns the result of #save.
    def gather_info!
      clear_info

      csv = processor

      self.encoding    = csv.detected_encoding
      self.col_sep     = csv.col_sep
      self.total_count = csv.total_count

      if use_headers?
        self.headers = csv.shift && csv.headers
      end

      csv.process_range(limit: 3) do |row|
        sample = use_headers? ? row.fields : row
        sample_data << sample
      end

      save
    end

    # The stored URL when one is present, otherwise the URL obtained through
    # Fog (nil when Fog cannot be used).
    def url
      stored = read_attribute(:url)

      stored.present? ? stored : authenticated_url
    end

    private

    # Blanks every gathered attribute and empties the sample rows.
    def clear_info
      [:encoding, :col_sep, :total_count, :headers].each do |attribute|
        self[attribute] = nil
      end

      sample_data.clear
    end

    # Options handed to FileProcessor::CSV.new.
    def processor_options
      { headers: use_headers?, encoding: encoding, col_sep: col_sep }
    end

    # URL of the Fog file: a URL expiring after
    # Massive.fog_authenticated_url_expiration seconds when the file object
    # responds to #url, its public URL otherwise. Returns nil when Fog cannot
    # be used.
    def authenticated_url
      return unless can_use_fog?

      if fog_file.respond_to?(:url)
        fog_file.url(Time.current.to_i + Massive.fog_authenticated_url_expiration)
      else
        fog_file.public_url
      end
    end

    # Fog is usable when a filename is set and credentials were configured.
    def can_use_fog?
      filename && Massive.fog_credentials.present?
    end

    # Memoized Fog storage connection built from Massive.fog_credentials.
    def fog_connection
      @fog_connection ||= Fog::Storage.new(Massive.fog_credentials)
    end

    # Memoized Fog directory named by Massive.fog_directory.
    def fog_directory
      @fog_directory ||= fog_connection.directories.get(Massive.fog_directory)
    end

    # Memoized Fog file looked up by #filename inside #fog_directory.
    def fog_file
      @fog_file ||= fog_directory.files.get(filename)
    end
  end
end
|
data/lib/massive/job.rb
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
module Massive
  # Unit of work embedded in a Massive::Step. A job is enqueued on Resque
  # right after creation and, when performed, walks its items one by one,
  # tracking how many were processed.
  #
  # Subclasses are expected to provide #each_item and #process_each.
  class Job
    include Mongoid::Document
    include Mongoid::Timestamps

    include Massive::Status
    include Massive::MemoryConsumption
    include Massive::TimingSupport
    include Massive::Retry
    include Massive::Cancelling

    embedded_in :step, class_name: 'Massive::Step'

    field :processed, type: Integer, default: 0
    field :offset,    type: Integer, default: 0
    field :limit,     type: Integer, default: -1

    delegate :process, :notify, to: :step

    define_model_callbacks :work

    after_create :enqueue

    class << self
      # Resque entry point: looks the job up through Massive::Process and
      # runs it.
      def perform(process_id, step_id, job_id)
        job = Massive::Process.find_job(process_id, step_id, job_id)
        job.work
      end

      # Name of the Resque queue jobs are pushed to.
      def queue
        :massive_job
      end
    end

    # Pushes this job to Resque, identified by the process, step and job ids
    # (as strings).
    def enqueue
      Resque.enqueue(self.class, process.id.to_s, step.id.to_s, id.to_s)
    end

    # Runs the job: marks it started, processes every item inside the :work
    # callbacks and then finishes it. Each item is wrapped in retrying and in
    # a cancellation check; after an item is processed the counter is
    # incremented and a :progress notification is sent.
    #
    # Cancellation and failures are handled by #handle_errors.
    def work
      handle_errors do
        cancelling do
          start!

          run_callbacks(:work) do
            each_item do |item, index|
              retrying do
                cancelling do
                  process_each(item, index)
                  increment_processed
                  notify(:progress)
                end
              end
            end
          end

          finish!
        end
      end
    end

    # Records the finish time and the current memory consumption, then asks
    # the step to complete.
    def finish!
      update_attributes(
        finished_at: Time.now,
        memory_consumption: current_memory_consumption
      )

      step.complete
    end

    # Template method: iterate through each item within the offset/limit
    # range, yielding the item and its index. Does nothing by default.
    def each_item(&block)
    end

    # Template method: process a single item. Does nothing by default.
    def process_each(item, index)
    end

    protected

    # Adds the processed counter to the attributes reset by the ancestors.
    def attributes_to_reset
      super.merge(processed: 0)
    end

    # A job is cancelled whenever its process is.
    def cancelled?
      process.cancelled?
    end

    private

    # Runs the block, translating its outcome into state:
    #
    # * Massive::Cancelled - stamps cancelled_at on the job and the step,
    #   notifies :cancelled and swallows the exception;
    # * StandardError / SignalException - stamps the failure on the job and
    #   the step, resets the processed counter, saves the step, notifies
    #   :failed and re-raises the same exception.
    def handle_errors
      yield
    rescue Massive::Cancelled
      assign_attributes(cancelled_at: Time.now)
      step.update_attributes(cancelled_at: Time.now)

      notify(:cancelled)
    rescue StandardError, SignalException => error
      step.failed_at = Time.now

      assign_attributes(
        last_error: error.message,
        failed_at: Time.now,
        processed: 0,
        retries: retries
      )

      step.save
      notify(:failed)

      raise error
    end

    # Increments the processed counter by one through Mongoid's inc.
    def increment_processed
      inc(:processed, 1)
    end

    # Identifiers of the process, step and job, as strings.
    def args_for_resque
      [process, step, self].map { |document| document.id.to_s }
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Massive
  # Redis-backed lock with an expiration time. The including object must
  # respond to #id (used to build the lock key).
  module Locking
    # Tries to take the lock named +key+ and reports whether it was already
    # held by someone else.
    #
    # key       - name of the lock; namespaced through #lock_key_for.
    # expire_in - lifetime of a freshly taken lock, in milliseconds
    #             (default: 60 seconds).
    #
    # Returns false when this call acquired the lock (an expiration is then
    # set on it) and true when the lock already existed.
    def locked?(key, expire_in=60 * 1000)
      lock_key = lock_key_for(key)

      # SETNX only succeeds when the key does not exist yet. The stored value
      # is the epoch second at which the lock is expected to lapse.
      acquired = redis.setnx(lock_key, Time.now.to_i + expire_in / 1000)
      expire(lock_key, expire_in) if acquired

      !acquired
    end

    protected

    # Redis key of the lock: underscored class name, id and lock name.
    def lock_key_for(key)
      "#{self.class.name.underscore}:#{id}:#{key}"
    end

    # Sets the lifetime of +lock_key+ to +expire_in+ milliseconds.
    #
    # When PEXPIRE raises Redis::CommandError (e.g. a server that does not
    # implement it) the second-granular EXPIRE is used instead. The lifetime
    # is rounded UP to whole seconds there: truncating would turn any
    # sub-second lifetime into `EXPIRE key 0`, which deletes the key at once
    # and makes the lock useless.
    def expire(lock_key, expire_in)
      redis.pexpire(lock_key, expire_in)
    rescue Redis::CommandError
      redis.expire(lock_key, (expire_in / 1000.0).ceil)
    end

    # Memoized Redis connection shared through Massive.redis.
    def redis
      @redis ||= Massive.redis
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Massive
  # Adds a memory_consumption field to the including document, together with
  # a helper that measures the memory used by the current OS process.
  module MemoryConsumption
    extend ActiveSupport::Concern

    included do
      field :memory_consumption, type: Integer, default: 0
    end

    # Resident set size of the running process, as reported by
    # `ps -o rss=` (presumably kilobytes - depends on the platform's ps).
    #
    # Returns an Integer; 0 when ps prints nothing parseable or when any
    # StandardError is raised while running it.
    def current_memory_consumption
      command = "ps -o rss= -p #{::Process.pid}"

      IO.popen(command) do |ps|
        ps.gets.to_i
      end
    rescue StandardError
      0
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Massive
  # Mixin giving a document a notifier: an object that receives messages
  # along with a serialized, freshly reloaded copy of the document.
  module Notifications
    extend ActiveSupport::Concern

    included do
      # Every includer starts with the base notifier.
      notifier :base
    end

    # Sends +message+ through the notifier. The payload - the reloaded
    # document wrapped in its active model serializer - is built lazily
    # inside the block handed to the notifier.
    #
    # Does nothing (returns nil) when the document has no serializer.
    def notify(message)
      return unless active_model_serializer

      notifier.notify(message) { active_model_serializer.new(reload) }
    end

    # Memoized notifier instance, built from the class-level configuration.
    def notifier
      @notifier ||= begin
        klass = self.class.notifier_class
        klass.new(notifier_id, self.class.notifier_options)
      end
    end

    # Identifier handed to the notifier: the underscored class name with
    # slashes turned into dashes, followed by the document id.
    def notifier_id
      prefix = self.class.name.underscore.gsub('/', '-')

      "#{prefix}-#{id}"
    end

    module ClassMethods
      # Options given to the last .notifier call.
      attr_reader :notifier_options

      # Configures the notifier of this class.
      #
      # name    - a notifier Class, or a name resolved under
      #           Massive::Notifiers (e.g. :base).
      # options - Hash passed to the notifier's constructor.
      def notifier(name, options={})
        @notifier_class =
          if name.is_a?(Class)
            name
          else
            "massive/notifiers/#{name}".camelize.constantize
          end

        @notifier_options = options
      end

      # Configured notifier class; Massive::Notifiers::Base when unset.
      def notifier_class
        @notifier_class || Massive::Notifiers::Base
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Massive
  module Notifiers
    # Base notifier: it only remembers the last notification it sent.
    # Notifications are throttled through Massive::Locking, using the message
    # as lock name.
    class Base
      include Massive::Locking

      attr_accessor :id, :last, :options

      # id      - identifier of the notified object (part of the lock key).
      # options - Hash of settings; nil is treated as an empty Hash.
      def initialize(id, options={})
        self.id = id
        self.last = {}

        self.options = options || {}
      end

      # Sends +message+ unless a notification for the same message is still
      # locked. The payload is +data+, or the block's result when a block is
      # given; the block is only called when the notification is sent.
      #
      # Returns nil when the notification was skipped.
      def notify(message, data=nil, &block)
        return if locked?(message, expiration)

        send_notification(message, data, &block)
      end

      protected

      # Resolves the payload and stores the notification in #last.
      def send_notification(message, data, &block)
        payload = block_given? ? block.call : data

        self.last = { message: message, data: payload }
      end

      # Lock lifetime in milliseconds; defaults to 1000, i.e. one second
      # between notifications of the same message.
      def expiration
        options[:expiration] || 1000
      end
    end
  end
end
|