massive 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. checksums.yaml +15 -0
  2. data/.gitignore +22 -0
  3. data/.rspec +3 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +19 -0
  7. data/Gemfile.lock +141 -0
  8. data/Guardfile +9 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +196 -0
  11. data/Rakefile +8 -0
  12. data/lib/massive.rb +63 -0
  13. data/lib/massive/cancelling.rb +20 -0
  14. data/lib/massive/file.rb +80 -0
  15. data/lib/massive/file_job.rb +9 -0
  16. data/lib/massive/file_process.rb +7 -0
  17. data/lib/massive/file_step.rb +7 -0
  18. data/lib/massive/job.rb +115 -0
  19. data/lib/massive/locking.rb +27 -0
  20. data/lib/massive/memory_consumption.rb +15 -0
  21. data/lib/massive/notifications.rb +40 -0
  22. data/lib/massive/notifiers.rb +6 -0
  23. data/lib/massive/notifiers/base.rb +32 -0
  24. data/lib/massive/notifiers/pusher.rb +17 -0
  25. data/lib/massive/process.rb +69 -0
  26. data/lib/massive/process_serializer.rb +12 -0
  27. data/lib/massive/retry.rb +49 -0
  28. data/lib/massive/status.rb +59 -0
  29. data/lib/massive/step.rb +143 -0
  30. data/lib/massive/step_serializer.rb +12 -0
  31. data/lib/massive/timing_support.rb +10 -0
  32. data/lib/massive/version.rb +3 -0
  33. data/massive.gemspec +23 -0
  34. data/spec/fixtures/custom_job.rb +4 -0
  35. data/spec/fixtures/custom_step.rb +19 -0
  36. data/spec/models/massive/cancelling_spec.rb +83 -0
  37. data/spec/models/massive/file_job_spec.rb +24 -0
  38. data/spec/models/massive/file_spec.rb +209 -0
  39. data/spec/models/massive/file_step_spec.rb +22 -0
  40. data/spec/models/massive/job_spec.rb +319 -0
  41. data/spec/models/massive/locking_spec.rb +52 -0
  42. data/spec/models/massive/memory_consumption_spec.rb +24 -0
  43. data/spec/models/massive/notifications_spec.rb +107 -0
  44. data/spec/models/massive/notifiers/base_spec.rb +48 -0
  45. data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
  46. data/spec/models/massive/process_serializer_spec.rb +38 -0
  47. data/spec/models/massive/process_spec.rb +235 -0
  48. data/spec/models/massive/status_spec.rb +104 -0
  49. data/spec/models/massive/step_serializer_spec.rb +40 -0
  50. data/spec/models/massive/step_spec.rb +490 -0
  51. data/spec/models/massive/timing_support_spec.rb +55 -0
  52. data/spec/shared/step_context.rb +25 -0
  53. data/spec/spec_helper.rb +42 -0
  54. data/spec/support/mongoid.yml +78 -0
  55. metadata +175 -0
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require "rspec/core/rake_task"
4
+
5
# `rake spec` runs the whole RSpec suite.
desc "Run all examples"
RSpec::Core::RakeTask.new(:spec)

# Bare `rake` is an alias for `rake spec`.
task default: :spec
@@ -0,0 +1,63 @@
1
+ require "massive/version"
2
+
3
# Top-level namespace of the gem. Registers every component for lazy loading
# and holds the module-wide settings (Redis handle and Fog storage options).
module Massive
  # Mixins shared by the persisted models.
  autoload(:MemoryConsumption, 'massive/memory_consumption')
  autoload(:TimingSupport, 'massive/timing_support')
  autoload(:Status, 'massive/status')
  autoload(:Locking, 'massive/locking')
  autoload(:Retry, 'massive/retry')
  autoload(:Cancelling, 'massive/cancelling')

  # Core process / step / job hierarchy.
  autoload(:Process, 'massive/process')
  autoload(:Step, 'massive/step')
  autoload(:Job, 'massive/job')

  # File-backed variants.
  autoload(:File, 'massive/file')
  autoload(:FileProcess, 'massive/file_process')
  autoload(:FileStep, 'massive/file_step')
  autoload(:FileJob, 'massive/file_job')

  # Notification plumbing.
  autoload(:Notifications, 'massive/notifications')
  autoload(:Notifiers, 'massive/notifiers')

  # Serializers.
  autoload(:ProcessSerializer, 'massive/process_serializer')
  autoload(:StepSerializer, 'massive/step_serializer')

  # Raised by Massive::Cancelling#cancelling when the receiver reports
  # itself as cancelled.
  class Cancelled < StandardError; end

  class << self
    # Fog settings are plain module-level instance variables; the readers
    # return nil until a value has been assigned.
    attr_reader :fog_credentials,
                :fog_authenticated_url_expiration,
                :fog_directory

    # Redis connection used by the gem; taken from Resque on first use and
    # memoized afterwards.
    def redis
      @redis ||= Resque.redis
    end

    # Credentials hash handed to Fog::Storage.new by Massive::File.
    def fog_credentials=(values=nil)
      @fog_credentials = values
    end

    # Offset added to the current epoch time when Massive::File asks Fog
    # for an authenticated URL.
    def fog_authenticated_url_expiration=(value=nil)
      @fog_authenticated_url_expiration = value
    end

    # Name of the Fog directory that Massive::File looks files up in.
    def fog_directory=(directory=nil)
      @fog_directory = directory
    end
  end

  # Defaults: directory "massive", URLs valid for one hour (in seconds).
  @fog_directory = 'massive'
  @fog_authenticated_url_expiration = 1 * 60 * 60
end
59
+
60
+ require "resque"
61
+ require "mongoid"
62
+ require "active_model_serializers"
63
+ require "file_processor"
@@ -0,0 +1,20 @@
1
module Massive
  # Mixin that guards a block of work with a cancellation check.
  module Cancelling
    extend ActiveSupport::Concern

    # Override this to provide logic for whether it should be cancelled or not.
    # The default implementation returns nil, i.e. "never cancelled".
    def cancelled?
      nil
    end

    # Runs the given block unless the receiver is cancelled.
    #
    # Raises Massive::Cancelled (before the block is invoked) when
    # #cancelled? is truthy; otherwise returns the block's value.
    def cancelling(&block)
      if cancelled?
        raise Massive::Cancelled.new(cancelled_exception_message)
      end

      block.call
    end

    private

    # Message carried by the Massive::Cancelled exception. The id part is
    # left empty when the receiver does not respond to #id.
    def cancelled_exception_message
      identifier = respond_to?(:id) ? self.id : nil

      "Cancelled #{self.class.name} - #{identifier}"
    end
  end
end
@@ -0,0 +1,80 @@
1
module Massive
  # Description of a CSV file attached to a process: where it lives, how it
  # should be parsed, and a small preview of its contents.
  class File
    include Mongoid::Document
    include Mongoid::Timestamps

    embedded_in :process

    # Location: either an explicit URL or a filename resolved through Fog.
    field :url,      type: String
    field :filename, type: String

    # Parsing options and statistics, filled in by #gather_info!.
    field :encoding,    type: String
    field :col_sep,     type: String
    field :total_count, type: Integer
    field :use_headers, type: Boolean, default: true

    field :headers,     type: Array, default: -> { [] }
    field :sample_data, type: Array, default: -> { [] }

    # Memoized FileProcessor::CSV built from #url and #processor_options.
    def processor
      @processor ||= FileProcessor::CSV.new(url, processor_options)
    end

    # Resets the gathered attributes, re-reads them from the processor,
    # stores up to three rows as sample data and saves the document.
    def gather_info!
      clear_info

      self.encoding    = processor.detected_encoding
      self.col_sep     = processor.col_sep
      self.total_count = processor.total_count

      if use_headers?
        # Headers are only read once a first row has been shifted off.
        self.headers = processor.shift && processor.headers
      end

      processor.process_range(limit: 3) do |row|
        sample = use_headers? ? row.fields : row
        sample_data << sample
      end

      save
    end

    # The stored URL when present, otherwise a URL obtained through Fog
    # (nil when Fog cannot be used).
    def url
      stored = read_attribute(:url).presence
      stored || authenticated_url
    end

    private

    # Blanks every attribute that #gather_info! recomputes.
    def clear_info
      [:encoding, :col_sep, :total_count, :headers].each do |name|
        self[name] = nil
      end

      sample_data.clear
    end

    # Options passed to FileProcessor::CSV.new.
    def processor_options
      { headers: use_headers?, encoding: encoding, col_sep: col_sep }
    end

    # Asks Fog for a URL: an expiring one when the file object supports
    # #url, its public URL otherwise. Returns nil if Fog is not usable.
    def authenticated_url
      return unless can_use_fog?

      if fog_file.respond_to?(:url)
        fog_file.url(Time.current.to_i + Massive.fog_authenticated_url_expiration)
      else
        fog_file.public_url
      end
    end

    # Fog needs both a filename and configured credentials.
    def can_use_fog?
      filename && Massive.fog_credentials.present?
    end

    def fog_connection
      @fog_connection ||= Fog::Storage.new(Massive.fog_credentials)
    end

    def fog_directory
      @fog_directory ||= fog_connection.directories.get(Massive.fog_directory)
    end

    def fog_file
      @fog_file ||= fog_directory.files.get(filename)
    end
  end
end
@@ -0,0 +1,9 @@
1
module Massive
  # Job whose items are the rows of the file attached to its step.
  class FileJob < Job
    delegate :file, to: :step

    # Yields the rows of this job's slice of the file, skipping the ones
    # already counted in +processed+.
    def each_item(&block)
      first_row = offset + processed
      remaining = limit - processed

      file.processor.process_range(offset: first_row, limit: remaining, &block)
    end
  end
end
@@ -0,0 +1,7 @@
1
module Massive
  # Process that carries a single embedded Massive::File.
  class FileProcess < Massive::Process
    # The embedded file is declared with autobuild: true.
    embeds_one(:file, class_name: 'Massive::File', autobuild: true)

    # Lets the file's attributes be assigned through the process.
    accepts_nested_attributes_for(:file)
  end
end
@@ -0,0 +1,7 @@
1
module Massive
  # Step whose total item count is the row count of its process's file.
  class FileStep < Step
    calculates_total_count_with do
      file.total_count
    end

    # The file lives on the owning process.
    delegate(:file, to: :process)
  end
end
@@ -0,0 +1,115 @@
1
module Massive
  # A slice of a step's work, embedded in the step document and run through
  # Resque. Subclasses supply #each_item and #process_each.
  class Job
    include Mongoid::Document
    include Mongoid::Timestamps

    include Massive::Status
    include Massive::MemoryConsumption
    include Massive::TimingSupport
    include Massive::Retry
    include Massive::Cancelling

    embedded_in :step, class_name: 'Massive::Step'

    field :processed, type: Integer, default: 0
    field :offset,    type: Integer, default: 0
    field :limit,     type: Integer, default: -1

    delegate :process, :notify, to: :step

    define_model_callbacks :work

    # A job is queued as soon as it has been created.
    after_create :enqueue

    class << self
      # Resque entry point: looks the job up by its three ids and runs it.
      def perform(process_id, step_id, job_id)
        job = Massive::Process.find_job(process_id, step_id, job_id)
        job.work
      end

      # Name of the Resque queue jobs are pushed to.
      def queue
        :massive_job
      end
    end

    # Pushes this job onto Resque, identified by the stringified ids of its
    # process, its step and itself.
    def enqueue
      Resque.enqueue(self.class, process.id.to_s, step.id.to_s, id.to_s)
    end

    # Runs the job: start!, the :work callbacks wrapped around the item
    # loop, then finish!. Cancellation is checked once up front (and again
    # per item); errors are recorded by #handle_errors.
    def work
      handle_errors do
        cancelling do
          start!

          run_callbacks(:work) { process_items }

          finish!
        end
      end
    end

    # Stamps the finish time and current memory usage, then lets the step
    # check for completion.
    def finish!
      update_attributes(finished_at: Time.now, memory_consumption: current_memory_consumption)

      step.complete
    end

    def each_item(&block)
      # iterate through each item within offset/limit range
    end

    def process_each(item, index)
      # process an item
    end

    protected

    # Adds the processed counter to the attributes returned by super.
    def attributes_to_reset
      super.merge(processed: 0)
    end

    # A job is cancelled when its process is.
    def cancelled?
      process.cancelled?
    end

    private

    # Item loop: each item is handled inside `retrying` and `cancelling`,
    # then counted and reported as progress.
    def process_items
      each_item do |item, index|
        retrying do
          cancelling do
            process_each(item, index)
            increment_processed
            notify(:progress)
          end
        end
      end
    end

    # Runs the block and records how it ended.
    #
    # Massive::Cancelled is swallowed: cancelled_at is set on the job and
    # the step, and :cancelled is notified. Any StandardError or
    # SignalException is recorded on the job and the step, :failed is
    # notified, and the exception is raised again.
    def handle_errors
      yield
    rescue Massive::Cancelled
      assign_attributes(cancelled_at: Time.now)
      step.update_attributes(cancelled_at: Time.now)

      notify(:cancelled)
    rescue StandardError, SignalException => error
      step.failed_at = Time.now

      assign_attributes(
        last_error: error.message,
        failed_at:  Time.now,
        processed:  0,
        retries:    retries
      )

      step.save
      notify(:failed)

      raise error
    end

    # Increments the processed counter by one via Mongoid's #inc.
    def increment_processed
      inc(:processed, 1)
    end

    # Same id triple that #enqueue sends to Resque.
    def args_for_resque
      [process, step, self].map { |document| document.id.to_s }
    end
  end
end
@@ -0,0 +1,27 @@
1
module Massive
  # Redis-backed, self-expiring lock keyed by the including object's class
  # name, its id and a caller-supplied key. The including object must
  # respond to #id.
  module Locking
    # Tries to take the lock for +key+.
    #
    # key       - anything interpolatable; becomes the last part of the
    #             Redis key.
    # expire_in - lifetime of the lock in milliseconds (default: 60 seconds).
    #
    # Returns false when the lock was free (it is now held and will expire
    # after +expire_in+), true when somebody already holds it.
    def locked?(key, expire_in=60 * 1000)
      lock_key = lock_key_for(key)

      # The stored value is the expected expiry time in epoch seconds.
      # NOTE: SETNX and the expiry below are two separate Redis commands.
      acquired = redis.setnx(lock_key, Time.now.to_i + expire_in / 1000)
      expire(lock_key, expire_in) if acquired

      !acquired
    end

    protected

    # Redis key of the form "<underscored class name>:<id>:<key>".
    def lock_key_for(key)
      "#{self.class.name.underscore}:#{id}:#{key}"
    end

    # Sets the expiry with millisecond precision, falling back to
    # whole-second EXPIRE when the server rejects PEXPIRE.
    def expire(lock_key, expire_in)
      redis.pexpire(lock_key, expire_in)
    rescue Redis::CommandError
      # A sub-second expiration would truncate to 0 seconds, and Redis
      # deletes a key given a non-positive timeout, so the lock would never
      # be held. Clamp to the smallest TTL EXPIRE can represent.
      seconds = [(expire_in / 1000).to_i, 1].max
      redis.expire(lock_key, seconds)
    end

    # Connection shared through Massive.redis, memoized per instance.
    def redis
      @redis ||= Massive.redis
    end
  end
end
@@ -0,0 +1,15 @@
1
module Massive
  # Adds a memory_consumption field and a helper that samples the current
  # process's memory usage.
  module MemoryConsumption
    extend ActiveSupport::Concern

    included do
      field :memory_consumption, type: Integer, default: 0
    end

    # Resident set size of the running Ruby process as reported by `ps`,
    # parsed from the first line of its output. Returns 0 when `ps` cannot
    # be run or any other StandardError occurs.
    def current_memory_consumption
      command = "ps -o rss= -p #{::Process.pid}"

      IO.popen(command) do |ps|
        ps.gets.to_i
      end
    rescue StandardError
      0
    end
  end
end
@@ -0,0 +1,40 @@
1
module Massive
  # Lets a model push serialized snapshots of itself through a notifier.
  module Notifications
    extend ActiveSupport::Concern

    included do
      # Every including class starts out with the base notifier.
      notifier :base
    end

    # Sends +message+ through the notifier. The payload is built lazily:
    # the record is reloaded and wrapped in its serializer only if the
    # notifier actually calls the block. Does nothing (returns nil) when
    # the model has no serializer.
    def notify(message)
      return unless active_model_serializer

      notifier.notify(message) { active_model_serializer.new(reload) }
    end

    # Memoized notifier instance built from the class-level configuration.
    def notifier
      @notifier ||= begin
        klass = self.class.notifier_class
        klass.new(notifier_id, self.class.notifier_options)
      end
    end

    # Identifier of the form "<underscored-class-name>-<id>", with
    # namespace slashes turned into dashes.
    def notifier_id
      channel = self.class.name.underscore.tr('/', '-')

      "#{channel}-#{id}"
    end

    module ClassMethods
      # Configuration is kept in class-level instance variables, so a class
      # that never calls .notifier itself sees the fallbacks of the readers
      # below.
      attr_reader :notifier_options

      # Selects the notifier for this class.
      #
      # name    - a notifier Class, or a name resolved under
      #           Massive::Notifiers (e.g. :pusher).
      # options - passed to the notifier's constructor.
      def notifier(name, options={})
        @notifier_class =
          if name.is_a?(Class)
            name
          else
            "massive/notifiers/#{name}".camelize.constantize
          end

        @notifier_options = options
      end

      # Configured notifier class, or Massive::Notifiers::Base if none.
      def notifier_class
        @notifier_class || Massive::Notifiers::Base
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
module Massive
  # Namespace for notifier implementations; each one is loaded on first use.
  module Notifiers
    autoload(:Base, 'massive/notifiers/base')
    autoload(:Pusher, 'massive/notifiers/pusher')
  end
end
@@ -0,0 +1,32 @@
1
module Massive
  module Notifiers
    # Default notifier: it only remembers the last notification it was
    # asked to send. Notifications are throttled per message through
    # Massive::Locking.
    class Base
      include Massive::Locking

      attr_accessor :id, :last, :options

      # id      - identifier of the notifier (also part of its lock keys).
      # options - configuration hash; nil is treated as an empty hash.
      def initialize(id, options={})
        self.id      = id
        self.last    = {}
        self.options = options || {}
      end

      # Sends the notification unless one with the same +message+ is still
      # locked. The payload comes from the block when one is given,
      # otherwise from +data+.
      def notify(message, data=nil, &block)
        return if locked?(message, expiration)

        send_notification(message, data, &block)
      end

      protected

      # Records the notification in #last. The block, if present, takes
      # precedence over +data+.
      def send_notification(message, data, &block)
        data = block.call if block_given?

        self.last = { message: message, data: data }
      end

      # Lock lifetime passed to #locked?.
      def expiration
        options[:expiration] || 1000 # 1 second between each notification
      end
    end
  end
end