massive 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +15 -0
  2. data/.gitignore +22 -0
  3. data/.rspec +3 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +7 -0
  6. data/Gemfile +19 -0
  7. data/Gemfile.lock +141 -0
  8. data/Guardfile +9 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +196 -0
  11. data/Rakefile +8 -0
  12. data/lib/massive.rb +63 -0
  13. data/lib/massive/cancelling.rb +20 -0
  14. data/lib/massive/file.rb +80 -0
  15. data/lib/massive/file_job.rb +9 -0
  16. data/lib/massive/file_process.rb +7 -0
  17. data/lib/massive/file_step.rb +7 -0
  18. data/lib/massive/job.rb +115 -0
  19. data/lib/massive/locking.rb +27 -0
  20. data/lib/massive/memory_consumption.rb +15 -0
  21. data/lib/massive/notifications.rb +40 -0
  22. data/lib/massive/notifiers.rb +6 -0
  23. data/lib/massive/notifiers/base.rb +32 -0
  24. data/lib/massive/notifiers/pusher.rb +17 -0
  25. data/lib/massive/process.rb +69 -0
  26. data/lib/massive/process_serializer.rb +12 -0
  27. data/lib/massive/retry.rb +49 -0
  28. data/lib/massive/status.rb +59 -0
  29. data/lib/massive/step.rb +143 -0
  30. data/lib/massive/step_serializer.rb +12 -0
  31. data/lib/massive/timing_support.rb +10 -0
  32. data/lib/massive/version.rb +3 -0
  33. data/massive.gemspec +23 -0
  34. data/spec/fixtures/custom_job.rb +4 -0
  35. data/spec/fixtures/custom_step.rb +19 -0
  36. data/spec/models/massive/cancelling_spec.rb +83 -0
  37. data/spec/models/massive/file_job_spec.rb +24 -0
  38. data/spec/models/massive/file_spec.rb +209 -0
  39. data/spec/models/massive/file_step_spec.rb +22 -0
  40. data/spec/models/massive/job_spec.rb +319 -0
  41. data/spec/models/massive/locking_spec.rb +52 -0
  42. data/spec/models/massive/memory_consumption_spec.rb +24 -0
  43. data/spec/models/massive/notifications_spec.rb +107 -0
  44. data/spec/models/massive/notifiers/base_spec.rb +48 -0
  45. data/spec/models/massive/notifiers/pusher_spec.rb +49 -0
  46. data/spec/models/massive/process_serializer_spec.rb +38 -0
  47. data/spec/models/massive/process_spec.rb +235 -0
  48. data/spec/models/massive/status_spec.rb +104 -0
  49. data/spec/models/massive/step_serializer_spec.rb +40 -0
  50. data/spec/models/massive/step_spec.rb +490 -0
  51. data/spec/models/massive/timing_support_spec.rb +55 -0
  52. data/spec/shared/step_context.rb +25 -0
  53. data/spec/spec_helper.rb +42 -0
  54. data/spec/support/mongoid.yml +78 -0
  55. metadata +175 -0
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require "rspec/core/rake_task"
4
+
5
# `rake spec` runs the whole RSpec suite.
desc "Run all examples"
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` runs the specs.
task default: :spec
@@ -0,0 +1,63 @@
1
+ require "massive/version"
2
+
3
# Top-level namespace of the gem.
#
# Registers a lazy autoload for every component and holds the gem-wide
# settings: the Redis connection and the Fog storage options.
module Massive
  # Mixins
  autoload :MemoryConsumption, 'massive/memory_consumption'
  autoload :TimingSupport,     'massive/timing_support'
  autoload :Status,            'massive/status'
  autoload :Locking,           'massive/locking'
  autoload :Retry,             'massive/retry'
  autoload :Cancelling,        'massive/cancelling'

  # Core models
  autoload :Process,           'massive/process'
  autoload :Step,              'massive/step'
  autoload :Job,               'massive/job'

  # File-based variants
  autoload :File,              'massive/file'
  autoload :FileProcess,       'massive/file_process'
  autoload :FileStep,          'massive/file_step'
  autoload :FileJob,           'massive/file_job'

  # Notifications
  autoload :Notifications,     'massive/notifications'
  autoload :Notifiers,         'massive/notifiers'

  # Serializers
  autoload :ProcessSerializer, 'massive/process_serializer'
  autoload :StepSerializer,    'massive/step_serializer'

  # Raised by Massive::Cancelling#cancelling when the work has been cancelled.
  class Cancelled < StandardError; end

  class << self
    # Readers for the Fog settings; each returns whatever was last assigned
    # (nil when never assigned).
    attr_reader :fog_credentials, :fog_authenticated_url_expiration, :fog_directory

    # Redis connection, taken from Resque on first use and memoized.
    def redis
      @redis ||= Resque.redis
    end

    # Credentials hash handed to Fog::Storage.new by Massive::File.
    def fog_credentials=(values=nil)
      @fog_credentials = values
    end

    # Lifetime of authenticated file URLs; Massive::File adds it to the
    # current epoch time in seconds.
    def fog_authenticated_url_expiration=(value=nil)
      @fog_authenticated_url_expiration = value
    end

    # Key of the Fog directory in which files are looked up.
    def fog_directory=(directory=nil)
      @fog_directory = directory
    end
  end

  # Defaults: a directory named "massive" and one-hour authenticated URLs.
  self.fog_directory = 'massive'
  self.fog_authenticated_url_expiration = 1 * 60 * 60
end
59
+
60
+ require "resque"
61
+ require "mongoid"
62
+ require "active_model_serializers"
63
+ require "file_processor"
@@ -0,0 +1,20 @@
1
module Massive
  # Mixin that lets a unit of work refuse to run once it has been cancelled.
  module Cancelling
    extend ActiveSupport::Concern

    # Hook for including classes: return a truthy value when the work should
    # stop. This default returns nil, so nothing is ever cancelled.
    def cancelled?
    end

    # Calls the block, unless #cancelled? is truthy, in which case
    # Massive::Cancelled is raised before the block runs.
    def cancelling(&block)
      if cancelled?
        raise Massive::Cancelled, cancelled_exception_message
      end

      block.call
    end

    private

    # Message for the Cancelled error; the id part is left blank for objects
    # that do not respond to #id.
    def cancelled_exception_message
      identifier = respond_to?(:id) ? id : nil

      "Cancelled #{self.class.name} - #{identifier}"
    end
  end
end
@@ -0,0 +1,80 @@
1
module Massive
  # File attached to a process (embedded document). It is read either from
  # its stored +url+ or, when none is stored, from a Fog URL derived from
  # +filename+.
  class File
    include Mongoid::Document
    include Mongoid::Timestamps

    embedded_in :process

    field :url,         type: String
    field :filename,    type: String

    field :encoding,    type: String
    field :col_sep,     type: String
    field :total_count, type: Integer
    field :use_headers, type: Boolean, default: true

    field :headers,     type: Array, default: -> { [] }
    field :sample_data, type: Array, default: -> { [] }

    # Memoized CSV processor for #url, configured by #processor_options as
    # they are at the time of the first call.
    def processor
      @processor ||= FileProcessor::CSV.new(url, processor_options)
    end

    # Resets the gathered attributes, then stores what the processor reports
    # (encoding, column separator, total count, headers when enabled) plus
    # the rows yielded by a 3-row-limited range, and saves the document.
    def gather_info!
      clear_info

      self.encoding    = processor.detected_encoding
      self.col_sep     = processor.col_sep
      self.total_count = processor.total_count

      if use_headers?
        self.headers = processor.shift && processor.headers
      end

      processor.process_range(limit: 3) do |row|
        sample = use_headers? ? row.fields : row
        sample_data << sample
      end

      save
    end

    # Stored url when present, otherwise the Fog URL (nil when Fog cannot be
    # used).
    def url
      stored = read_attribute(:url).presence
      stored || authenticated_url
    end

    private

    # Nils out the gathered attributes and empties the sample rows.
    def clear_info
      [:encoding, :col_sep, :total_count, :headers].each do |name|
        self[name] = nil
      end

      sample_data.clear
    end

    def processor_options
      { headers: use_headers?, encoding: encoding, col_sep: col_sep }
    end

    # Expiring URL when the Fog file offers #url, its public URL otherwise.
    def authenticated_url
      return unless can_use_fog?

      if fog_file.respond_to?(:url)
        fog_file.url(Time.current.to_i + Massive.fog_authenticated_url_expiration)
      else
        fog_file.public_url
      end
    end

    # Fog needs both a filename and configured credentials.
    def can_use_fog?
      filename && Massive.fog_credentials.present?
    end

    def fog_connection
      @fog_connection ||= Fog::Storage.new(Massive.fog_credentials)
    end

    def fog_directory
      @fog_directory ||= fog_connection.directories.get(Massive.fog_directory)
    end

    def fog_file
      @fog_file ||= fog_directory.files.get(filename)
    end
  end
end
@@ -0,0 +1,9 @@
1
module Massive
  # Job whose items come from the file of its step.
  class FileJob < Job
    delegate :file, to: :step

    # Hands the block to the file processor for the part of this job's range
    # that is still left: both bounds are shifted by the number of items
    # already processed.
    def each_item(&block)
      start_at  = offset + processed
      remaining = limit - processed

      file.processor.process_range(offset: start_at, limit: remaining, &block)
    end
  end
end
@@ -0,0 +1,7 @@
1
module Massive
  # Process that embeds a single Massive::File, declared with autobuild and
  # assignable through nested attributes.
  class FileProcess < Massive::Process
    embeds_one(:file, class_name: 'Massive::File', autobuild: true)

    accepts_nested_attributes_for(:file)
  end
end
@@ -0,0 +1,7 @@
1
module Massive
  # Step that works on the file of its process; its total count is the
  # file's total_count.
  class FileStep < Step
    calculates_total_count_with do
      file.total_count
    end

    delegate(:file, to: :process)
  end
end
@@ -0,0 +1,115 @@
1
module Massive
  # Unit of work embedded in a step. A job is enqueued on Resque right after
  # it is created and, when performed, walks through its items while keeping
  # track of progress, cancellation and failures.
  #
  # Subclasses supply #each_item and #process_each.
  class Job
    include Mongoid::Document
    include Mongoid::Timestamps

    include Massive::Status
    include Massive::MemoryConsumption
    include Massive::TimingSupport
    include Massive::Retry
    include Massive::Cancelling

    embedded_in :step, class_name: 'Massive::Step'

    field :processed, type: Integer, default: 0
    field :offset,    type: Integer, default: 0
    field :limit,     type: Integer, default: -1

    delegate :process, :notify, to: :step

    define_model_callbacks :work

    after_create :enqueue

    class << self
      # Looks the job up through Massive::Process.find_job and runs it.
      def perform(process_id, step_id, job_id)
        job = Massive::Process.find_job(process_id, step_id, job_id)
        job.work
      end

      def queue
        :massive_job
      end
    end

    # Enqueues this job's class on Resque with the process, step and job ids
    # as strings.
    def enqueue
      Resque.enqueue(self.class, process.id.to_s, step.id.to_s, id.to_s)
    end

    # Starts the job, processes every item inside the :work callbacks and
    # finishes it. Cancellation is checked before starting and before each
    # item; errors are recorded by #handle_errors.
    def work
      handle_errors do
        cancelling do
          start!
          run_callbacks(:work) { work_through_items }
          finish!
        end
      end
    end

    # Stores the finish time and current memory consumption, then lets the
    # step know it may be complete.
    def finish!
      update_attributes(finished_at: Time.now, memory_consumption: current_memory_consumption)

      step.complete
    end

    def each_item(&block)
      # iterate through each item within offset/limit range
    end

    def process_each(item, index)
      # process an item
    end

    protected

    def attributes_to_reset
      super.merge(processed: 0)
    end

    # A job is cancelled when its process is.
    def cancelled?
      process.cancelled?
    end

    private

    # Per-item pipeline: process the item, count it and notify progress,
    # each item wrapped in the retry and cancellation checks.
    def work_through_items
      each_item do |item, index|
        retrying do
          cancelling do
            process_each(item, index)
            increment_processed
            notify(:progress)
          end
        end
      end
    end

    # Runs the block and records how it ended badly, if it did:
    # * cancellation marks job and step as cancelled, notifies and is swallowed;
    # * any other error (or signal) marks job and step as failed, resets the
    #   processed counter, notifies and is re-raised.
    def handle_errors(&block)
      block.call
    rescue Massive::Cancelled
      assign_attributes(cancelled_at: Time.now)
      step.update_attributes(cancelled_at: Time.now)

      notify(:cancelled)
    rescue StandardError, SignalException => error
      step.failed_at = Time.now

      assign_attributes(
        last_error: error.message,
        failed_at:  Time.now,
        processed:  0,
        retries:    retries
      )

      step.save
      notify(:failed)

      raise
    end

    def increment_processed
      inc(:processed, 1)
    end

    def args_for_resque
      [process.id.to_s, step.id.to_s, id.to_s]
    end
  end
end
@@ -0,0 +1,27 @@
1
module Massive
  # Redis-backed, self-expiring lock. The including object must respond to
  # +id+; the lock key is built from its class name, its id and the lock name.
  module Locking
    # Tries to take the lock named +key+ for this object.
    #
    # @param key [#to_s] lock name
    # @param expire_in [Integer] lifetime of the lock in milliseconds
    # @return [Boolean] false when the lock was free and has just been taken
    #   by this call, true when it was already held
    def locked?(key, expire_in=60 * 1000)
      lock_key = lock_key_for(key)

      # SETNX only writes when the key does not exist yet, so a truthy result
      # means this call acquired the lock and must give it a lifetime.
      acquired = redis.setnx(lock_key, Time.now.to_i + expire_in / 1000)
      expire(lock_key, expire_in) if acquired

      !acquired
    end

    protected

    def lock_key_for(key)
      "#{self.class.name.underscore}:#{id}:#{key}"
    end

    # Gives the lock a lifetime of +expire_in+ milliseconds, falling back to
    # second resolution when PEXPIRE fails with a command error (presumably a
    # server without PEXPIRE support).
    def expire(lock_key, expire_in)
      redis.pexpire(lock_key, expire_in)
    rescue Redis::CommandError
      seconds = (expire_in / 1000).to_i

      # EXPIRE with a zero timeout deletes the key, which would release the
      # lock immediately; a positive sub-second lifetime is held for one
      # second instead.
      seconds = 1 if seconds.zero? && expire_in > 0

      redis.expire(lock_key, seconds)
    end

    def redis
      @redis ||= Massive.redis
    end
  end
end
@@ -0,0 +1,15 @@
1
module Massive
  # Adds a +memory_consumption+ field to the including document and a helper
  # that measures the current process.
  module MemoryConsumption
    extend ActiveSupport::Concern

    included do
      field :memory_consumption, type: Integer, default: 0
    end

    # RSS of the current process as printed by `ps`, or 0 when reading it
    # raises a StandardError.
    def current_memory_consumption
      # ::Process is spelled out because Massive::Process also exists.
      pid = ::Process.pid

      IO.popen("ps -o rss= -p #{pid}") { |ps| ps.gets.to_i }
    rescue StandardError
      0
    end
  end
end
@@ -0,0 +1,40 @@
1
module Massive
  # Lets a model send notifications about itself through a configurable
  # notifier class.
  module Notifications
    extend ActiveSupport::Concern

    included do
      notifier :base
    end

    # Sends +message+ through the notifier, with the reloaded record wrapped
    # in its serializer as payload. Does nothing without a serializer.
    def notify(message)
      return unless active_model_serializer

      notifier.notify(message) { active_model_serializer.new(reload) }
    end

    # Memoized notifier instance, built from the class-level configuration.
    def notifier
      @notifier ||= begin
        model = self.class
        model.notifier_class.new(notifier_id, model.notifier_options)
      end
    end

    # Underscored class name with "/" replaced by "-", followed by the id.
    def notifier_id
      prefix = self.class.name.underscore.tr('/', '-')

      "#{prefix}-#{id}"
    end

    module ClassMethods
      attr_reader :notifier_options

      # Configures the notifier: either a class, or a name looked up under
      # Massive::Notifiers.
      def notifier(name, options={})
        @notifier_class =
          if name.is_a?(Class)
            name
          else
            "massive/notifiers/#{name}".camelize.constantize
          end

        @notifier_options = options
      end

      # Configured notifier class, Massive::Notifiers::Base when none is set.
      def notifier_class
        @notifier_class || Massive::Notifiers::Base
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
module Massive
  # Namespace of the notifier classes, both registered for autoloading.
  module Notifiers
    autoload(:Base,   'massive/notifiers/base')
    autoload(:Pusher, 'massive/notifiers/pusher')
  end
end
@@ -0,0 +1,32 @@
1
module Massive
  module Notifiers
    # Basic notifier: remembers the last notification it was given and uses
    # Massive::Locking to skip notifications while a lock for the same
    # message is still held.
    class Base
      include Massive::Locking

      attr_accessor :id, :last, :options

      # A nil +options+ is treated as an empty hash.
      def initialize(id, options={})
        self.id      = id
        self.last    = {}
        self.options = options || {}
      end

      # Sends the notification, unless one with the same message is locked.
      # The payload is +data+, or the block's result when a block is given.
      def notify(message, data=nil, &block)
        return if locked?(message, expiration)

        send_notification(message, data, &block)
      end

      protected

      def send_notification(message, data, &block)
        payload = block_given? ? block.call : data

        self.last = { message: message, data: payload }
      end

      # Value passed to locked? as the lock lifetime.
      def expiration
        options[:expiration] || 1000 # 1 second between each notification
      end
    end
  end
end