rocketjob 3.5.2 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +63 -1
  3. data/bin/rocketjob +1 -0
  4. data/bin/rocketjob_batch_perf +11 -0
  5. data/lib/rocket_job/batch.rb +32 -0
  6. data/lib/rocket_job/batch/callbacks.rb +40 -0
  7. data/lib/rocket_job/batch/io.rb +154 -0
  8. data/lib/rocket_job/batch/logger.rb +57 -0
  9. data/lib/rocket_job/batch/lower_priority.rb +54 -0
  10. data/lib/rocket_job/batch/model.rb +157 -0
  11. data/lib/rocket_job/batch/performance.rb +99 -0
  12. data/lib/rocket_job/batch/result.rb +8 -0
  13. data/lib/rocket_job/batch/results.rb +9 -0
  14. data/lib/rocket_job/batch/state_machine.rb +102 -0
  15. data/lib/rocket_job/batch/statistics.rb +88 -0
  16. data/lib/rocket_job/batch/tabular.rb +56 -0
  17. data/lib/rocket_job/batch/tabular/input.rb +123 -0
  18. data/lib/rocket_job/batch/tabular/output.rb +59 -0
  19. data/lib/rocket_job/batch/throttle.rb +91 -0
  20. data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
  21. data/lib/rocket_job/batch/worker.rb +288 -0
  22. data/lib/rocket_job/cli.rb +29 -7
  23. data/lib/rocket_job/config.rb +1 -1
  24. data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
  25. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
  26. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
  27. data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
  28. data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
  29. data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
  31. data/lib/rocket_job/jobs/performance_job.rb +18 -0
  32. data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
  33. data/lib/rocket_job/plugins/document.rb +2 -8
  34. data/lib/rocket_job/plugins/job/persistence.rb +6 -4
  35. data/lib/rocket_job/plugins/job/throttle.rb +3 -6
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -2
  37. data/lib/rocket_job/server.rb +14 -3
  38. data/lib/rocket_job/sliced/input.rb +336 -0
  39. data/lib/rocket_job/sliced/output.rb +99 -0
  40. data/lib/rocket_job/sliced/slice.rb +166 -0
  41. data/lib/rocket_job/sliced/slices.rb +166 -0
  42. data/lib/rocket_job/sliced/writer/input.rb +60 -0
  43. data/lib/rocket_job/sliced/writer/output.rb +82 -0
  44. data/lib/rocket_job/version.rb +1 -1
  45. data/lib/rocket_job/worker.rb +2 -2
  46. data/lib/rocketjob.rb +28 -0
  47. metadata +51 -62
  48. data/test/config/database.yml +0 -5
  49. data/test/config/mongoid.yml +0 -88
  50. data/test/config_test.rb +0 -10
  51. data/test/dirmon_entry_test.rb +0 -313
  52. data/test/dirmon_job_test.rb +0 -216
  53. data/test/files/text.txt +0 -3
  54. data/test/job_test.rb +0 -71
  55. data/test/jobs/housekeeping_job_test.rb +0 -102
  56. data/test/jobs/on_demand_job_test.rb +0 -59
  57. data/test/jobs/upload_file_job_test.rb +0 -107
  58. data/test/plugins/cron_test.rb +0 -166
  59. data/test/plugins/job/callbacks_test.rb +0 -166
  60. data/test/plugins/job/defaults_test.rb +0 -53
  61. data/test/plugins/job/logger_test.rb +0 -56
  62. data/test/plugins/job/model_test.rb +0 -94
  63. data/test/plugins/job/persistence_test.rb +0 -94
  64. data/test/plugins/job/state_machine_test.rb +0 -116
  65. data/test/plugins/job/throttle_test.rb +0 -111
  66. data/test/plugins/job/worker_test.rb +0 -199
  67. data/test/plugins/processing_window_test.rb +0 -109
  68. data/test/plugins/restart_test.rb +0 -193
  69. data/test/plugins/retry_test.rb +0 -88
  70. data/test/plugins/singleton_test.rb +0 -92
  71. data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
  72. data/test/plugins/state_machine_test.rb +0 -67
  73. data/test/plugins/transaction_test.rb +0 -84
  74. data/test/test_db.sqlite3 +0 -0
  75. data/test/test_helper.rb +0 -17
@@ -0,0 +1,157 @@
1
+ require 'active_support/concern'
2
+ module RocketJob
3
+ module Batch
4
+ # Model attributes
5
+ module Model
6
+ extend ActiveSupport::Concern
7
+
included do
  #
  # User definable attributes
  #
  # The following attributes are set when the job is created

  # Number of records to include in each slice that is processed
  # Note:
  #   slice_size is only used by SlicedJob#upload & Sliced::Input#upload
  #   When slices are supplied directly, their size is not modified to match this number
  field :slice_size, type: Integer, default: 100, class_attribute: true, user_editable: true, copy_on_restart: true

  # Whether to retain nil results.
  #
  # Only applicable if `collect_output` is `true`
  # Set to `false` to prevent collecting output from the perform
  # method when it returns `nil`.
  field :collect_nil_output, type: Boolean, default: true, class_attribute: true

  # Optional Array<Symbol> list of categories that this job can output to
  #
  # By using categories the output from #perform can be placed in different
  # output collections, and therefore different output files
  #
  # Categories must be declared in advance to avoid a #perform method
  # accidentally writing its results to an unknown category
  field :output_categories, type: Array, default: [:main], class_attribute: true

  # Optional Array<Symbol> list of categories that this job can load input data into
  field :input_categories, type: Array, default: [:main], class_attribute: true

  # The file name of the uploaded file, if any.
  # Set by #upload if a file name was supplied, but can also be set explicitly.
  # May or may not include the fully qualified path name.
  field :upload_file_name, type: String

  #
  # Values that jobs can also update during processing
  #

  # Number of records in this job
  # Note:
  #   A record_count of nil means it has not been set and workers will
  #   _not_ complete the job when processing slices.
  #   This allows workers to start processing slices while slices are still
  #   being uploaded
  field :record_count, type: Integer

  #
  # Read-only attributes
  #

  # Breaks the :running state up into multiple sub-states:
  #   :running -> :before -> :processing -> :after -> :complete
  field :sub_state, type: Symbol

  validates_presence_of :slice_size

  # Every category must be a Symbol made up of lowercase characters, digits and underscores.
  validates_each :output_categories, :input_categories do |record, attr, value|
    # Under some circumstances ActiveModel is passing in a nil value even though the
    # attributes have default values
    Array(value).each do |category|
      record.errors.add(attr, 'must only contain Symbol values') unless category.kind_of?(Symbol)
      # Anchor with \z rather than \Z: \Z also matches just before a trailing newline,
      # which would let a category such as :"main\n" through.
      record.errors.add(attr, 'must only consist of lowercase characters, digits, and _') unless category.to_s =~ /\A[a-z_0-9]+\z/
    end
  end
end
75
+
# Returns [Integer] percent of records completed so far.
#
# Returns 100 once the job has completed.
# Returns 0 if the total record count has not yet been set, or when the
# estimate of outstanding records is larger than the total record count.
def percent_complete
  return 100 if completed?
  return 0 unless record_count.to_i > 0

  # Approximate number of input records still outstanding, assuming that every
  # slice counted by `input.count` holds `slice_size` records.
  outstanding_records = input.count.to_f * slice_size

  # Sanity check in case slice_size is not being adhered to:
  # the estimate is then unusable, so report no progress instead of
  # claiming that the job is almost complete.
  return 0 if outstanding_records > record_count

  ((1.0 - (outstanding_records / record_count)) * 100).to_i
end
91
+
# Returns [Hash] status of this job.
#
# Slice counts and throughput figures specific to batch jobs are merged with
# the Hash returned by the parent implementation of #status.
# The 'result' key is always removed, and 'worker_name' is removed while the
# slices are being processed.
#
# Parameters
#   time_zone [String]
#     Passed through unchanged to the parent implementation.
def status(time_zone = 'Eastern Time (US & Canada)')
  summary = {}
  if queued?
    summary['queued_slices'] = input.queued.count
  elsif running? || paused? || failed?
    summary['active_slices'] = worker_count
    summary['failed_slices'] = input.failed.count
    summary['queued_slices'] = input.queued.count
    # Very high level estimated time left
    if record_count && running? && (record_count > 0)
      percent = percent_complete
      if percent >= 5
        elapsed = seconds.to_f
        # Project the total duration from the elapsed time, then subtract what has already been spent
        summary['est_remaining_duration'] = RocketJob.seconds_as_duration(((elapsed / percent) * 100) - elapsed)
      end
    end
  elsif completed?
    elapsed = seconds.to_f
    if record_count && (record_count > 0) && (elapsed > 0.0)
      summary['records_per_hour'] = ((record_count.to_f / elapsed) * 60 * 60).round
    end
  end
  summary['output_slices'] = output.count if collect_output? && !completed?
  summary.merge!(super(time_zone))
  summary.delete('result')
  # Worker name should be retrieved from the slices when processing
  summary.delete('worker_name') if sub_state == :processing
  summary
end
121
+
# Returns [Array<String>] names of workers currently working this job.
#
# - Not running:                  an empty Array.
# - Sub-state :before or :after:  the job's own worker_name wrapped in an Array,
#                                 or an empty Array when no worker_name is set.
# - Sub-state :processing:        the worker_name of every running input slice.
# - Any other sub-state:          an empty Array.
def worker_names
  return [] unless running?

  case sub_state
  when :before, :after
    # Wrap the single name so that callers always receive an Array, as documented.
    Array(worker_name)
  when :processing
    input.running.map(&:worker_name)
  else
    []
  end
end
135
+
# Returns [Integer] the number of workers currently working on this job.
#
# Always 0 when the job is not running. Otherwise 1 during the :before and
# :after sub-states, the number of running input slices during :processing,
# and 0 for any other sub-state.
def worker_count
  return 0 unless running?
  # Cache the number of workers for 1 second:
  # re-use the previous answer when it was calculated during the current second.
  return @worker_count if @worker_count_last && (@worker_count_last == Time.now.to_i)

  @worker_count =
    if %i[before after].include?(sub_state)
      1
    elsif sub_state == :processing
      input.running.count
    else
      0
    end
  @worker_count_last = Time.now.to_i
  @worker_count
end
154
+
155
+ end
156
+ end
157
+ end
@@ -0,0 +1,99 @@
1
+ require 'optparse'
2
+ require 'csv'
3
+ require 'yaml'
4
+ module RocketJob
5
+ module Batch
# Command line driven performance test for batch jobs.
#
# Uploads `count` records into a RocketJob::Jobs::PerformanceJob, waits for the
# already running workers to complete it, and reports the throughput.
class Performance
  attr_accessor :count, :servers, :workers, :version, :ruby, :environment, :mongo_config, :compress, :encrypt, :slice_size

  # Set the defaults that #parse can subsequently override from the command line.
  def initialize
    @count        = 10_000_000
    @environment  = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
    @mongo_config = 'config/mongoid.yml'
    @compress     = false
    @encrypt      = false
    @slice_size   = 1000
  end

  # Run one test case and return its measurements.
  #
  # Parameters
  #   count [Integer]
  #     Number of records to upload. Default: #count
  #
  # Returns [Hash] with the keys:
  #   :count, :duration, :records_per_second, :workers, :servers, :compress, :encrypt
  #
  # Raises RuntimeError when no servers are registered, or when the job fails
  # or is aborted before it completes.
  def run_test_case(count = self.count)
    server_count = RocketJob::Server.count
    raise 'Please start workers before starting the performance test' if server_count == 0

    count_running_workers

    puts "Loading job with #{count} records/lines"
    args = {log_level: :warn, slice_size: slice_size}
    # Compression and encryption are only passed along when the Enterprise module is loaded
    if defined?(::RocketJob::Enterprise)
      args[:compress] = compress
      args[:encrypt]  = encrypt
    end
    job = RocketJob::Jobs::PerformanceJob.new(args)
    job.upload do |writer|
      count.times { |i| writer << i }
    end
    job.save!

    puts 'Waiting for job to complete'
    until job.reload.completed?
      # A failed or aborted job never reaches completed, so stop instead of polling forever.
      raise 'Performance test job failed or was aborted before completing' if job.failed? || job.aborted?
      sleep 3
    end

    duration = job.completed_at - job.started_at
    {count: count, duration: duration, records_per_second: (count.to_f / duration).round(3), workers: workers, servers: server_count, compress: compress, encrypt: encrypt}
  end

  # Export the Results hash to a CSV file
  #
  # Parameters
  #   results [Array<Hash>]
  #     One Hash per test case, as returned by #run_test_case.
  #     The keys of the first Hash become the header row.
  #
  # The file is written to the current directory and its name includes the
  # Ruby and Rocket Job versions. Nothing is written when there are no results.
  def export_results(results)
    return if results.nil? || results.empty?

    ruby_name   = defined?(JRuby) ? "jruby_#{JRUBY_VERSION}" : "ruby_#{RUBY_VERSION}"
    gem_version = RocketJob::VERSION

    CSV.open("job_results_#{ruby_name}_v#{gem_version}.csv", 'wb') do |csv|
      csv << results.first.keys
      results.each { |result| csv << result.values }
    end
  end

  # Parse command line options
  #
  # Parameters
  #   argv [Array<String>]
  #     Command line arguments. Recognized options are removed from it.
  #
  # Returns the remaining arguments, as returned by OptionParser#parse!
  # Note: `--help` prints the usage and exits the process with status 1.
  def parse(argv)
    parser = OptionParser.new do |o|
      o.on('-c', '--count COUNT', 'Count of records to enqueue') do |arg|
        self.count = arg.to_i
      end
      o.on('-m', '--mongo MONGO_CONFIG_FILE_NAME', 'Location of mongoid.yml config file') do |arg|
        self.mongo_config = arg
      end
      o.on('-e', '--environment ENVIRONMENT', 'The environment to run the app on (Default: RAILS_ENV || RACK_ENV || development)') do |arg|
        self.environment = arg
      end
      o.on('-z', '--compress', 'Turn on compression') do
        self.compress = true
      end
      o.on('-E', '--encrypt', 'Turn on encryption') do
        self.encrypt = true
      end
      o.on('-s', '--slice_size COUNT', 'Slice size') do |arg|
        self.slice_size = arg.to_i
      end
    end
    parser.banner = 'rocketjob_batch_perf <options>'
    parser.on_tail '-h', '--help', 'Show help' do
      puts parser
      exit 1
    end
    parser.parse! argv
  end

  # Count the running servers that are not zombies, along with the workers
  # reported in their heartbeats, into #servers and #workers.
  def count_running_workers
    self.servers = 0
    self.workers = 0
    RocketJob::Server.running.each do |server|
      next if server.zombie?
      self.servers += 1
      self.workers += server.heartbeat.workers
    end
    puts "Running: #{workers} workers, distributed across #{servers} servers"
  end
end
98
+ end
99
+ end
@@ -0,0 +1,8 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
# Structure to hold a result that needs to be written to a specific output collection.
#
#   category: the output category that the value belongs to
#   value:    the result itself
Result = Struct.new(:category, :value)
7
+ end
8
+ end
@@ -0,0 +1,9 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
# For holding multiple categorized Result's.
#
# A plain Array subclass without any additional behavior: the distinct class
# lets a collection of Result instances be told apart from an ordinary Array.
Results = Class.new(Array)
8
+ end
9
+ end
@@ -0,0 +1,102 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ # State machine for sliced jobs
6
+ module StateMachine
7
+ extend ActiveSupport::Concern
8
+
included do
  # Replace existing event and all callbacks for that event
  aasm.state_machine.add_event(:retry, {}) do
    # Retry before_batch:
    #   Failed before any slice was processed, so the job starts over from the queue.
    transitions(
      from:  :failed,
      to:    :queued,
      if:    -> { (sub_state == :before) || sub_state.nil? },
      after: :rocket_job_requeue_sub_state_before
    )
    # Retry perform and after_batch:
    #   The job goes straight back to running.
    transitions(
      from:  :failed,
      to:    :running,
      if:    -> { sub_state != :before },
      after: :rocket_job_requeue_sub_state_after
    )
  end

  # Replace existing event and all callbacks for that event
  aasm.state_machine.add_event(:requeue, {}) do
    # Requeue perform
    transitions(
      from:  :running,
      to:    :running,
      if:    ->(server_name) { sub_state == :processing },
      after: :rocket_job_requeue_sub_state_processing
    )
    # Requeue after_batch
    transitions(
      from:  :running,
      to:    :running,
      if:    ->(server_name) { worker_on_server?(server_name) && (sub_state == :after) },
      after: :rocket_job_requeue_sub_state_after
    )
    # Requeue before_batch
    transitions(
      from:  :running,
      to:    :queued,
      if:    ->(server_name) { worker_on_server?(server_name) && (sub_state == :before) },
      after: :rocket_job_requeue_sub_state_before
    )
  end

  # Needed again here since the events have been overwritten above
  before_retry :rocket_job_clear_exception

  # Keep sub_state and the input / output collections in step with the job's state
  before_start :rocket_job_sub_state_before
  before_complete :rocket_job_clear_sub_state
  after_abort :cleanup!
  after_retry :rocket_job_requeue_failed_slices
  after_destroy :cleanup!
end
47
+
# Drop the input and output collections.
#
# Every input category is dropped first, followed by every output category.
def cleanup!
  input_categories.each do |name|
    input(name).drop
  end
  output_categories.each do |name|
    output(name).drop
  end
end
53
+
# A batch job can only be paused:
# - Whilst Queued (before processing).
# - Whilst already Paused.
# - During processing.
#
# I.e. Not during before_batch and after_batch.
def pausable?
  queued? || paused? || (running? && (sub_state == :processing))
end
62
+
63
+ private
64
+
# Is this job still being processed:
# I.e. it is running and its sub_state is :processing.
def rocket_job_processing?
  return false unless running?

  sub_state == :processing
end
69
+
# Move the job into the :before sub_state, unless it already has a sub_state.
def rocket_job_sub_state_before
  return if sub_state

  self.sub_state = :before
end
73
+
# Remove the sub_state.
def rocket_job_clear_sub_state
  self.sub_state = nil
end
77
+
# Called after a job in sub_state: :before is requeued.
#
# Clears the sub_state, start time, and worker name.
def rocket_job_requeue_sub_state_before
  self.sub_state   = nil
  self.started_at  = nil
  self.worker_name = nil
end
84
+
# Called after a retry of perform / after_batch, and after a requeue of after_batch.
#
# Places the job in the :processing sub_state and clears the worker name.
def rocket_job_requeue_sub_state_after
  self.sub_state   = :processing
  self.worker_name = nil
end
89
+
# Called after a job in sub_state: :processing is requeued.
#
# Clears the job's own worker name, then asks the input collection to
# requeue the running slices for the supplied name.
#
# NOTE(review): the requeue event's guards name this argument `server_name`,
# so it presumably receives a server name rather than a worker name; confirm.
def rocket_job_requeue_sub_state_processing(worker_name)
  self.worker_name = nil
  input.requeue_running(worker_name)
end
94
+
# Also retry failed slices when the job itself is re-tried.
#
# Delegates to the input collection's #requeue_failed.
def rocket_job_requeue_failed_slices
  input.requeue_failed
end
99
+
100
+ end
101
+ end
102
+ end
@@ -0,0 +1,88 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ # Allow statistics to be gathered while a batch job is running
6
+ module Statistics
7
+ extend ActiveSupport::Concern
8
+
# Accumulates statistic increments.
#
# Operates in one of two modes, selected when it is created:
# - Without a hash: increments are gathered in #stats under "statistics.<key>"
#   keys, ready to be applied with a single `$inc` update.
# - With a hash: increments are applied directly to that `in-memory` hash,
#   where a key such as 'a.b' increments `hash['a']['b']`.
class Stats
  attr_reader :stats, :in_memory

  # hash [Hash]
  #   Update an `in-memory` copy of the stats instead of gathering them inside `stats`.
  def initialize(hash = nil)
    @in_memory = hash
    @stats     = Hash.new(0) unless hash
  end

  # Increment every key in the supplied Hash by its value.
  #
  # Returns self so that calls can be chained.
  def inc(hash)
    hash.each_pair { |key, increment| inc_key(key, increment) }
    self
  end

  # Increment a single key.
  #
  # Zero increments, and nil or blank keys, are ignored.
  # Returns self so that calls can be chained.
  def inc_key(key, increment = 1)
    # Always return self, including for a zero increment, so that chained calls keep working
    return self if increment == 0

    if in_memory
      # For tests and in-process execution
      inc_in_memory(key, increment)
    elsif key && key != ''
      stats["statistics.#{key}"] += increment
    end
    self
  end

  # Returns [true|false] whether there are no gathered increments.
  # Always true when the increments are applied to an `in-memory` hash.
  def empty?
    stats.nil? || stats.empty?
  end

  private

  # Navigates the dot separated path in key, creating child hashes as needed,
  # and increments the value at the end of the path.
  def inc_in_memory(key, increment)
    paths = key.to_s.split('.')
    last  = paths.pop
    # Nil or blank key
    return unless last

    target = paths.inject(in_memory) do |parent, name|
      parent.key?(name) ? parent[name] : parent[name] = Hash.new(0)
    end
    # The supplied hash may not have a default value of 0, so start missing entries at 0
    target[last] = target.fetch(last, 0) + increment
  end
end
51
+
included do
  # Statistics gathered so far.
  # Every new job starts with its own empty Hash whose values default to 0.
  field :statistics, type: Hash, default: lambda { Hash.new(0) }

  # Gather the statistics incremented while each slice is processed
  around_slice :statistics_capture
end
57
+
# Increment a statistic
#
# Parameters
#   key [String|Symbol|Hash]
#     Name of the statistic to increment.
#     Or, a Hash of statistic names and the amount to increment each by.
#     Nil and blank keys are ignored.
#   increment [Integer]
#     Amount to increment by. Not used when key is a Hash.
def statistics_inc(key, increment = 1)
  return if key.nil? || key == ''

  # Being called within tests outside of a perform
  unless @slice_statistics
    seed = new_record? ? statistics : nil
    @slice_statistics = Stats.new(seed)
  end

  if key.is_a?(Hash)
    @slice_statistics.inc(key)
  else
    @slice_statistics.inc_key(key, increment)
  end
end
65
+
66
+ private
67
+
# Gather the statistics incremented while the supplied block runs.
#
# For a job that has already been saved, the gathered increments are applied
# to its document with a single `$inc` update once the block returns.
# For a new record the increments are applied directly to #statistics,
# so no update is issued.
def statistics_capture
  seed = new_record? ? statistics : nil
  @slice_statistics = Stats.new(seed)
  yield
  return if @slice_statistics.empty?

  collection.update_one({_id: id}, {'$inc' => @slice_statistics.stats})
end
75
+
# Overrides RocketJob::Batch::Logger#rocket_job_batch_log_payload
#
# Returns [Hash] the state transition being logged: :from, :to, and :event.
# A copy of the statistics is included under :statistics once the job has
# completed or failed, provided that any statistics are present.
def rocket_job_batch_log_payload
  payload = {from: aasm.from_state, to: aasm.to_state, event: aasm.current_event}
  payload[:statistics] = statistics.dup if statistics.present? && (completed? || failed?)
  payload
end
86
+ end
87
+ end
88
+ end