rocketjob 3.5.2 → 4.0.0

Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +63 -1
  3. data/bin/rocketjob +1 -0
  4. data/bin/rocketjob_batch_perf +11 -0
  5. data/lib/rocket_job/batch.rb +32 -0
  6. data/lib/rocket_job/batch/callbacks.rb +40 -0
  7. data/lib/rocket_job/batch/io.rb +154 -0
  8. data/lib/rocket_job/batch/logger.rb +57 -0
  9. data/lib/rocket_job/batch/lower_priority.rb +54 -0
  10. data/lib/rocket_job/batch/model.rb +157 -0
  11. data/lib/rocket_job/batch/performance.rb +99 -0
  12. data/lib/rocket_job/batch/result.rb +8 -0
  13. data/lib/rocket_job/batch/results.rb +9 -0
  14. data/lib/rocket_job/batch/state_machine.rb +102 -0
  15. data/lib/rocket_job/batch/statistics.rb +88 -0
  16. data/lib/rocket_job/batch/tabular.rb +56 -0
  17. data/lib/rocket_job/batch/tabular/input.rb +123 -0
  18. data/lib/rocket_job/batch/tabular/output.rb +59 -0
  19. data/lib/rocket_job/batch/throttle.rb +91 -0
  20. data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
  21. data/lib/rocket_job/batch/worker.rb +288 -0
  22. data/lib/rocket_job/cli.rb +29 -7
  23. data/lib/rocket_job/config.rb +1 -1
  24. data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
  25. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
  26. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
  27. data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
  28. data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
  29. data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
  31. data/lib/rocket_job/jobs/performance_job.rb +18 -0
  32. data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
  33. data/lib/rocket_job/plugins/document.rb +2 -8
  34. data/lib/rocket_job/plugins/job/persistence.rb +6 -4
  35. data/lib/rocket_job/plugins/job/throttle.rb +3 -6
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -2
  37. data/lib/rocket_job/server.rb +14 -3
  38. data/lib/rocket_job/sliced/input.rb +336 -0
  39. data/lib/rocket_job/sliced/output.rb +99 -0
  40. data/lib/rocket_job/sliced/slice.rb +166 -0
  41. data/lib/rocket_job/sliced/slices.rb +166 -0
  42. data/lib/rocket_job/sliced/writer/input.rb +60 -0
  43. data/lib/rocket_job/sliced/writer/output.rb +82 -0
  44. data/lib/rocket_job/version.rb +1 -1
  45. data/lib/rocket_job/worker.rb +2 -2
  46. data/lib/rocketjob.rb +28 -0
  47. metadata +51 -62
  48. data/test/config/database.yml +0 -5
  49. data/test/config/mongoid.yml +0 -88
  50. data/test/config_test.rb +0 -10
  51. data/test/dirmon_entry_test.rb +0 -313
  52. data/test/dirmon_job_test.rb +0 -216
  53. data/test/files/text.txt +0 -3
  54. data/test/job_test.rb +0 -71
  55. data/test/jobs/housekeeping_job_test.rb +0 -102
  56. data/test/jobs/on_demand_job_test.rb +0 -59
  57. data/test/jobs/upload_file_job_test.rb +0 -107
  58. data/test/plugins/cron_test.rb +0 -166
  59. data/test/plugins/job/callbacks_test.rb +0 -166
  60. data/test/plugins/job/defaults_test.rb +0 -53
  61. data/test/plugins/job/logger_test.rb +0 -56
  62. data/test/plugins/job/model_test.rb +0 -94
  63. data/test/plugins/job/persistence_test.rb +0 -94
  64. data/test/plugins/job/state_machine_test.rb +0 -116
  65. data/test/plugins/job/throttle_test.rb +0 -111
  66. data/test/plugins/job/worker_test.rb +0 -199
  67. data/test/plugins/processing_window_test.rb +0 -109
  68. data/test/plugins/restart_test.rb +0 -193
  69. data/test/plugins/retry_test.rb +0 -88
  70. data/test/plugins/singleton_test.rb +0 -92
  71. data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
  72. data/test/plugins/state_machine_test.rb +0 -67
  73. data/test/plugins/transaction_test.rb +0 -84
  74. data/test/test_db.sqlite3 +0 -0
  75. data/test/test_helper.rb +0 -17
data/lib/rocket_job/batch/model.rb
@@ -0,0 +1,157 @@
+require 'active_support/concern'
+module RocketJob
+  module Batch
+    # Model attributes
+    module Model
+      extend ActiveSupport::Concern
+
+      included do
+        #
+        # User definable attributes
+        #
+        # The following attributes are set when the job is created
+
+        # Number of records to include in each slice that is processed
+        # Note:
+        #   slice_size is only used by SlicedJob#upload & Sliced::Input#upload
+        #   When slices are supplied directly, their size is not modified to match this number
+        field :slice_size, type: Integer, default: 100, class_attribute: true, user_editable: true, copy_on_restart: true
+
+        # Whether to retain nil results.
+        #
+        # Only applicable if `collect_output` is `true`
+        # Set to `false` to prevent collecting output from the perform
+        # method when it returns `nil`.
+        field :collect_nil_output, type: Boolean, default: true, class_attribute: true
+
+        # Optional Array<Symbol> list of categories that this job can output to
+        #
+        # By using categories the output from #perform can be placed in different
+        # output collections, and therefore different output files
+        #
+        # Categories must be declared in advance to avoid a #perform method
+        # accidentally writing its results to an unknown category
+        field :output_categories, type: Array, default: [:main], class_attribute: true
+
+        # Optional Array<Symbol> list of categories that this job can load input data into
+        field :input_categories, type: Array, default: [:main], class_attribute: true
+
+        # The file name of the uploaded file, if any.
+        # Set by #upload if a file name was supplied, but can also be set explicitly.
+        # May or may not include the fully qualified path name.
+        field :upload_file_name, type: String
+
+        #
+        # Values that jobs can also update during processing
+        #
+
+        # Number of records in this job
+        # Note:
+        #   A record_count of nil means it has not been set and workers will
+        #   _not_ complete the job when processing slices.
+        #   This allows workers to start processing slices while slices are still
+        #   being uploaded
+        field :record_count, type: Integer
+
+        #
+        # Read-only attributes
+        #
+
+        # Breaks the :running state up into multiple sub-states:
+        #   :running -> :before -> :processing -> :after -> :complete
+        field :sub_state, type: Symbol
+
+        validates_presence_of :slice_size
+
+        validates_each :output_categories, :input_categories do |record, attr, value|
+          # Under some circumstances ActiveModel is passing in a nil value even though the
+          # attributes have default values
+          Array(value).each do |category|
+            record.errors.add(attr, 'must only contain Symbol values') unless category.kind_of?(Symbol)
+            record.errors.add(attr, 'must only consist of lowercase characters, digits, and _') unless category.to_s =~ /\A[a-z_0-9]+\Z/
+          end
+        end
+      end
+
+      # Returns [Integer] percent of records completed so far
+      # Returns 0 if the total record count has not yet been set
+      def percent_complete
+        return 100 if completed?
+        return 0 unless record_count.to_i > 0
+
+        # Approximate number of input records
+        input_records = input.count.to_f * slice_size
+        if input_records > record_count
+          # Sanity check in case slice_size is not being adhered to
+          99
+        else
+          ((1.0 - (input_records.to_f / record_count)) * 100).to_i
+        end
+      end
+
+      # Returns [Hash] status of this job
+      def status(time_zone = 'Eastern Time (US & Canada)')
+        h = {}
+        case
+        when queued?
+          h['queued_slices'] = input.queued.count
+        when running? || paused? || failed?
+          h['active_slices'] = worker_count
+          h['failed_slices'] = input.failed.count
+          h['queued_slices'] = input.queued.count
+          # Very high level estimated time left
+          if record_count && running? && (record_count > 0)
+            percent = percent_complete
+            if percent >= 5
+              secs = seconds.to_f
+              h['est_remaining_duration'] = RocketJob.seconds_as_duration((((secs / percent) * 100) - secs))
+            end
+          end
+        when completed?
+          secs = seconds.to_f
+          h['records_per_hour'] = ((record_count.to_f / secs) * 60 * 60).round if record_count && (record_count > 0) && (secs > 0.0)
+        end
+        h['output_slices'] = output.count if collect_output? && !completed?
+        h.merge!(super(time_zone))
+        h.delete('result')
+        # Worker name should be retrieved from the slices when processing
+        h.delete('worker_name') if sub_state == :processing
+        h
+      end
+
+      # Returns [Array<String>] names of workers currently working this job.
+      def worker_names
+        return [] unless running?
+
+        case sub_state
+        when :before, :after
+          worker_name
+        when :processing
+          input.running.collect { |slice| slice.worker_name }
+        else
+          []
+        end
+      end
+
+      # Returns [Integer] the number of workers currently working on this job.
+      def worker_count
+        return 0 unless running?
+        # Cache the number of workers for 1 second.
+        return @worker_count if @worker_count_last && (@worker_count_last == Time.now.to_i)
+
+        @worker_count =
+          case sub_state
+          when :before, :after
+            1
+          when :processing
+            input.running.count
+          else
+            0
+          end
+        @worker_count_last = Time.now.to_i
+        @worker_count
+      end
+
+    end
+  end
+end
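The attributes above are normally set as class-level defaults when a batch job is defined. A minimal sketch, assuming the usual `include RocketJob::Batch` entry point added in data/lib/rocket_job/batch.rb; the job class, its perform body, and the `collect_output` setting are illustrative only, not part of this diff:

class ReverseJob < RocketJob::Job
  include RocketJob::Batch

  # Class-level defaults for the user definable attributes above
  self.slice_size        = 250       # records per processing slice
  self.collect_output    = true      # keep the return value of #perform
  self.output_categories = [:main]   # categories must be declared up front

  # Called once per input record in each slice
  def perform(line)
    line.reverse
  end
end

Once records are uploaded, `percent_complete` and `status` report progress from the remaining input slices, and `worker_names`/`worker_count` reflect the slices currently being processed.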
data/lib/rocket_job/batch/performance.rb
@@ -0,0 +1,99 @@
+require 'optparse'
+require 'csv'
+require 'yaml'
+module RocketJob
+  module Batch
+    class Performance
+      attr_accessor :count, :servers, :workers, :version, :ruby, :environment, :mongo_config, :compress, :encrypt, :slice_size
+
+      def initialize
+        @count = 10_000_000
+        @environment = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
+        @mongo_config = 'config/mongoid.yml'
+        @compress = false
+        @encrypt = false
+        @slice_size = 1000
+      end
+
+      def run_test_case(count = self.count)
+        servers = RocketJob::Server.count
+        raise 'Please start workers before starting the performance test' if servers == 0
+
+        count_running_workers
+
+        puts "Loading job with #{count} records/lines"
+        args = {log_level: :warn, slice_size: slice_size}
+        if defined?(::RocketJob::Enterprise)
+          args[:compress] = compress
+          args[:encrypt] = encrypt
+        end
+        job = RocketJob::Jobs::PerformanceJob.new(args)
+        job.upload do |writer|
+          count.times { |i| writer << i }
+        end
+        job.save!
+
+        puts 'Waiting for job to complete'
+        while (!job.reload.completed?)
+          sleep 3
+        end
+
+        duration = job.completed_at - job.started_at
+        {count: count, duration: duration, records_per_second: (count.to_f / duration).round(3), workers: workers, servers: servers, compress: compress, encrypt: encrypt}
+      end
+
+      # Export the Results hash to a CSV file
+      def export_results(results)
+        ruby = defined?(JRuby) ? "jruby_#{JRUBY_VERSION}" : "ruby_#{RUBY_VERSION}"
+        version = RocketJob::VERSION
+
+        CSV.open("job_results_#{ruby}_v#{version}.csv", 'wb') do |csv|
+          csv << results.first.keys
+          results.each { |result| csv << result.values }
+        end
+      end
+
+      # Parse command line options
+      def parse(argv)
+        parser = OptionParser.new do |o|
+          o.on('-c', '--count COUNT', 'Count of records to enqueue') do |arg|
+            self.count = arg.to_i
+          end
+          o.on('-m', '--mongo MONGO_CONFIG_FILE_NAME', 'Location of mongoid.yml config file') do |arg|
+            self.mongo_config = arg
+          end
+          o.on('-e', '--environment ENVIRONMENT', 'The environment to run the app on (Default: RAILS_ENV || RACK_ENV || development)') do |arg|
+            self.environment = arg
+          end
+          o.on('-z', '--compress', 'Turn on compression') do |arg|
+            self.compress = true
+          end
+          o.on('-E', '--encrypt', 'Turn on encryption') do |arg|
+            self.encrypt = true
+          end
+          o.on('-s', '--slice_size COUNT', 'Slice size') do |arg|
+            self.slice_size = arg.to_i
+          end
+        end
+        parser.banner = 'rocketjob_batch_perf <options>'
+        parser.on_tail '-h', '--help', 'Show help' do
+          puts parser
+          exit 1
+        end
+        parser.parse! argv
+      end
+
+      def count_running_workers
+        self.servers = 0
+        self.workers = 0
+        RocketJob::Server.running.each do |server|
+          next if server.zombie?
+          self.servers += 1
+          self.workers += server.heartbeat.workers
+        end
+        puts "Running: #{workers} workers, distributed across #{servers} servers"
+      end
+
+    end
+  end
+end
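This harness backs the new data/bin/rocketjob_batch_perf script. A rough sketch of how it might be driven, using only the methods defined above and assuming Mongoid is already configured and at least one RocketJob server with workers is running:

perf = RocketJob::Batch::Performance.new
perf.parse(ARGV)                 # e.g. -c 1000000 -s 500 -z
results = [perf.run_test_case]   # enqueues RocketJob::Jobs::PerformanceJob and waits for completion
perf.export_results(results)     # writes job_results_<ruby>_v<version>.csv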
data/lib/rocket_job/batch/result.rb
@@ -0,0 +1,8 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    # Structure to hold results that need to be written to different output collections
+    Result = Struct.new(:category, :value)
+  end
+end
data/lib/rocket_job/batch/results.rb
@@ -0,0 +1,9 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    # For holding multiple categorized Result's
+    class Results < Array
+    end
+  end
+end
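Result and Results are the values a batch #perform can return to route output into the categories declared via output_categories. An illustrative sketch, assuming the conventional category-routing pattern; the job class and its cleansing logic are hypothetical:

class CleanseJob < RocketJob::Job
  include RocketJob::Batch

  self.collect_output    = true
  self.output_categories = [:main, :invalid]

  def perform(record)
    if record.to_s.strip.empty?
      # Send this record's output to the :invalid category
      RocketJob::Batch::Result.new(:invalid, record)
    else
      # Plain return values go to the :main category; to emit into several
      # categories from one record, return a RocketJob::Batch::Results
      # containing multiple Result entries.
      record.strip
    end
  end
end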
data/lib/rocket_job/batch/state_machine.rb
@@ -0,0 +1,102 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    # State machine for sliced jobs
+    module StateMachine
+      extend ActiveSupport::Concern
+
+      included do
+        # Replace existing event and all callbacks for that event
+        aasm.state_machine.add_event(:retry, {}) do
+          # Retry before_batch
+          transitions from: :failed, to: :queued,
+                      if: -> { (sub_state == :before) || sub_state.nil? },
+                      after: :rocket_job_requeue_sub_state_before
+          # Retry perform and after_batch
+          transitions from: :failed, to: :running,
+                      if: -> { sub_state != :before },
+                      after: :rocket_job_requeue_sub_state_after
+        end
+
+        # Replace existing event and all callbacks for that event
+        aasm.state_machine.add_event(:requeue, {}) do
+          # Requeue perform
+          transitions from: :running, to: :running,
+                      if: -> server_name { sub_state == :processing },
+                      after: :rocket_job_requeue_sub_state_processing
+          # Requeue after_batch
+          transitions from: :running, to: :running,
+                      if: -> server_name { worker_on_server?(server_name) && (sub_state == :after) },
+                      after: :rocket_job_requeue_sub_state_after
+          # Requeue before_batch
+          transitions from: :running, to: :queued,
+                      if: -> server_name { worker_on_server?(server_name) && (sub_state == :before) },
+                      after: :rocket_job_requeue_sub_state_before
+        end
+
+        # Needed again here since the events have been overwritten above
+        before_retry :rocket_job_clear_exception
+
+        before_start :rocket_job_sub_state_before
+        before_complete :rocket_job_clear_sub_state
+        after_abort :cleanup!
+        after_retry :rocket_job_requeue_failed_slices
+        after_destroy :cleanup!
+      end
+
+      # Drop the input and output collections
+      def cleanup!
+        input_categories.each { |category| input(category).drop }
+        output_categories.each { |category| output(category).drop }
+      end
+
+      # A batch job can only be processed:
+      # - Whilst Queued (before processing).
+      # - During processing.
+      #
+      # I.e. Not during before_batch and after_batch.
+      def pausable?
+        queued? || paused? || running? && (sub_state == :processing)
+      end
+
+      private
+
+      # Is this job still being processed
+      def rocket_job_processing?
+        running? && (sub_state == :processing)
+      end
+
+      def rocket_job_sub_state_before
+        self.sub_state = :before unless self.sub_state
+      end
+
+      def rocket_job_clear_sub_state
+        self.sub_state = nil
+      end
+
+      # Called after a job in sub_state: :before is requeued
+      def rocket_job_requeue_sub_state_before
+        self.sub_state = nil
+        self.started_at = nil
+        self.worker_name = nil
+      end
+
+      def rocket_job_requeue_sub_state_after
+        self.sub_state = :processing
+        self.worker_name = nil
+      end
+
+      def rocket_job_requeue_sub_state_processing(worker_name)
+        self.worker_name = nil
+        input.requeue_running(worker_name)
+      end
+
+      # Also retry failed slices when the job itself is re-tried
+      def rocket_job_requeue_failed_slices
+        input.requeue_failed
+      end
+
+    end
+  end
+end
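The replaced :retry event means that retrying a failed batch job also requeues its failed slices (via the after_retry :rocket_job_requeue_failed_slices callback above). A hypothetical sketch, reusing the illustrative CleanseJob class from earlier:

job = CleanseJob.last   # hypothetical: a batch job instance that has failed
if job.failed?
  job.retry!            # failed slices are requeued and picked up again by workers
end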
data/lib/rocket_job/batch/statistics.rb
@@ -0,0 +1,88 @@
+require 'active_support/concern'
+
+module RocketJob
+  module Batch
+    # Allow statistics to be gathered while a batch job is running
+    module Statistics
+      extend ActiveSupport::Concern
+
+      class Stats
+        attr_reader :stats, :in_memory
+
+        # hash [Hash]
+        #   Update an `in-memory` copy of the stats instead of gathering them inside `stats`.
+        def initialize(hash = nil)
+          @in_memory = hash
+          @stats = Hash.new(0) unless hash
+        end
+
+        def inc(hash)
+          hash.each_pair { |key, increment| inc_key(key, increment) }
+          self
+        end
+
+        def inc_key(key, increment = 1)
+          return if increment == 0
+          if in_memory
+            # For tests and in-process execution
+            inc_in_memory(key, increment)
+          elsif key && key != ''
+            stats["statistics.#{key}"] += increment
+          end
+          self
+        end
+
+        def empty?
+          stats.nil? || stats.empty?
+        end
+
+        private
+
+        # Navigates path and creates child hashes as needed at the end is reached
+        def inc_in_memory(key, increment)
+          paths = key.to_s.split('.')
+          last = paths.pop
+          return unless last
+
+          target = paths.inject(in_memory) { |target, key| target.key?(key) ? target[key] : target[key] = Hash.new(0) }
+          target[last] += increment
+        end
+      end
+
+      included do
+        field :statistics, type: Hash, default: -> { Hash.new(0) }
+
+        around_slice :statistics_capture
+      end
+
+      # Increment a statistic
+      def statistics_inc(key, increment = 1)
+        return if key.nil? || key == ''
+        # Being called within tests outside of a perform
+        @slice_statistics ||= Stats.new(new_record? ? statistics : nil)
+        key.is_a?(Hash) ? @slice_statistics.inc(key) : @slice_statistics.inc_key(key, increment)
+      end
+
+      private
+
+      # Capture the number of successful and failed tradelines
+      # as well as those with notices and alerts.
+      def statistics_capture
+        @slice_statistics = Stats.new(new_record? ? statistics : nil)
+        yield
+        collection.update_one({_id: id}, {'$inc' => @slice_statistics.stats}) unless @slice_statistics.empty?
+      end
+
+      # Overrides RocketJob::Batch::Logger#rocket_job_batch_log_payload
+      def rocket_job_batch_log_payload
+        h = {
+          from: aasm.from_state,
+          to: aasm.to_state,
+          event: aasm.current_event
+        }
+        h[:statistics] = statistics.dup if statistics.present? && (completed? || failed?)
+        h
+      end
+    end
+  end
+end
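A batch job that includes this module can accumulate counters from #perform via statistics_inc; the totals gathered during a slice are $inc'ed into the job's statistics field by the around_slice callback above. A minimal sketch, assuming the module is included explicitly alongside RocketJob::Batch; the job class and counter keys are illustrative:

class ImportJob < RocketJob::Job
  include RocketJob::Batch
  include RocketJob::Batch::Statistics

  def perform(line)
    if line.to_s.strip.empty?
      statistics_inc('lines.blank')      # dotted keys become nested counters
      return
    end
    statistics_inc('lines.imported')
    line
  end
end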