rocketjob 3.5.2 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +63 -1
  3. data/bin/rocketjob +1 -0
  4. data/bin/rocketjob_batch_perf +11 -0
  5. data/lib/rocket_job/batch.rb +32 -0
  6. data/lib/rocket_job/batch/callbacks.rb +40 -0
  7. data/lib/rocket_job/batch/io.rb +154 -0
  8. data/lib/rocket_job/batch/logger.rb +57 -0
  9. data/lib/rocket_job/batch/lower_priority.rb +54 -0
  10. data/lib/rocket_job/batch/model.rb +157 -0
  11. data/lib/rocket_job/batch/performance.rb +99 -0
  12. data/lib/rocket_job/batch/result.rb +8 -0
  13. data/lib/rocket_job/batch/results.rb +9 -0
  14. data/lib/rocket_job/batch/state_machine.rb +102 -0
  15. data/lib/rocket_job/batch/statistics.rb +88 -0
  16. data/lib/rocket_job/batch/tabular.rb +56 -0
  17. data/lib/rocket_job/batch/tabular/input.rb +123 -0
  18. data/lib/rocket_job/batch/tabular/output.rb +59 -0
  19. data/lib/rocket_job/batch/throttle.rb +91 -0
  20. data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
  21. data/lib/rocket_job/batch/worker.rb +288 -0
  22. data/lib/rocket_job/cli.rb +29 -7
  23. data/lib/rocket_job/config.rb +1 -1
  24. data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
  25. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
  26. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
  27. data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
  28. data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
  29. data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
  31. data/lib/rocket_job/jobs/performance_job.rb +18 -0
  32. data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
  33. data/lib/rocket_job/plugins/document.rb +2 -8
  34. data/lib/rocket_job/plugins/job/persistence.rb +6 -4
  35. data/lib/rocket_job/plugins/job/throttle.rb +3 -6
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -2
  37. data/lib/rocket_job/server.rb +14 -3
  38. data/lib/rocket_job/sliced/input.rb +336 -0
  39. data/lib/rocket_job/sliced/output.rb +99 -0
  40. data/lib/rocket_job/sliced/slice.rb +166 -0
  41. data/lib/rocket_job/sliced/slices.rb +166 -0
  42. data/lib/rocket_job/sliced/writer/input.rb +60 -0
  43. data/lib/rocket_job/sliced/writer/output.rb +82 -0
  44. data/lib/rocket_job/version.rb +1 -1
  45. data/lib/rocket_job/worker.rb +2 -2
  46. data/lib/rocketjob.rb +28 -0
  47. metadata +51 -62
  48. data/test/config/database.yml +0 -5
  49. data/test/config/mongoid.yml +0 -88
  50. data/test/config_test.rb +0 -10
  51. data/test/dirmon_entry_test.rb +0 -313
  52. data/test/dirmon_job_test.rb +0 -216
  53. data/test/files/text.txt +0 -3
  54. data/test/job_test.rb +0 -71
  55. data/test/jobs/housekeeping_job_test.rb +0 -102
  56. data/test/jobs/on_demand_job_test.rb +0 -59
  57. data/test/jobs/upload_file_job_test.rb +0 -107
  58. data/test/plugins/cron_test.rb +0 -166
  59. data/test/plugins/job/callbacks_test.rb +0 -166
  60. data/test/plugins/job/defaults_test.rb +0 -53
  61. data/test/plugins/job/logger_test.rb +0 -56
  62. data/test/plugins/job/model_test.rb +0 -94
  63. data/test/plugins/job/persistence_test.rb +0 -94
  64. data/test/plugins/job/state_machine_test.rb +0 -116
  65. data/test/plugins/job/throttle_test.rb +0 -111
  66. data/test/plugins/job/worker_test.rb +0 -199
  67. data/test/plugins/processing_window_test.rb +0 -109
  68. data/test/plugins/restart_test.rb +0 -193
  69. data/test/plugins/retry_test.rb +0 -88
  70. data/test/plugins/singleton_test.rb +0 -92
  71. data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
  72. data/test/plugins/state_machine_test.rb +0 -67
  73. data/test/plugins/transaction_test.rb +0 -84
  74. data/test/test_db.sqlite3 +0 -0
  75. data/test/test_helper.rb +0 -17
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bd3505de7a318efb53880ef4c0f506a970d1ee30277f531bef7e2a9f257cf500
4
- data.tar.gz: '085e0371c60662b51ba53c4e5e6e1b2e016cdc2e37de1b51a437e5477f907a22'
3
+ metadata.gz: c3d1d8a5bd37991ac8ad2dd5ae28b833dc4cb59d69da1a1cc281c904ab15c3eb
4
+ data.tar.gz: 39336f01d701f34e4f25e00e03fa5f06299ec19d1f7c0b52af5aff6ae4b4327b
5
5
  SHA512:
6
- metadata.gz: a9a9441957a5405ed1d3d98bbe5bf0a45febbbdf8283c09de967f19abd1e5a7470cc871d66b804f8569ad90b48732fcd4d649e3d0c282b60be2b000bdd60adc8
7
- data.tar.gz: d47b9c8a4eba0fd88798b76926da8c4073b0c26c6743ae3b54883535f278ea3317d6fb7901d57caf8506f710b5fdb7151b9acae65e7e2427bf2607a0c926231a
6
+ metadata.gz: 42266518d00516c62ccd3d0139e6fa22427f48b14454a2f06f0c04f559620b9bbf71700f38b518db03b8290cd6ab7ad7353c47cfbb579aa2f5642ef2a9b2cfb5
7
+ data.tar.gz: 444e848667f4a09629b18b467713fe9ae26abb22fd68ceb9048e0adb617c3bbe066ec407c9922577f9ea6bc40271a9d6c84d6064456e5e0532ddfa8f0be85a69
data/README.md CHANGED
@@ -17,6 +17,59 @@ Checkout http://rocketjob.io/
17
17
  * Questions? Join the chat room on Gitter for [rocketjob support](https://gitter.im/rocketjob/support)
18
18
  * [Report bugs](https://github.com/rocketjob/rocketjob/issues)
19
19
 
20
+ ## Rocket Job 4
21
+
22
+ Rocket Job Pro is now open sourced and included within Rocket Job.
23
+
24
+ The `RocketJob::Batch` plugin now adds batch processing capabilities to break up a single task into many
25
+ concurrent workers processing slices of the entire job at the same time.
26
+
27
+
28
+ Example:
29
+
30
+ ```ruby
31
+ class MyJob < RocketJob::Job
32
+ include RocketJob::Batch
33
+
34
+ self.description = "Reverse names"
35
+ self.destroy_on_complete = false
36
+ self.collect_output = true
37
+
38
+ # Method to call by all available workers at the same time.
39
+ # Reverse the characters for each line:
40
+ def perform(line)
41
+ line.reverse
42
+ end
43
+ end
44
+ ```
45
+
46
+ Upload a file for processing, for example `names.csv` which could contain:
47
+
48
+ ```
49
+ jack
50
+ jane
51
+ bill
52
+ john
53
+ blake
54
+ chris
55
+ dave
56
+ marc
57
+ ```
58
+
59
+ To queue the above job for processing:
60
+
61
+ ```ruby
62
+ job = MyJob.new
63
+ job.upload('names.csv')
64
+ job.save!
65
+ ```
66
+
67
+ Once the job has completed, download the results into a file:
68
+
69
+ ```ruby
70
+ job.download('names_reversed.csv')
71
+ ```
72
+
20
73
  ## Contributing to the documentation
21
74
 
22
75
  To contribute to the documentation it is as easy as forking the repository
@@ -99,6 +152,14 @@ Rocket Job is tested and supported on the following Ruby platforms:
99
152
  - Ruby 2.1, 2.2, 2.3, 2.4, and above
100
153
  - JRuby 9.0.5 and above
101
154
 
155
+ ## Dependencies
156
+
157
+ * [MongoDB](https://www.mongodb.org)
158
+ * Persists job information.
159
+ * Version 2.7 or greater.
160
+ * [Semantic Logger](https://rocketjob.github.io/semantic_logger)
161
+ * Highly concurrent scalable logging.
162
+
102
163
  ## Versioning
103
164
 
104
165
  This project uses [Semantic Versioning](http://semver.org/).
@@ -109,4 +170,5 @@ This project uses [Semantic Versioning](http://semver.org/).
109
170
 
110
171
  ## Contributors
111
172
 
112
- * [Chris Lamb](https://github.com/lambcr)
173
+ [Contributors](https://github.com/rocketjob/rocketjob/graphs/contributors)
174
+
@@ -7,6 +7,7 @@ require 'rocket_job/cli'
7
7
  begin
8
8
  RocketJob::CLI.new(ARGV).run
9
9
  rescue Exception => exc
10
+ return if exc.class == SystemExit
10
11
  # Failsafe logger that writes to STDERR
11
12
  SemanticLogger.add_appender(io: STDERR, level: :error, formatter: :color)
12
13
  SemanticLogger['RocketJob'].error('Rocket Job shutting down due to exception', exc)
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rocketjob_batch'
3
+
4
+ # Log to console
5
+ SemanticLogger.add_appender(io: STDOUT, formatter: :color)
6
+
7
+ perf = RocketJob::Batch::Performance.new
8
+ perf.parse(ARGV)
9
+ RocketJob::Config.load!(perf.environment, perf.mongo_config)
10
+ results = perf.run_test_case
11
+ p results
@@ -0,0 +1,32 @@
1
+ require 'active_support/concern'
2
+ require 'rocket_job/batch/callbacks'
3
+ require 'rocket_job/batch/io'
4
+ require 'rocket_job/batch/logger'
5
+ require 'rocket_job/batch/model'
6
+ require 'rocket_job/batch/state_machine'
7
+ require 'rocket_job/batch/throttle'
8
+ require 'rocket_job/batch/throttle_running_slices'
9
+ require 'rocket_job/batch/worker'
10
+
11
+ module RocketJob
12
+ module Batch
13
+ extend ActiveSupport::Concern
14
+
15
+ include Model
16
+ include StateMachine
17
+ include Callbacks
18
+ include Logger
19
+ include Worker
20
+ include Throttle
21
+ include ThrottleRunningSlices
22
+ include IO
23
+
24
+ autoload :LowerPriority, 'rocket_job/batch/lower_priority'
25
+ autoload :Performance, 'rocket_job/batch/performance'
26
+ autoload :Statistics, 'rocket_job/batch/statistics'
27
+ autoload :Result, 'rocket_job/batch/result'
28
+ autoload :Results, 'rocket_job/batch/results'
29
+ autoload :Tabular, 'rocket_job/batch/tabular'
30
+ end
31
+ end
32
+
@@ -0,0 +1,40 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ module Callbacks
6
+ extend ActiveSupport::Concern
7
+ include ActiveSupport::Callbacks
8
+
9
+ included do
10
+ define_callbacks :slice
11
+
12
+ def self.before_slice(*filters, &blk)
13
+ set_callback(:slice, :before, *filters, &blk)
14
+ end
15
+
16
+ def self.after_slice(*filters, &blk)
17
+ set_callback(:slice, :after, *filters, &blk)
18
+ end
19
+
20
+ def self.around_slice(*filters, &blk)
21
+ set_callback(:slice, :around, *filters, &blk)
22
+ end
23
+
24
+ # before_batch and after_batch are called asynchronously.
25
+ # around_batch is not supported.
26
+ define_callbacks :before_batch
27
+ define_callbacks :after_batch
28
+
29
+ def self.before_batch(*filters, &blk)
30
+ set_callback(:before_batch, :before, *filters, &blk)
31
+ end
32
+
33
+ def self.after_batch(*filters, &blk)
34
+ set_callback(:after_batch, :after, *filters, &blk)
35
+ end
36
+ end
37
+
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,154 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ # IO methods for sliced jobs
6
+ module IO
7
+ extend ActiveSupport::Concern
8
+
9
+ # Returns [RocketJob::Sliced::Input] input collection for holding input slices
10
+ #
11
+ # Parameters:
12
+ # category [Symbol]
13
+ # The name of the category to access or upload data into
14
+ # Default: :main ( Uses the single default input collection for this job )
15
+ # Validates: This value must be one of those listed in #input_categories
16
+ def input(category = :main)
17
+ raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}" unless input_categories.include?(category) || (category == :main)
18
+
19
+ collection_name = "rocket_job.inputs.#{id}"
20
+ collection_name << ".#{category}" unless category == :main
21
+
22
+ (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(slice_arguments(collection_name))
23
+ end
24
+
25
+ # Returns [RocketJob::Sliced::Output] output collection for holding output slices
26
+ # Returns nil if no output is being collected
27
+ #
28
+ # Parameters:
29
+ # category [Symbol]
30
+ # The name of the category to access or download data from
31
+ # Default: :main ( Uses the single default output collection for this job )
32
+ # Validates: This value must be one of those listed in #output_categories
33
+ def output(category = :main)
34
+ raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}" unless output_categories.include?(category) || (category == :main)
35
+
36
+ collection_name = "rocket_job.outputs.#{id}"
37
+ collection_name << ".#{category}" unless category == :main
38
+
39
+ (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(slice_arguments(collection_name))
40
+ end
41
+
42
+ # Upload the supplied file_name or stream
43
+ #
44
+ # Updates the record_count after adding the records
45
+ #
46
+ # Options
47
+ # :file_name [String]
48
+ # When file_name_or_io is an IO, the original base file name if any.
49
+ # Default: nil
50
+ #
51
+ # See RocketJob::Sliced::Input#upload for remaining options
52
+ #
53
+ # Returns [Integer] the number of records uploaded
54
+ #
55
+ # Note:
56
+ # * Not thread-safe. Only call from one thread at a time
57
+ def upload(file_name_or_io = nil, file_name: nil, category: :main, **args, &block)
58
+ if file_name
59
+ self.upload_file_name = file_name
60
+ elsif file_name_or_io.is_a?(String)
61
+ self.upload_file_name = file_name_or_io
62
+ end
63
+ count = input(category).upload(file_name_or_io, file_name: file_name, **args, &block)
64
+ self.record_count = (record_count || 0) + count
65
+ count
66
+ end
67
+
68
+ # Upload the supplied slices for processing by workers
69
+ #
70
+ # Updates the record_count after adding the records
71
+ #
72
+ # Returns [Integer] the number of records uploaded
73
+ #
74
+ # Parameters
75
+ # `slice` [ Array<Hash | Array | String | Integer | Float | Symbol | Regexp | Time> ]
76
+ # All elements in `slice` must be serializable to BSON
77
+ # For example the following types are not supported: Date
78
+ #
79
+ # Note:
80
+ # The caller should honor `:slice_size`; the entire slice is loaded as-is.
81
+ #
82
+ # Note:
83
+ # Not thread-safe. Only call from one thread at a time
84
+ def upload_slice(slice)
85
+ input.insert(slice)
86
+ count = slice.size
87
+ self.record_count = (record_count || 0) + count
88
+ count
89
+ end
90
+
91
+ # Download the output data into the supplied file_name or stream
92
+ #
93
+ # Parameters
94
+ # file_name_or_io [String|IO]
95
+ # The file_name of the file to write to, or an IO Stream that implements #write.
96
+ #
97
+ # options:
98
+ # category [Symbol]
99
+ # The category of output to download
100
+ # Default: :main
101
+ #
102
+ # See RocketJob::Sliced::Output#download for remaining options
103
+ #
104
+ # Returns [Integer] the number of records downloaded
105
+ def download(file_name_or_io = nil, category: :main, **args, &block)
106
+ raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
107
+
108
+ output(category).download(file_name_or_io, **args, &block)
109
+ end
110
+
111
+ # Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
112
+ #
113
+ # If a block is supplied, the block is supplied with a writer that should be used to
114
+ # accumulate the results.
115
+ #
116
+ # Examples
117
+ #
118
+ # job.write_output('hello world')
119
+ #
120
+ # job.write_output do |writer|
121
+ # writer << 'hello world'
122
+ # end
123
+ #
124
+ # job.write_output do |writer|
125
+ # result = RocketJob::Batch::Results.new
126
+ # result << RocketJob::Batch::Result.new(:main, 'hello world')
127
+ # result << RocketJob::Batch::Result.new(:errors, 'errors')
128
+ # writer << result
129
+ # end
130
+ #
131
+ # result = RocketJob::Batch::Results.new
132
+ # result << RocketJob::Batch::Result.new(:main, 'hello world')
133
+ # result << RocketJob::Batch::Result.new(:errors, 'errors')
134
+ # job.write_output(result)
135
+ def write_output(result = nil, input_slice = nil, &block)
136
+ if block
137
+ RocketJob::Sliced::Writer::Output.collect(self, input_slice, &block)
138
+ else
139
+ raise(ArgumentError, 'result parameter is required when no block is supplied') unless result
140
+ RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
141
+ end
142
+ end
143
+
144
+ private
145
+
146
+ def slice_arguments(collection_name)
147
+ {
148
+ collection_name: collection_name,
149
+ slice_size: slice_size
150
+ }
151
+ end
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,57 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ module Logger
6
+ extend ActiveSupport::Concern
7
+
8
+ included do
9
+ # Log all state transitions
10
+ after_start :rocket_job_batch_log_state_change
11
+ after_complete :rocket_job_batch_log_state_change
12
+ after_fail :rocket_job_batch_log_state_change
13
+ after_retry :rocket_job_batch_log_state_change
14
+ after_pause :rocket_job_batch_log_state_change
15
+ after_resume :rocket_job_batch_log_state_change
16
+ after_abort :rocket_job_batch_log_state_change
17
+ after_requeue :rocket_job_batch_log_state_change
18
+
19
+ around_slice :rocket_job_batch_slice_logger
20
+
21
+ # Remove perform level logger and replace with slice level logger
22
+ skip_callback(:perform, :around, :rocket_job_around_logger)
23
+ end
24
+
25
+ private
26
+
27
+ # Add logging around processing of each slice
28
+ # - metric allows duration to be forwarded to statsd, etc.
29
+ # - log_exception logs entire exception if raised
30
+ # - on_exception_level changes log level from info to error on exception
31
+ # - silence noisy jobs by raising log level
32
+ def rocket_job_batch_slice_logger(&block)
33
+ logger.measure_info(
34
+ 'Completed slice',
35
+ metric: "#{self.class.name}/slice",
36
+ log_exception: :full,
37
+ on_exception_level: :error,
38
+ silence: log_level,
39
+ payload: {records: rocket_job_slice&.size},
40
+ &block
41
+ )
42
+ end
43
+
44
+ def rocket_job_batch_log_state_change
45
+ logger.info(aasm.current_event.to_s.camelcase, rocket_job_batch_log_payload)
46
+ end
47
+
48
+ def rocket_job_batch_log_payload
49
+ {
50
+ from: aasm.from_state,
51
+ to: aasm.to_state,
52
+ event: aasm.current_event
53
+ }
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,54 @@
1
+ require 'active_support/concern'
2
+ module RocketJob
3
+ module Batch
4
+ # Automatically lower the priority for Jobs with a higher record_count.
5
+ #
6
+ # Note:
7
+ # - Add `:lower_priority` as a before_batch, but only once the `record_count` has been set.
8
+ # - If the `record_count` is not set by the time this plugin's `before_batch`
9
+ # is called, then the priority will not be modified.
10
+ #
11
+ # class SampleJob < RocketJob::Job
12
+ # include RocketJob::Batch
13
+ # include RocketJob::Batch::LowerPriority
14
+ #
15
+ # before_batch :upload_data, :lower_priority
16
+ #
17
+ # def perform(record)
18
+ # record.reverse
19
+ # end
20
+ #
21
+ # private
22
+ #
23
+ # def upload_data
24
+ # upload do |stream|
25
+ # stream << 'abc'
26
+ # stream << 'def'
27
+ # stream << 'ghi'
28
+ # end
29
+ # end
30
+ # end
31
+ module LowerPriority
32
+ extend ActiveSupport::Concern
33
+
34
+ included do
35
+ unless public_method_defined?(:record_count=)
36
+ raise(ArgumentError, 'LowerPriority can only be used in conjunction with RocketJob::Plugins::Batch')
37
+ end
38
+
39
+ # For each of this many records lower the priority by 1.
40
+ class_attribute :lower_priority_count
41
+ self.lower_priority_count = 100_000
42
+ end
43
+
44
+ private
45
+
46
+ def lower_priority
47
+ return unless record_count
48
+
49
+ new_priority = priority + (record_count.to_f / lower_priority_count).to_i
50
+ self.priority = [new_priority, 100].min
51
+ end
52
+ end
53
+ end
54
+ end