rocketjob 3.5.2 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +63 -1
  3. data/bin/rocketjob +1 -0
  4. data/bin/rocketjob_batch_perf +11 -0
  5. data/lib/rocket_job/batch.rb +32 -0
  6. data/lib/rocket_job/batch/callbacks.rb +40 -0
  7. data/lib/rocket_job/batch/io.rb +154 -0
  8. data/lib/rocket_job/batch/logger.rb +57 -0
  9. data/lib/rocket_job/batch/lower_priority.rb +54 -0
  10. data/lib/rocket_job/batch/model.rb +157 -0
  11. data/lib/rocket_job/batch/performance.rb +99 -0
  12. data/lib/rocket_job/batch/result.rb +8 -0
  13. data/lib/rocket_job/batch/results.rb +9 -0
  14. data/lib/rocket_job/batch/state_machine.rb +102 -0
  15. data/lib/rocket_job/batch/statistics.rb +88 -0
  16. data/lib/rocket_job/batch/tabular.rb +56 -0
  17. data/lib/rocket_job/batch/tabular/input.rb +123 -0
  18. data/lib/rocket_job/batch/tabular/output.rb +59 -0
  19. data/lib/rocket_job/batch/throttle.rb +91 -0
  20. data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
  21. data/lib/rocket_job/batch/worker.rb +288 -0
  22. data/lib/rocket_job/cli.rb +29 -7
  23. data/lib/rocket_job/config.rb +1 -1
  24. data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
  25. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
  26. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
  27. data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
  28. data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
  29. data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
  31. data/lib/rocket_job/jobs/performance_job.rb +18 -0
  32. data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
  33. data/lib/rocket_job/plugins/document.rb +2 -8
  34. data/lib/rocket_job/plugins/job/persistence.rb +6 -4
  35. data/lib/rocket_job/plugins/job/throttle.rb +3 -6
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -2
  37. data/lib/rocket_job/server.rb +14 -3
  38. data/lib/rocket_job/sliced/input.rb +336 -0
  39. data/lib/rocket_job/sliced/output.rb +99 -0
  40. data/lib/rocket_job/sliced/slice.rb +166 -0
  41. data/lib/rocket_job/sliced/slices.rb +166 -0
  42. data/lib/rocket_job/sliced/writer/input.rb +60 -0
  43. data/lib/rocket_job/sliced/writer/output.rb +82 -0
  44. data/lib/rocket_job/version.rb +1 -1
  45. data/lib/rocket_job/worker.rb +2 -2
  46. data/lib/rocketjob.rb +28 -0
  47. metadata +51 -62
  48. data/test/config/database.yml +0 -5
  49. data/test/config/mongoid.yml +0 -88
  50. data/test/config_test.rb +0 -10
  51. data/test/dirmon_entry_test.rb +0 -313
  52. data/test/dirmon_job_test.rb +0 -216
  53. data/test/files/text.txt +0 -3
  54. data/test/job_test.rb +0 -71
  55. data/test/jobs/housekeeping_job_test.rb +0 -102
  56. data/test/jobs/on_demand_job_test.rb +0 -59
  57. data/test/jobs/upload_file_job_test.rb +0 -107
  58. data/test/plugins/cron_test.rb +0 -166
  59. data/test/plugins/job/callbacks_test.rb +0 -166
  60. data/test/plugins/job/defaults_test.rb +0 -53
  61. data/test/plugins/job/logger_test.rb +0 -56
  62. data/test/plugins/job/model_test.rb +0 -94
  63. data/test/plugins/job/persistence_test.rb +0 -94
  64. data/test/plugins/job/state_machine_test.rb +0 -116
  65. data/test/plugins/job/throttle_test.rb +0 -111
  66. data/test/plugins/job/worker_test.rb +0 -199
  67. data/test/plugins/processing_window_test.rb +0 -109
  68. data/test/plugins/restart_test.rb +0 -193
  69. data/test/plugins/retry_test.rb +0 -88
  70. data/test/plugins/singleton_test.rb +0 -92
  71. data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
  72. data/test/plugins/state_machine_test.rb +0 -67
  73. data/test/plugins/transaction_test.rb +0 -84
  74. data/test/test_db.sqlite3 +0 -0
  75. data/test/test_helper.rb +0 -17
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bd3505de7a318efb53880ef4c0f506a970d1ee30277f531bef7e2a9f257cf500
4
- data.tar.gz: '085e0371c60662b51ba53c4e5e6e1b2e016cdc2e37de1b51a437e5477f907a22'
3
+ metadata.gz: c3d1d8a5bd37991ac8ad2dd5ae28b833dc4cb59d69da1a1cc281c904ab15c3eb
4
+ data.tar.gz: 39336f01d701f34e4f25e00e03fa5f06299ec19d1f7c0b52af5aff6ae4b4327b
5
5
  SHA512:
6
- metadata.gz: a9a9441957a5405ed1d3d98bbe5bf0a45febbbdf8283c09de967f19abd1e5a7470cc871d66b804f8569ad90b48732fcd4d649e3d0c282b60be2b000bdd60adc8
7
- data.tar.gz: d47b9c8a4eba0fd88798b76926da8c4073b0c26c6743ae3b54883535f278ea3317d6fb7901d57caf8506f710b5fdb7151b9acae65e7e2427bf2607a0c926231a
6
+ metadata.gz: 42266518d00516c62ccd3d0139e6fa22427f48b14454a2f06f0c04f559620b9bbf71700f38b518db03b8290cd6ab7ad7353c47cfbb579aa2f5642ef2a9b2cfb5
7
+ data.tar.gz: 444e848667f4a09629b18b467713fe9ae26abb22fd68ceb9048e0adb617c3bbe066ec407c9922577f9ea6bc40271a9d6c84d6064456e5e0532ddfa8f0be85a69
data/README.md CHANGED
@@ -17,6 +17,59 @@ Checkout http://rocketjob.io/
17
17
  * Questions? Join the chat room on Gitter for [rocketjob support](https://gitter.im/rocketjob/support)
18
18
  * [Report bugs](https://github.com/rocketjob/rocketjob/issues)
19
19
 
20
+ ## Rocket Job 4
21
+
22
+ Rocket Job Pro is now open sourced and included within Rocket Job.
23
+
24
+ The `RocketJob::Batch` plugin now adds batch processing capabilities to break up a single task into many
25
+ concurrent workers processing slices of the entire job at the same time.
26
+
27
+
28
+ Example:
29
+
30
+ ```ruby
31
+ class MyJob < RocketJob::Job
32
+ include RocketJob::Batch
33
+
34
+ self.description = "Reverse names"
35
+ self.destroy_on_complete = false
36
+ self.collect_output = true
37
+
38
+ # Method called by all available workers at the same time.
39
+ # Reverse the characters for each line:
40
+ def perform(line)
41
+ line.reverse
42
+ end
43
+ end
44
+ ```
45
+
46
+ Upload a file for processing, for example `names.csv` which could contain:
47
+
48
+ ```
49
+ jack
50
+ jane
51
+ bill
52
+ john
53
+ blake
54
+ chris
55
+ dave
56
+ marc
57
+ ```
58
+
59
+ To queue the above job for processing:
60
+
61
+ ```ruby
62
+ job = MyJob.new
63
+ job.upload('names.csv')
64
+ job.save!
65
+ ```
66
+
67
+ Once the job has completed, download the results into a file:
68
+
69
+ ```ruby
70
+ job.download('names_reversed.csv')
71
+ ```
72
+
20
73
  ## Contributing to the documentation
21
74
 
22
75
  To contribute to the documentation it is as easy as forking the repository
@@ -99,6 +152,14 @@ Rocket Job is tested and supported on the following Ruby platforms:
99
152
  - Ruby 2.1, 2.2, 2.3, 2.4, and above
100
153
  - JRuby 9.0.5 and above
101
154
 
155
+ ## Dependencies
156
+
157
+ * [MongoDB](https://www.mongodb.org)
158
+ * Persists job information.
159
+ * Version 2.7 or greater.
160
+ * [Semantic Logger](https://rocketjob.github.io/semantic_logger)
161
+ * Highly concurrent scalable logging.
162
+
102
163
  ## Versioning
103
164
 
104
165
  This project uses [Semantic Versioning](http://semver.org/).
@@ -109,4 +170,5 @@ This project uses [Semantic Versioning](http://semver.org/).
109
170
 
110
171
  ## Contributors
111
172
 
112
- * [Chris Lamb](https://github.com/lambcr)
173
+ [Contributors](https://github.com/rocketjob/rocketjob/graphs/contributors)
174
+
@@ -7,6 +7,7 @@ require 'rocket_job/cli'
7
7
  begin
8
8
  RocketJob::CLI.new(ARGV).run
9
9
  rescue Exception => exc
10
+ return if exc.class == SystemExit
10
11
  # Failsafe logger that writes to STDERR
11
12
  SemanticLogger.add_appender(io: STDERR, level: :error, formatter: :color)
12
13
  SemanticLogger['RocketJob'].error('Rocket Job shutting down due to exception', exc)
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rocketjob_batch'
3
+
4
+ # Log to console
5
+ SemanticLogger.add_appender(io: STDOUT, formatter: :color)
6
+
7
+ perf = RocketJob::Batch::Performance.new
8
+ perf.parse(ARGV)
9
+ RocketJob::Config.load!(perf.environment, perf.mongo_config)
10
+ results = perf.run_test_case
11
+ p results
@@ -0,0 +1,32 @@
1
+ require 'active_support/concern'
2
+ require 'rocket_job/batch/callbacks'
3
+ require 'rocket_job/batch/io'
4
+ require 'rocket_job/batch/logger'
5
+ require 'rocket_job/batch/model'
6
+ require 'rocket_job/batch/state_machine'
7
+ require 'rocket_job/batch/throttle'
8
+ require 'rocket_job/batch/throttle_running_slices'
9
+ require 'rocket_job/batch/worker'
10
+
11
+ module RocketJob
12
+ module Batch
13
+ extend ActiveSupport::Concern
14
+
15
+ include Model
16
+ include StateMachine
17
+ include Callbacks
18
+ include Logger
19
+ include Worker
20
+ include Throttle
21
+ include ThrottleRunningSlices
22
+ include IO
23
+
24
+ autoload :LowerPriority, 'rocket_job/batch/lower_priority'
25
+ autoload :Performance, 'rocket_job/batch/performance'
26
+ autoload :Statistics, 'rocket_job/batch/statistics'
27
+ autoload :Result, 'rocket_job/batch/result'
28
+ autoload :Results, 'rocket_job/batch/results'
29
+ autoload :Tabular, 'rocket_job/batch/tabular'
30
+ end
31
+ end
32
+
@@ -0,0 +1,40 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ module Callbacks
6
+ extend ActiveSupport::Concern
7
+ include ActiveSupport::Callbacks
8
+
9
+ included do
10
+ define_callbacks :slice
11
+
12
+ def self.before_slice(*filters, &blk)
13
+ set_callback(:slice, :before, *filters, &blk)
14
+ end
15
+
16
+ def self.after_slice(*filters, &blk)
17
+ set_callback(:slice, :after, *filters, &blk)
18
+ end
19
+
20
+ def self.around_slice(*filters, &blk)
21
+ set_callback(:slice, :around, *filters, &blk)
22
+ end
23
+
24
+ # before_batch and after_batch are called asynchronously.
25
+ # around_batch is not supported.
26
+ define_callbacks :before_batch
27
+ define_callbacks :after_batch
28
+
29
+ def self.before_batch(*filters, &blk)
30
+ set_callback(:before_batch, :before, *filters, &blk)
31
+ end
32
+
33
+ def self.after_batch(*filters, &blk)
34
+ set_callback(:after_batch, :after, *filters, &blk)
35
+ end
36
+ end
37
+
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,154 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ # IO methods for sliced jobs
6
+ module IO
7
+ extend ActiveSupport::Concern
8
+
9
+ # Returns [RocketJob::Sliced::Input] input collection for holding input slices
10
+ #
11
+ # Parameters:
12
+ # category [Symbol]
13
+ # The name of the category to access or upload data into
14
+ # Default: :main ( Uses the single default input collection for this job )
15
+ # Validates: This value must be one of those listed in #input_categories
16
+ def input(category = :main)
17
+ raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}" unless input_categories.include?(category) || (category == :main)
18
+
19
+ collection_name = "rocket_job.inputs.#{id}"
20
+ collection_name << ".#{category}" unless category == :main
21
+
22
+ (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(slice_arguments(collection_name))
23
+ end
24
+
25
+ # Returns [RocketJob::Sliced::Output] output collection for holding output slices
26
+ # Returns nil if no output is being collected
27
+ #
28
+ # Parameters:
29
+ # category [Symbol]
30
+ # The name of the category to access or download data from
31
+ # Default: :main ( Uses the single default output collection for this job )
32
+ # Validates: This value must be one of those listed in #output_categories
33
+ def output(category = :main)
34
+ raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}" unless output_categories.include?(category) || (category == :main)
35
+
36
+ collection_name = "rocket_job.outputs.#{id}"
37
+ collection_name << ".#{category}" unless category == :main
38
+
39
+ (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(slice_arguments(collection_name))
40
+ end
41
+
42
+ # Upload the supplied file_name or stream
43
+ #
44
+ # Updates the record_count after adding the records
45
+ #
46
+ # Options
47
+ # :file_name [String]
48
+ # When file_name_or_io is an IO, the original base file name if any.
49
+ # Default: nil
50
+ #
51
+ # See RocketJob::Sliced::Input#upload for remaining options
52
+ #
53
+ # Returns [Integer] the number of records uploaded
54
+ #
55
+ # Note:
56
+ # * Not thread-safe. Only call from one thread at a time
57
+ def upload(file_name_or_io = nil, file_name: nil, category: :main, **args, &block)
58
+ if file_name
59
+ self.upload_file_name = file_name
60
+ elsif file_name_or_io.is_a?(String)
61
+ self.upload_file_name = file_name_or_io
62
+ end
63
+ count = input(category).upload(file_name_or_io, file_name: file_name, **args, &block)
64
+ self.record_count = (record_count || 0) + count
65
+ count
66
+ end
67
+
68
+ # Upload the supplied slices for processing by workers
69
+ #
70
+ # Updates the record_count after adding the records
71
+ #
72
+ # Returns [Integer] the number of records uploaded
73
+ #
74
+ # Parameters
75
+ # `slice` [ Array<Hash | Array | String | Integer | Float | Symbol | Regexp | Time> ]
76
+ # All elements in `slice` must be serializable to BSON
77
+ # For example the following types are not supported: Date
78
+ #
79
+ # Note:
80
+ # The caller should honor `:slice_size`, the entire slice is loaded as-is.
81
+ #
82
+ # Note:
83
+ # Not thread-safe. Only call from one thread at a time
84
+ def upload_slice(slice)
85
+ input.insert(slice)
86
+ count = slice.size
87
+ self.record_count = (record_count || 0) + count
88
+ count
89
+ end
90
+
91
+ # Download the output data into the supplied file_name or stream
92
+ #
93
+ # Parameters
94
+ # file_name_or_io [String|IO]
95
+ # The file_name of the file to write to, or an IO Stream that implements #write.
96
+ #
97
+ # options:
98
+ # category [Symbol]
99
+ # The category of output to download
100
+ # Default: :main
101
+ #
102
+ # See RocketJob::Sliced::Output#download for remaining options
103
+ #
104
+ # Returns [Integer] the number of records downloaded
105
+ def download(file_name_or_io = nil, category: :main, **args, &block)
106
+ raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
107
+
108
+ output(category).download(file_name_or_io, **args, &block)
109
+ end
110
+
111
+ # Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
112
+ #
113
+ # If a block is supplied, the block is supplied with a writer that should be used to
114
+ # accumulate the results.
115
+ #
116
+ # Examples
117
+ #
118
+ # job.write_output('hello world')
119
+ #
120
+ # job.write_output do |writer|
121
+ # writer << 'hello world'
122
+ # end
123
+ #
124
+ # job.write_output do |writer|
125
+ # result = RocketJob::Batch::Results.new
126
+ # result << RocketJob::Batch::Result.new(:main, 'hello world')
127
+ # result << RocketJob::Batch::Result.new(:errors, 'errors')
128
+ # writer << result
129
+ # end
130
+ #
131
+ # result = RocketJob::Batch::Results.new
132
+ # result << RocketJob::Batch::Result.new(:main, 'hello world')
133
+ # result << RocketJob::Batch::Result.new(:errors, 'errors')
134
+ # job.write_output(result)
135
+ def write_output(result = nil, input_slice = nil, &block)
136
+ if block
137
+ RocketJob::Sliced::Writer::Output.collect(self, input_slice, &block)
138
+ else
139
+ raise(ArgumentError, 'result parameter is required when no block is supplied') unless result
140
+ RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
141
+ end
142
+ end
143
+
144
+ private
145
+
146
+ def slice_arguments(collection_name)
147
+ {
148
+ collection_name: collection_name,
149
+ slice_size: slice_size
150
+ }
151
+ end
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,57 @@
1
+ require 'active_support/concern'
2
+
3
+ module RocketJob
4
+ module Batch
5
+ module Logger
6
+ extend ActiveSupport::Concern
7
+
8
+ included do
9
+ # Log all state transitions
10
+ after_start :rocket_job_batch_log_state_change
11
+ after_complete :rocket_job_batch_log_state_change
12
+ after_fail :rocket_job_batch_log_state_change
13
+ after_retry :rocket_job_batch_log_state_change
14
+ after_pause :rocket_job_batch_log_state_change
15
+ after_resume :rocket_job_batch_log_state_change
16
+ after_abort :rocket_job_batch_log_state_change
17
+ after_requeue :rocket_job_batch_log_state_change
18
+
19
+ around_slice :rocket_job_batch_slice_logger
20
+
21
+ # Remove perform level logger and replace with slice level logger
22
+ skip_callback(:perform, :around, :rocket_job_around_logger)
23
+ end
24
+
25
+ private
26
+
27
+ # Add logging around processing of each slice
28
+ # - metric allows duration to be forwarded to statsd, etc.
29
+ # - log_exception logs entire exception if raised
30
+ # - on_exception_level changes log level from info to error on exception
31
+ # - silence noisy jobs by raising log level
32
+ def rocket_job_batch_slice_logger(&block)
33
+ logger.measure_info(
34
+ 'Completed slice',
35
+ metric: "#{self.class.name}/slice",
36
+ log_exception: :full,
37
+ on_exception_level: :error,
38
+ silence: log_level,
39
+ payload: {records: rocket_job_slice&.size},
40
+ &block
41
+ )
42
+ end
43
+
44
+ def rocket_job_batch_log_state_change
45
+ logger.info(aasm.current_event.to_s.camelcase, rocket_job_batch_log_payload)
46
+ end
47
+
48
+ def rocket_job_batch_log_payload
49
+ {
50
+ from: aasm.from_state,
51
+ to: aasm.to_state,
52
+ event: aasm.current_event
53
+ }
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,54 @@
1
+ require 'active_support/concern'
2
+ module RocketJob
3
+ module Batch
4
+ # Automatically lower the priority for Jobs with a higher record_count.
5
+ #
6
+ # Note:
7
+ # - Add `:lower_priority` as a before_batch, but only once the `record_count` has been set.
8
+ # - If the `record_count` is not set by the time this plugin's `before_batch`
9
+ # is called, then the priority will not be modified.
10
+ #
11
+ # class SampleJob < RocketJob::Job
12
+ # include RocketJob::Batch
13
+ # include RocketJob::Batch::LowerPriority
14
+ #
15
+ # before_batch :upload_data, :lower_priority
16
+ #
17
+ # def perform(record)
18
+ # record.reverse
19
+ # end
20
+ #
21
+ # private
22
+ #
23
+ # def upload_data
24
+ # upload do |stream|
25
+ # stream << 'abc'
26
+ # stream << 'def'
27
+ # stream << 'ghi'
28
+ # end
29
+ # end
30
+ # end
31
+ module LowerPriority
32
+ extend ActiveSupport::Concern
33
+
34
+ included do
35
+ unless public_method_defined?(:record_count=)
36
+ raise(ArgumentError, 'LowerPriority can only be used in conjunction with RocketJob::Plugins::Batch')
37
+ end
38
+
39
+ # For each of this many records lower the priority by 1.
40
+ class_attribute :lower_priority_count
41
+ self.lower_priority_count = 100_000
42
+ end
43
+
44
+ private
45
+
46
+ def lower_priority
47
+ return unless record_count
48
+
49
+ new_priority = priority + (record_count.to_f / lower_priority_count).to_i
50
+ self.priority = [new_priority, 100].min
51
+ end
52
+ end
53
+ end
54
+ end