rocketjob 3.5.2 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +63 -1
  3. data/bin/rocketjob +1 -0
  4. data/bin/rocketjob_batch_perf +11 -0
  5. data/lib/rocket_job/batch.rb +32 -0
  6. data/lib/rocket_job/batch/callbacks.rb +40 -0
  7. data/lib/rocket_job/batch/io.rb +154 -0
  8. data/lib/rocket_job/batch/logger.rb +57 -0
  9. data/lib/rocket_job/batch/lower_priority.rb +54 -0
  10. data/lib/rocket_job/batch/model.rb +157 -0
  11. data/lib/rocket_job/batch/performance.rb +99 -0
  12. data/lib/rocket_job/batch/result.rb +8 -0
  13. data/lib/rocket_job/batch/results.rb +9 -0
  14. data/lib/rocket_job/batch/state_machine.rb +102 -0
  15. data/lib/rocket_job/batch/statistics.rb +88 -0
  16. data/lib/rocket_job/batch/tabular.rb +56 -0
  17. data/lib/rocket_job/batch/tabular/input.rb +123 -0
  18. data/lib/rocket_job/batch/tabular/output.rb +59 -0
  19. data/lib/rocket_job/batch/throttle.rb +91 -0
  20. data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
  21. data/lib/rocket_job/batch/worker.rb +288 -0
  22. data/lib/rocket_job/cli.rb +29 -7
  23. data/lib/rocket_job/config.rb +1 -1
  24. data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
  25. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
  26. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
  27. data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
  28. data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
  29. data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
  31. data/lib/rocket_job/jobs/performance_job.rb +18 -0
  32. data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
  33. data/lib/rocket_job/plugins/document.rb +2 -8
  34. data/lib/rocket_job/plugins/job/persistence.rb +6 -4
  35. data/lib/rocket_job/plugins/job/throttle.rb +3 -6
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -2
  37. data/lib/rocket_job/server.rb +14 -3
  38. data/lib/rocket_job/sliced/input.rb +336 -0
  39. data/lib/rocket_job/sliced/output.rb +99 -0
  40. data/lib/rocket_job/sliced/slice.rb +166 -0
  41. data/lib/rocket_job/sliced/slices.rb +166 -0
  42. data/lib/rocket_job/sliced/writer/input.rb +60 -0
  43. data/lib/rocket_job/sliced/writer/output.rb +82 -0
  44. data/lib/rocket_job/version.rb +1 -1
  45. data/lib/rocket_job/worker.rb +2 -2
  46. data/lib/rocketjob.rb +28 -0
  47. metadata +51 -62
  48. data/test/config/database.yml +0 -5
  49. data/test/config/mongoid.yml +0 -88
  50. data/test/config_test.rb +0 -10
  51. data/test/dirmon_entry_test.rb +0 -313
  52. data/test/dirmon_job_test.rb +0 -216
  53. data/test/files/text.txt +0 -3
  54. data/test/job_test.rb +0 -71
  55. data/test/jobs/housekeeping_job_test.rb +0 -102
  56. data/test/jobs/on_demand_job_test.rb +0 -59
  57. data/test/jobs/upload_file_job_test.rb +0 -107
  58. data/test/plugins/cron_test.rb +0 -166
  59. data/test/plugins/job/callbacks_test.rb +0 -166
  60. data/test/plugins/job/defaults_test.rb +0 -53
  61. data/test/plugins/job/logger_test.rb +0 -56
  62. data/test/plugins/job/model_test.rb +0 -94
  63. data/test/plugins/job/persistence_test.rb +0 -94
  64. data/test/plugins/job/state_machine_test.rb +0 -116
  65. data/test/plugins/job/throttle_test.rb +0 -111
  66. data/test/plugins/job/worker_test.rb +0 -199
  67. data/test/plugins/processing_window_test.rb +0 -109
  68. data/test/plugins/restart_test.rb +0 -193
  69. data/test/plugins/retry_test.rb +0 -88
  70. data/test/plugins/singleton_test.rb +0 -92
  71. data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
  72. data/test/plugins/state_machine_test.rb +0 -67
  73. data/test/plugins/transaction_test.rb +0 -84
  74. data/test/test_db.sqlite3 +0 -0
  75. data/test/test_helper.rb +0 -17
@@ -1,15 +1,16 @@
1
1
  require 'optparse'
2
+ require 'json'
2
3
  require 'semantic_logger'
3
4
  require 'mongoid'
4
5
  require 'rocketjob'
5
- require 'rocket_job/extensions/mongoid/factory'
6
+ require 'pathname'
6
7
  module RocketJob
7
8
  # Command Line Interface parser for Rocket Job
8
9
  class CLI
9
10
  include SemanticLogger::Loggable
10
11
  attr_accessor :name, :workers, :environment, :pidfile, :directory, :quiet,
11
12
  :log_level, :log_file, :mongo_config, :symmetric_encryption_config,
12
- :filter
13
+ :include_filter, :exclude_filter, :where_filter
13
14
 
14
15
  def initialize(argv)
15
16
  @name = nil
@@ -22,7 +23,8 @@ module RocketJob
22
23
  @log_file = nil
23
24
  @mongo_config = nil
24
25
  @symmetric_encryption_config = nil
25
- @filter = nil
26
+ @include_filter = nil
27
+ @exclude_filter = nil
26
28
  parse(argv)
27
29
  end
28
30
 
@@ -38,10 +40,13 @@ module RocketJob
38
40
  # In case Rails did not load the Mongoid Config
39
41
  RocketJob::Config.load!(environment, mongo_config, symmetric_encryption_config) if ::Mongoid::Config.clients.empty?
40
42
 
43
+ filter = build_filter
44
+
41
45
  opts = {}
42
46
  opts[:name] = name if name
43
47
  opts[:max_workers] = workers if workers
44
- opts[:filter] = {_type: filter} if filter
48
+ opts[:filter] = filter if filter
49
+
45
50
  Server.run(opts)
46
51
  end
47
52
 
@@ -91,7 +96,7 @@ module RocketJob
91
96
 
92
97
  require 'rocketjob'
93
98
  begin
94
- require 'rocketjob_pro'
99
+ require 'rocketjob_batch'
95
100
  rescue LoadError
96
101
  nil
97
102
  end
@@ -148,6 +153,17 @@ module RocketJob
148
153
  end
149
154
  end
150
155
 
156
+ # Returns [Hash] a where clause filter to apply to this server.
157
+ # Returns nil if no filter should be applied
158
+ def build_filter
159
+ raise(ArgumentError, 'Cannot supply both a filter and an exclusion filter') if include_filter && exclude_filter
160
+
161
+ filter = where_filter
162
+ (filter ||= {})['_type'] = include_filter if include_filter
163
+ (filter ||= {})['_type'] = {'$not' => exclude_filter} if exclude_filter
164
+ filter
165
+ end
166
+
151
167
  # Parse command line options placing results in the corresponding instance variables
152
168
  def parse(argv)
153
169
  parser = OptionParser.new do |o|
@@ -161,8 +177,14 @@ module RocketJob
161
177
  warn '-t and --threads are deprecated, use -w or --workers'
162
178
  @workers = arg.to_i
163
179
  end
164
- o.on('-F', '--filter REGEXP', 'Limit this worker to only those job classes that match this regular expression (case-insensitive). Example: "DirmonJob|WeeklyReportJob"') do |arg|
165
- @filter = Regexp.new(arg, true)
180
+ o.on('-F', '--filter REGEXP', 'Limit this server to only those job classes that match this regular expression (case-insensitive). Example: "DirmonJob|WeeklyReportJob"') do |arg|
181
+ @include_filter = Regexp.new(arg, true)
182
+ end
183
+ o.on('-E', '--exclude REGEXP', 'Prevent this server from working on any job classes that match this regular expression (case-insensitive). Example: "DirmonJob|WeeklyReportJob"') do |arg|
184
+ @exclude_filter = Regexp.new(arg, true)
185
+ end
186
+ o.on('-W', '--where JSON', "Limit this server instance to the supplied mongo query filter. Supply as a string in JSON format. Example: '{\"priority\":{\"$lte\":25}}'") do |arg|
187
+ @where_filter = JSON.parse(arg)
166
188
  end
167
189
  o.on('-q', '--quiet', 'Do not write to stdout, only to logfile. Necessary when running as a daemon') do
168
190
  @quiet = true
@@ -55,7 +55,7 @@ module RocketJob
55
55
  raise(ArgumentError, "Mongo Configuration file: #{config_file} not found") unless config_file.file?
56
56
 
57
57
  logger.debug "Reading Mongo configuration from: #{config_file}"
58
- Mongoid.load!(config_file, environment)
58
+ ::Mongoid.load!(config_file, environment)
59
59
 
60
60
  # Load Encryption configuration file if present
61
61
  return unless defined?(SymmetricEncryption)
@@ -0,0 +1,37 @@
1
+ require 'mongoid/criteria'
2
+ require 'mongoid/document'
3
+ module RocketJob
4
+ module MongoidClients
5
+ module Options
6
+ extend ActiveSupport::Concern
7
+
8
+ def with_collection(collection_name)
9
+ self.collection_name = collection_name
10
+ self
11
+ end
12
+
13
+ def collection(parent = nil)
14
+ @collection_name ? mongo_client[@collection_name] : super(parent)
15
+ end
16
+
17
+ def collection_name
18
+ @collection_name || super
19
+ end
20
+
21
+ def collection_name=(collection_name)
22
+ @collection_name = collection_name&.to_sym
23
+ end
24
+
25
+ private
26
+
27
+ module ClassMethods
28
+ def with_collection(collection_name)
29
+ all.with_collection(collection_name)
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+
36
+ ::Mongoid::Criteria.include(RocketJob::MongoidClients::Options)
37
+ ::Mongoid::Document.include(RocketJob::MongoidClients::Options)
@@ -0,0 +1,17 @@
1
+ ::Mongoid::Contextual::Mongo
2
+ module Mongoid
3
+ module Contextual
4
+ class Mongo
5
+ def initialize(criteria)
6
+ @criteria, @klass, @cache = criteria, criteria.klass, criteria.options[:cache]
7
+ # Only line changed is here, get the collection from criteria, not @klass
8
+ #@collection = @klass.collection
9
+ @collection = criteria.collection
10
+
11
+ criteria.send(:merge_type_selection)
12
+ @view = collection.find(criteria.selector, session: _session)
13
+ apply_options
14
+ end
15
+ end
16
+ end
17
+ end
@@ -3,10 +3,10 @@ require 'mongoid/factory'
3
3
  module RocketJob
4
4
  # Don't convert to Mongoid::Factory since it conflicts with Mongoid use.
5
5
  module MongoidFactory
6
- def from_db(klass, attributes = nil, selected_fields = nil)
7
- super
8
- rescue NameError
9
- RocketJob::Job.instantiate(attributes, selected_fields)
6
+ def from_db(klass, attributes = nil, criteria = nil)
7
+ obj = super(klass, attributes, criteria)
8
+ obj.collection_name = criteria.collection_name if criteria
9
+ obj
10
10
  end
11
11
  end
12
12
  end
@@ -0,0 +1,38 @@
1
+ require 'mongoid/criteria'
2
+ require 'mongoid/document'
3
+ module RocketJob
4
+ module Mongoid5Clients
5
+ module Options
6
+ extend ActiveSupport::Concern
7
+
8
+ def with_collection(collection_name)
9
+ self.collection_name = collection_name
10
+ self
11
+ end
12
+
13
+ def collection
14
+ return (@klass || self.class).with(persistence_options || {}).collection unless @collection_name
15
+ (@klass || self.class).mongo_client[@collection_name]
16
+ end
17
+
18
+ def collection_name
19
+ @collection_name || super
20
+ end
21
+
22
+ def collection_name=(collection_name)
23
+ @collection_name = collection_name&.to_sym
24
+ end
25
+
26
+ private
27
+
28
+ module ClassMethods
29
+ def with_collection(collection_name)
30
+ all.with_collection(collection_name)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ ::Mongoid::Criteria.include(RocketJob::Mongoid5Clients::Options)
38
+ ::Mongoid::Document.include(RocketJob::Mongoid5Clients::Options)
@@ -0,0 +1,64 @@
1
+ ::Mongoid::Contextual::Mongo
2
+ module Mongoid
3
+ module Contextual
4
+ class Mongo
5
+ def initialize(criteria)
6
+ @criteria, @klass, @cache = criteria, criteria.klass, criteria.options[:cache]
7
+
8
+ # Only line changed is here, get the collection from criteria, not @klass
9
+ #@collection = @klass.with(criteria.persistence_options || {}).collection
10
+ @collection = criteria.collection
11
+
12
+ criteria.send(:merge_type_selection)
13
+ @view = collection.find(criteria.selector)
14
+ apply_options
15
+ end
16
+
17
+ #
18
+ # Patches below add `criteria` as the last argument to `Factory.from_db`
19
+ #
20
+ def first
21
+ return documents.first if cached? && cache_loaded?
22
+ try_cache(:first) do
23
+ if raw_doc = view.limit(-1).first
24
+ doc = Factory.from_db(klass, raw_doc, criteria.options[:fields], criteria)
25
+ eager_load([doc]).first
26
+ end
27
+ end
28
+ end
29
+
30
+ def find_first
31
+ return documents.first if cached? && cache_loaded?
32
+ if raw_doc = view.first
33
+ doc = Factory.from_db(klass, raw_doc, criteria.options[:fields], criteria)
34
+ eager_load([doc]).first
35
+ end
36
+ end
37
+
38
+ def last
39
+ try_cache(:last) do
40
+ with_inverse_sorting do
41
+ if raw_doc = view.limit(-1).first
42
+ doc = Factory.from_db(klass, raw_doc, criteria.options[:fields], criteria)
43
+ eager_load([doc]).first
44
+ end
45
+ end
46
+ end
47
+ end
48
+
49
+ def documents_for_iteration
50
+ return documents if cached? && !documents.empty?
51
+ return view unless eager_loadable?
52
+ docs = view.map{ |doc| Factory.from_db(klass, doc, criteria.options[:fields], criteria) }
53
+ eager_load(docs)
54
+ end
55
+
56
+ def yield_document(document, &block)
57
+ doc = document.respond_to?(:_id) ?
58
+ document : Factory.from_db(klass, document, criteria.options[:fields], criteria)
59
+ yield(doc)
60
+ documents.push(doc) if cacheable?
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,13 @@
1
+ require 'mongoid/factory'
2
+
3
+ module RocketJob
4
+ module Mongoid5Factory
5
+ def from_db(klass, attributes = nil, selected_fields = nil, criteria = nil)
6
+ obj = super(klass, attributes, selected_fields)
7
+ obj.collection_name = criteria.collection_name if criteria
8
+ obj
9
+ end
10
+ end
11
+ end
12
+
13
+ ::Mongoid::Factory.extend(RocketJob::Mongoid5Factory)
@@ -0,0 +1,127 @@
1
+ # Generalized Batch Job.
2
+ #
3
+ # Often used for data correction or cleansing.
4
+ #
5
+ # Example: Iterate over all rows in a table:
6
+ # code = <<-CODE
7
+ # if user = User.find(row)
8
+ # user.cleanse_attributes!
9
+ # user.save(validate: false)
10
+ # end
11
+ # CODE
12
+ # job = RocketJob::Jobs::OnDemandBatchJob.new(code: code, description: 'cleanse users')
13
+ # arel = User.unscoped.all.order('updated_at DESC')
14
+ # job.record_count = job.input.upload_arel(arel)
15
+ # job.save!
16
+ #
17
+ # Console Testing:
18
+ # code = <<-CODE
19
+ # if user = User.find(row)
20
+ # user.cleanse_attributes!
21
+ # user.save(validate: false)
22
+ # end
23
+ # CODE
24
+ # job = RocketJob::Jobs::OnDemandBatchJob.new(code: code, description: 'cleanse users')
25
+ #
26
+ # # Run against a sub-set using a limit
27
+ # arel = User.unscoped.all.order('updated_at DESC').limit(100)
28
+ # job.record_count = job.input.upload_arel(arel)
29
+ #
30
+ # # Run the subset directly within the console
31
+ # job.perform_now
32
+ # job.cleanup!
33
+ #
34
+ # By default output is not collected, add the option `collect_output: true` to collect output.
35
+ # Example:
36
+ # job = RocketJob::Jobs::OnDemandBatchJob.new(description: 'Fix data', code: code, throttle_running_slices: 5, priority: 30, collect_output: true)
37
+ #
38
+ # Example: Move the upload operation into a before_batch.
39
+ # upload_code = <<-CODE
40
+ # arel = User.unscoped.all.order('updated_at DESC')
41
+ # self.record_count = input.upload_arel(arel)
42
+ # CODE
43
+ #
44
+ # code = <<-CODE
45
+ # if user = User.find(row)
46
+ # user.cleanse_attributes!
47
+ # user.save(validate: false)
48
+ # end
49
+ # CODE
50
+ #
51
+ # RocketJob::Jobs::OnDemandBatchJob.create!(
52
+ # before_code: upload_code,
53
+ # code: code,
54
+ # description: 'cleanse users'
55
+ # )
56
+ module RocketJob
57
+ module Jobs
58
+ class OnDemandBatchJob < RocketJob::Job
59
+ include RocketJob::Plugins::Cron
60
+ include RocketJob::Batch
61
+ include RocketJob::Batch::Statistics
62
+
63
+ self.priority = 90
64
+ self.description = 'Batch Job'
65
+ self.destroy_on_complete = false
66
+
67
+ # Code that is performed against every row / record.
68
+ field :code, type: String
69
+
70
+ # Optional code to execute before the batch is run.
71
+ # Usually to upload data into the job.
72
+ field :before_code, type: String
73
+
74
+ # Optional code to execute after the batch is run.
75
+ # Usually to upload data into the job.
76
+ field :after_code, type: String
77
+
78
+ # Data that is made available to the job during the perform.
79
+ # Be sure to store key names only as Strings, not Symbols.
80
+ field :data, type: Hash, default: {}
81
+
82
+ validates :code, presence: true
83
+ validate :validate_code
84
+ validate :validate_before_code
85
+ validate :validate_after_code
86
+
87
+ before_slice :load_perform_code
88
+ before_batch :run_before_code
89
+ after_batch :run_after_code
90
+
91
+ private
92
+
93
+ def load_perform_code
94
+ instance_eval("def perform(row)\n#{code}\nend")
95
+ end
96
+
97
+ def run_before_code
98
+ instance_eval(before_code) if before_code
99
+ end
100
+
101
+ def run_after_code
102
+ instance_eval(after_code) if after_code
103
+ end
104
+
105
+ def validate_code
106
+ return if code.nil?
107
+ validate_field(:code) { load_perform_code }
108
+ end
109
+
110
+ def validate_before_code
111
+ return if before_code.nil?
112
+ validate_field(:before_code) { instance_eval("def __before_code\n#{before_code}\nend") }
113
+ end
114
+
115
+ def validate_after_code
116
+ return if after_code.nil?
117
+ validate_field(:after_code) { instance_eval("def __after_code\n#{after_code}\nend") }
118
+ end
119
+
120
+ def validate_field(field)
121
+ yield
122
+ rescue Exception => exc
123
+ errors.add(field, "Failed to load :#{field}, #{exc.inspect}")
124
+ end
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,18 @@
1
+ module RocketJob
2
+ module Jobs
3
+ class PerformanceJob < RocketJob::Job
4
+ include RocketJob::Batch
5
+
6
+ # Define the job's default attributes
7
+ self.description = 'Performance Test'
8
+ self.priority = 5
9
+ self.slice_size = 100
10
+ self.destroy_on_complete = false
11
+
12
+ # No operation, just return the supplied line (record)
13
+ def perform(line)
14
+ line
15
+ end
16
+ end
17
+ end
18
+ end
@@ -60,11 +60,8 @@ module RocketJob
60
60
 
61
61
  def upload_file(job)
62
62
  if job.respond_to?(:upload)
63
- if original_file_name && defined?(IOStreams)
64
- streams = IOStreams.streams_for_file_name(original_file_name)
65
- job.upload(upload_file_name, streams: streams)
66
- # job.upload sets the archived filename, we want it to be the original file name.
67
- job.upload_file_name = original_file_name
63
+ if original_file_name
64
+ job.upload(upload_file_name, file_name: original_file_name)
68
65
  else
69
66
  job.upload(upload_file_name)
70
67
  end