rocketjob 3.5.2 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +63 -1
  3. data/bin/rocketjob +1 -0
  4. data/bin/rocketjob_batch_perf +11 -0
  5. data/lib/rocket_job/batch.rb +32 -0
  6. data/lib/rocket_job/batch/callbacks.rb +40 -0
  7. data/lib/rocket_job/batch/io.rb +154 -0
  8. data/lib/rocket_job/batch/logger.rb +57 -0
  9. data/lib/rocket_job/batch/lower_priority.rb +54 -0
  10. data/lib/rocket_job/batch/model.rb +157 -0
  11. data/lib/rocket_job/batch/performance.rb +99 -0
  12. data/lib/rocket_job/batch/result.rb +8 -0
  13. data/lib/rocket_job/batch/results.rb +9 -0
  14. data/lib/rocket_job/batch/state_machine.rb +102 -0
  15. data/lib/rocket_job/batch/statistics.rb +88 -0
  16. data/lib/rocket_job/batch/tabular.rb +56 -0
  17. data/lib/rocket_job/batch/tabular/input.rb +123 -0
  18. data/lib/rocket_job/batch/tabular/output.rb +59 -0
  19. data/lib/rocket_job/batch/throttle.rb +91 -0
  20. data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
  21. data/lib/rocket_job/batch/worker.rb +288 -0
  22. data/lib/rocket_job/cli.rb +29 -7
  23. data/lib/rocket_job/config.rb +1 -1
  24. data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
  25. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
  26. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
  27. data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
  28. data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
  29. data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
  31. data/lib/rocket_job/jobs/performance_job.rb +18 -0
  32. data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
  33. data/lib/rocket_job/plugins/document.rb +2 -8
  34. data/lib/rocket_job/plugins/job/persistence.rb +6 -4
  35. data/lib/rocket_job/plugins/job/throttle.rb +3 -6
  36. data/lib/rocket_job/plugins/job/worker.rb +2 -2
  37. data/lib/rocket_job/server.rb +14 -3
  38. data/lib/rocket_job/sliced/input.rb +336 -0
  39. data/lib/rocket_job/sliced/output.rb +99 -0
  40. data/lib/rocket_job/sliced/slice.rb +166 -0
  41. data/lib/rocket_job/sliced/slices.rb +166 -0
  42. data/lib/rocket_job/sliced/writer/input.rb +60 -0
  43. data/lib/rocket_job/sliced/writer/output.rb +82 -0
  44. data/lib/rocket_job/version.rb +1 -1
  45. data/lib/rocket_job/worker.rb +2 -2
  46. data/lib/rocketjob.rb +28 -0
  47. metadata +51 -62
  48. data/test/config/database.yml +0 -5
  49. data/test/config/mongoid.yml +0 -88
  50. data/test/config_test.rb +0 -10
  51. data/test/dirmon_entry_test.rb +0 -313
  52. data/test/dirmon_job_test.rb +0 -216
  53. data/test/files/text.txt +0 -3
  54. data/test/job_test.rb +0 -71
  55. data/test/jobs/housekeeping_job_test.rb +0 -102
  56. data/test/jobs/on_demand_job_test.rb +0 -59
  57. data/test/jobs/upload_file_job_test.rb +0 -107
  58. data/test/plugins/cron_test.rb +0 -166
  59. data/test/plugins/job/callbacks_test.rb +0 -166
  60. data/test/plugins/job/defaults_test.rb +0 -53
  61. data/test/plugins/job/logger_test.rb +0 -56
  62. data/test/plugins/job/model_test.rb +0 -94
  63. data/test/plugins/job/persistence_test.rb +0 -94
  64. data/test/plugins/job/state_machine_test.rb +0 -116
  65. data/test/plugins/job/throttle_test.rb +0 -111
  66. data/test/plugins/job/worker_test.rb +0 -199
  67. data/test/plugins/processing_window_test.rb +0 -109
  68. data/test/plugins/restart_test.rb +0 -193
  69. data/test/plugins/retry_test.rb +0 -88
  70. data/test/plugins/singleton_test.rb +0 -92
  71. data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
  72. data/test/plugins/state_machine_test.rb +0 -67
  73. data/test/plugins/transaction_test.rb +0 -84
  74. data/test/test_db.sqlite3 +0 -0
  75. data/test/test_helper.rb +0 -17
@@ -1,15 +1,16 @@
1
1
  require 'optparse'
2
+ require 'json'
2
3
  require 'semantic_logger'
3
4
  require 'mongoid'
4
5
  require 'rocketjob'
5
- require 'rocket_job/extensions/mongoid/factory'
6
+ require 'pathname'
6
7
  module RocketJob
7
8
  # Command Line Interface parser for Rocket Job
8
9
  class CLI
9
10
  include SemanticLogger::Loggable
10
11
  attr_accessor :name, :workers, :environment, :pidfile, :directory, :quiet,
11
12
  :log_level, :log_file, :mongo_config, :symmetric_encryption_config,
12
- :filter
13
+ :include_filter, :exclude_filter, :where_filter
13
14
 
14
15
  def initialize(argv)
15
16
  @name = nil
@@ -22,7 +23,8 @@ module RocketJob
22
23
  @log_file = nil
23
24
  @mongo_config = nil
24
25
  @symmetric_encryption_config = nil
25
- @filter = nil
26
+ @include_filter = nil
27
+ @exclude_filter = nil
26
28
  parse(argv)
27
29
  end
28
30
 
@@ -38,10 +40,13 @@ module RocketJob
38
40
  # In case Rails did not load the Mongoid Config
39
41
  RocketJob::Config.load!(environment, mongo_config, symmetric_encryption_config) if ::Mongoid::Config.clients.empty?
40
42
 
43
+ filter = build_filter
44
+
41
45
  opts = {}
42
46
  opts[:name] = name if name
43
47
  opts[:max_workers] = workers if workers
44
- opts[:filter] = {_type: filter} if filter
48
+ opts[:filter] = filter if filter
49
+
45
50
  Server.run(opts)
46
51
  end
47
52
 
@@ -91,7 +96,7 @@ module RocketJob
91
96
 
92
97
  require 'rocketjob'
93
98
  begin
94
- require 'rocketjob_pro'
99
+ require 'rocketjob_batch'
95
100
  rescue LoadError
96
101
  nil
97
102
  end
@@ -148,6 +153,17 @@ module RocketJob
148
153
  end
149
154
  end
150
155
 
156
+ # Returns [Hash] a where clause filter to apply to this server.
157
+ # Returns nil if no filter should be applied
158
+ def build_filter
159
+ raise(ArgumentError, 'Cannot supply both a filter and an exclusion filter') if include_filter && exclude_filter
160
+
161
+ filter = where_filter
162
+ (filter ||= {})['_type'] = include_filter if include_filter
163
+ (filter ||= {})['_type'] = {'$not' => exclude_filter} if exclude_filter
164
+ filter
165
+ end
166
+
151
167
  # Parse command line options placing results in the corresponding instance variables
152
168
  def parse(argv)
153
169
  parser = OptionParser.new do |o|
@@ -161,8 +177,14 @@ module RocketJob
161
177
  warn '-t and --threads are deprecated, use -w or --workers'
162
178
  @workers = arg.to_i
163
179
  end
164
- o.on('-F', '--filter REGEXP', 'Limit this worker to only those job classes that match this regular expression (case-insensitive). Example: "DirmonJob|WeeklyReportJob"') do |arg|
165
- @filter = Regexp.new(arg, true)
180
+ o.on('-F', '--filter REGEXP', 'Limit this server to only those job classes that match this regular expression (case-insensitive). Example: "DirmonJob|WeeklyReportJob"') do |arg|
181
+ @include_filter = Regexp.new(arg, true)
182
+ end
183
+ o.on('-E', '--exclude REGEXP', 'Prevent this server from working on any job classes that match this regular expression (case-insensitive). Example: "DirmonJob|WeeklyReportJob"') do |arg|
184
+ @exclude_filter = Regexp.new(arg, true)
185
+ end
186
+ o.on('-W', '--where JSON', "Limit this server instance to the supplied mongo query filter. Supply as a string in JSON format. Example: '{\"priority\":{\"$lte\":25}}'") do |arg|
187
+ @where_filter = JSON.parse(arg)
166
188
  end
167
189
  o.on('-q', '--quiet', 'Do not write to stdout, only to logfile. Necessary when running as a daemon') do
168
190
  @quiet = true
@@ -55,7 +55,7 @@ module RocketJob
55
55
  raise(ArgumentError, "Mongo Configuration file: #{config_file} not found") unless config_file.file?
56
56
 
57
57
  logger.debug "Reading Mongo configuration from: #{config_file}"
58
- Mongoid.load!(config_file, environment)
58
+ ::Mongoid.load!(config_file, environment)
59
59
 
60
60
  # Load Encryption configuration file if present
61
61
  return unless defined?(SymmetricEncryption)
@@ -0,0 +1,37 @@
1
+ require 'mongoid/criteria'
2
+ require 'mongoid/document'
3
+ module RocketJob
4
+ module MongoidClients
5
+ module Options
6
+ extend ActiveSupport::Concern
7
+
8
+ def with_collection(collection_name)
9
+ self.collection_name = collection_name
10
+ self
11
+ end
12
+
13
+ def collection(parent = nil)
14
+ @collection_name ? mongo_client[@collection_name] : super(parent)
15
+ end
16
+
17
+ def collection_name
18
+ @collection_name || super
19
+ end
20
+
21
+ def collection_name=(collection_name)
22
+ @collection_name = collection_name&.to_sym
23
+ end
24
+
25
+ private
26
+
27
+ module ClassMethods
28
+ def with_collection(collection_name)
29
+ all.with_collection(collection_name)
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+
36
+ ::Mongoid::Criteria.include(RocketJob::MongoidClients::Options)
37
+ ::Mongoid::Document.include(RocketJob::MongoidClients::Options)
@@ -0,0 +1,17 @@
1
+ ::Mongoid::Contextual::Mongo
2
+ module Mongoid
3
+ module Contextual
4
+ class Mongo
5
+ def initialize(criteria)
6
+ @criteria, @klass, @cache = criteria, criteria.klass, criteria.options[:cache]
7
+ # Only line changed is here, get collection name from criteria, not @klass
8
+ #@collection = @klass.collection
9
+ @collection = criteria.collection
10
+
11
+ criteria.send(:merge_type_selection)
12
+ @view = collection.find(criteria.selector, session: _session)
13
+ apply_options
14
+ end
15
+ end
16
+ end
17
+ end
@@ -3,10 +3,10 @@ require 'mongoid/factory'
3
3
  module RocketJob
4
4
  # Don't convert to Mongoid::Factory since it conflicts with Mongoid use.
5
5
  module MongoidFactory
6
- def from_db(klass, attributes = nil, selected_fields = nil)
7
- super
8
- rescue NameError
9
- RocketJob::Job.instantiate(attributes, selected_fields)
6
+ def from_db(klass, attributes = nil, criteria = nil)
7
+ obj = super(klass, attributes, criteria)
8
+ obj.collection_name = criteria.collection_name if criteria
9
+ obj
10
10
  end
11
11
  end
12
12
  end
@@ -0,0 +1,38 @@
1
+ require 'mongoid/criteria'
2
+ require 'mongoid/document'
3
+ module RocketJob
4
+ module Mongoid5Clients
5
+ module Options
6
+ extend ActiveSupport::Concern
7
+
8
+ def with_collection(collection_name)
9
+ self.collection_name = collection_name
10
+ self
11
+ end
12
+
13
+ def collection
14
+ return (@klass || self.class).with(persistence_options || {}).collection unless @collection_name
15
+ (@klass || self.class).mongo_client[@collection_name]
16
+ end
17
+
18
+ def collection_name
19
+ @collection_name || super
20
+ end
21
+
22
+ def collection_name=(collection_name)
23
+ @collection_name = collection_name&.to_sym
24
+ end
25
+
26
+ private
27
+
28
+ module ClassMethods
29
+ def with_collection(collection_name)
30
+ all.with_collection(collection_name)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ ::Mongoid::Criteria.include(RocketJob::Mongoid5Clients::Options)
38
+ ::Mongoid::Document.include(RocketJob::Mongoid5Clients::Options)
@@ -0,0 +1,64 @@
1
+ ::Mongoid::Contextual::Mongo
2
+ module Mongoid
3
+ module Contextual
4
+ class Mongo
5
+ def initialize(criteria)
6
+ @criteria, @klass, @cache = criteria, criteria.klass, criteria.options[:cache]
7
+
8
+ # Only line changed is here, get collection name from criteria, not @klass
9
+ #@collection = @klass.with(criteria.persistence_options || {}).collection
10
+ @collection = criteria.collection
11
+
12
+ criteria.send(:merge_type_selection)
13
+ @view = collection.find(criteria.selector)
14
+ apply_options
15
+ end
16
+
17
+ #
18
+ # Patches below add `criteria` as the last argument to `Factory.from_db`
19
+ #
20
+ def first
21
+ return documents.first if cached? && cache_loaded?
22
+ try_cache(:first) do
23
+ if raw_doc = view.limit(-1).first
24
+ doc = Factory.from_db(klass, raw_doc, criteria.options[:fields], criteria)
25
+ eager_load([doc]).first
26
+ end
27
+ end
28
+ end
29
+
30
+ def find_first
31
+ return documents.first if cached? && cache_loaded?
32
+ if raw_doc = view.first
33
+ doc = Factory.from_db(klass, raw_doc, criteria.options[:fields], criteria)
34
+ eager_load([doc]).first
35
+ end
36
+ end
37
+
38
+ def last
39
+ try_cache(:last) do
40
+ with_inverse_sorting do
41
+ if raw_doc = view.limit(-1).first
42
+ doc = Factory.from_db(klass, raw_doc, criteria.options[:fields], criteria)
43
+ eager_load([doc]).first
44
+ end
45
+ end
46
+ end
47
+ end
48
+
49
+ def documents_for_iteration
50
+ return documents if cached? && !documents.empty?
51
+ return view unless eager_loadable?
52
+ docs = view.map{ |doc| Factory.from_db(klass, doc, criteria.options[:fields], criteria) }
53
+ eager_load(docs)
54
+ end
55
+
56
+ def yield_document(document, &block)
57
+ doc = document.respond_to?(:_id) ?
58
+ document : Factory.from_db(klass, document, criteria.options[:fields], criteria)
59
+ yield(doc)
60
+ documents.push(doc) if cacheable?
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,13 @@
1
+ require 'mongoid/factory'
2
+
3
+ module RocketJob
4
+ module Mongoid5Factory
5
+ def from_db(klass, attributes = nil, selected_fields = nil, criteria = nil)
6
+ obj = super(klass, attributes, selected_fields)
7
+ obj.collection_name = criteria.collection_name if criteria
8
+ obj
9
+ end
10
+ end
11
+ end
12
+
13
+ ::Mongoid::Factory.extend(RocketJob::Mongoid5Factory)
@@ -0,0 +1,127 @@
1
+ # Generalized Batch Job.
2
+ #
3
+ # Often used for data correction or cleansing.
4
+ #
5
+ # Example: Iterate over all rows in a table:
6
+ # code = <<-CODE
7
+ # if user = User.find(row)
8
+ # user.cleanse_attributes!
9
+ # user.save(validate: false)
10
+ # end
11
+ # CODE
12
+ # job = RocketJob::Jobs::OnDemandBatchJob.new(code: code, description: 'cleanse users')
13
+ # arel = User.unscoped.all.order('updated_at DESC')
14
+ # job.record_count = job.input.upload_arel(arel)
15
+ # job.save!
16
+ #
17
+ # Console Testing:
18
+ # code = <<-CODE
19
+ # if user = User.find(row)
20
+ # user.cleanse_attributes!
21
+ # user.save(validate: false)
22
+ # end
23
+ # CODE
24
+ # job = RocketJob::Jobs::OnDemandBatchJob.new(code: code, description: 'cleanse users')
25
+ #
26
+ # # Run against a sub-set using a limit
27
+ # arel = User.unscoped.all.order('updated_at DESC').limit(100)
28
+ # job.record_count = job.input.upload_arel(arel)
29
+ #
30
+ # # Run the subset directly within the console
31
+ # job.perform_now
32
+ # job.cleanup!
33
+ #
34
+ # By default output is not collected, add the option `collect_output: true` to collect output.
35
+ # Example:
36
+ # job = RocketJob::Jobs::OnDemandBatchJob.new(description: 'Fix data', code: code, throttle_running_slices: 5, priority: 30, collect_output: true)
37
+ #
38
+ # Example: Move the upload operation into a before_batch.
39
+ # upload_code = <<-CODE
40
+ # arel = User.unscoped.all.order('updated_at DESC')
41
+ # self.record_count = input.upload_arel(arel)
42
+ # CODE
43
+ #
44
+ # code = <<-CODE
45
+ # if user = User.find(row)
46
+ # user.cleanse_attributes!
47
+ # user.save(validate: false)
48
+ # end
49
+ # CODE
50
+ #
51
+ # RocketJob::Jobs::OnDemandBatchJob.create!(
52
+ # before_code: upload_code,
53
+ # code: code,
54
+ # description: 'cleanse users'
55
+ # )
56
+ module RocketJob
57
+ module Jobs
58
+ class OnDemandBatchJob < RocketJob::Job
59
+ include RocketJob::Plugins::Cron
60
+ include RocketJob::Batch
61
+ include RocketJob::Batch::Statistics
62
+
63
+ self.priority = 90
64
+ self.description = 'Batch Job'
65
+ self.destroy_on_complete = false
66
+
67
+ # Code that is performed against every row / record.
68
+ field :code, type: String
69
+
70
+ # Optional code to execute before the batch is run.
71
+ # Usually to upload data into the job.
72
+ field :before_code, type: String
73
+
74
+ # Optional code to execute after the batch is run.
75
+ # Usually to upload data into the job.
76
+ field :after_code, type: String
77
+
78
+ # Data that is made available to the job during the perform.
79
+ # Be sure to store key names only as Strings, not Symbols.
80
+ field :data, type: Hash, default: {}
81
+
82
+ validates :code, presence: true
83
+ validate :validate_code
84
+ validate :validate_before_code
85
+ validate :validate_after_code
86
+
87
+ before_slice :load_perform_code
88
+ before_batch :run_before_code
89
+ after_batch :run_after_code
90
+
91
+ private
92
+
93
+ def load_perform_code
94
+ instance_eval("def perform(row)\n#{code}\nend")
95
+ end
96
+
97
+ def run_before_code
98
+ instance_eval(before_code) if before_code
99
+ end
100
+
101
+ def run_after_code
102
+ instance_eval(after_code) if after_code
103
+ end
104
+
105
+ def validate_code
106
+ return if code.nil?
107
+ validate_field(:code) { load_perform_code }
108
+ end
109
+
110
+ def validate_before_code
111
+ return if before_code.nil?
112
+ validate_field(:before_code) { instance_eval("def __before_code\n#{before_code}\nend") }
113
+ end
114
+
115
+ def validate_after_code
116
+ return if after_code.nil?
117
+ validate_field(:after_code) { instance_eval("def __after_code\n#{after_code}\nend") }
118
+ end
119
+
120
+ def validate_field(field)
121
+ yield
122
+ rescue Exception => exc
123
+ errors.add(field, "Failed to load :#{field}, #{exc.inspect}")
124
+ end
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,18 @@
1
+ module RocketJob
2
+ module Jobs
3
+ class PerformanceJob < RocketJob::Job
4
+ include RocketJob::Batch
5
+
6
+ # Define the job's default attributes
7
+ self.description = 'Performance Test'
8
+ self.priority = 5
9
+ self.slice_size = 100
10
+ self.destroy_on_complete = false
11
+
12
+ # No operation, just return the supplied line (record)
13
+ def perform(line)
14
+ line
15
+ end
16
+ end
17
+ end
18
+ end
@@ -60,11 +60,8 @@ module RocketJob
60
60
 
61
61
  def upload_file(job)
62
62
  if job.respond_to?(:upload)
63
- if original_file_name && defined?(IOStreams)
64
- streams = IOStreams.streams_for_file_name(original_file_name)
65
- job.upload(upload_file_name, streams: streams)
66
- # job.upload sets the archived filename, we want it to be the original file name.
67
- job.upload_file_name = original_file_name
63
+ if original_file_name
64
+ job.upload(upload_file_name, file_name: original_file_name)
68
65
  else
69
66
  job.upload(upload_file_name)
70
67
  end