rocketjob 5.4.1 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +175 -5
- data/bin/rocketjob_batch_perf +1 -1
- data/bin/rocketjob_perf +1 -1
- data/lib/rocket_job/batch/categories.rb +345 -0
- data/lib/rocket_job/batch/io.rb +174 -106
- data/lib/rocket_job/batch/model.rb +20 -68
- data/lib/rocket_job/batch/performance.rb +19 -7
- data/lib/rocket_job/batch/statistics.rb +34 -12
- data/lib/rocket_job/batch/throttle_running_workers.rb +2 -6
- data/lib/rocket_job/batch/worker.rb +31 -26
- data/lib/rocket_job/batch.rb +3 -1
- data/lib/rocket_job/category/base.rb +81 -0
- data/lib/rocket_job/category/input.rb +170 -0
- data/lib/rocket_job/category/output.rb +34 -0
- data/lib/rocket_job/cli.rb +25 -17
- data/lib/rocket_job/dirmon_entry.rb +23 -13
- data/lib/rocket_job/event.rb +1 -1
- data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
- data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
- data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
- data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
- data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
- data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
- data/lib/rocket_job/jobs/conversion_job.rb +43 -0
- data/lib/rocket_job/jobs/dirmon_job.rb +25 -36
- data/lib/rocket_job/jobs/housekeeping_job.rb +11 -12
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +24 -11
- data/lib/rocket_job/jobs/on_demand_job.rb +3 -4
- data/lib/rocket_job/jobs/performance_job.rb +3 -1
- data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -96
- data/lib/rocket_job/jobs/upload_file_job.rb +48 -8
- data/lib/rocket_job/lookup_collection.rb +69 -0
- data/lib/rocket_job/plugins/cron.rb +60 -20
- data/lib/rocket_job/plugins/job/model.rb +25 -50
- data/lib/rocket_job/plugins/job/persistence.rb +36 -0
- data/lib/rocket_job/plugins/job/throttle.rb +2 -2
- data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
- data/lib/rocket_job/plugins/job/worker.rb +2 -7
- data/lib/rocket_job/plugins/restart.rb +3 -103
- data/lib/rocket_job/plugins/state_machine.rb +4 -3
- data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +37 -0
- data/lib/rocket_job/ractor_worker.rb +42 -0
- data/lib/rocket_job/server/model.rb +1 -1
- data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
- data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
- data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
- data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
- data/lib/rocket_job/sliced/input.rb +42 -54
- data/lib/rocket_job/sliced/slice.rb +12 -16
- data/lib/rocket_job/sliced/slices.rb +26 -11
- data/lib/rocket_job/sliced/writer/input.rb +46 -18
- data/lib/rocket_job/sliced/writer/output.rb +33 -45
- data/lib/rocket_job/sliced.rb +1 -74
- data/lib/rocket_job/subscribers/server.rb +1 -1
- data/lib/rocket_job/thread_worker.rb +46 -0
- data/lib/rocket_job/throttle_definitions.rb +7 -1
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +21 -55
- data/lib/rocket_job/worker_pool.rb +5 -7
- data/lib/rocketjob.rb +53 -43
- metadata +36 -28
- data/lib/rocket_job/batch/tabular/input.rb +0 -131
- data/lib/rocket_job/batch/tabular/output.rb +0 -65
- data/lib/rocket_job/batch/tabular.rb +0 -56
- data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
- data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
@@ -0,0 +1,34 @@
|
|
1
|
+
module RocketJob
|
2
|
+
module Category
|
3
|
+
# Define the layout for each category of input or output data
|
4
|
+
class Output
|
5
|
+
include SemanticLogger::Loggable
|
6
|
+
include Plugins::Document
|
7
|
+
include Category::Base
|
8
|
+
|
9
|
+
embedded_in :job, class_name: "RocketJob::Job", inverse_of: :output_categories
|
10
|
+
|
11
|
+
# Whether to save nil values returned from the `perform` method.
|
12
|
+
# true: save nil values to the output categories.
|
13
|
+
# false: do not save nil values to the output categories.
|
14
|
+
field :nils, type: ::Mongoid::Boolean, default: false
|
15
|
+
|
16
|
+
validates_inclusion_of :serializer, in: %i[none compress encrypt bz2 encrypted_bz2 bzip2]
|
17
|
+
|
18
|
+
# Renders [String] the header line.
|
19
|
+
# Returns [nil] if no header is needed.
|
20
|
+
def render_header
|
21
|
+
return if !tabular? || !tabular.requires_header?
|
22
|
+
|
23
|
+
tabular.render_header
|
24
|
+
end
|
25
|
+
|
26
|
+
def data_store(job)
|
27
|
+
RocketJob::Sliced::Output.new(
|
28
|
+
collection_name: build_collection_name(:output, job),
|
29
|
+
slice_class: serializer_class
|
30
|
+
)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/lib/rocket_job/cli.rb
CHANGED
@@ -233,24 +233,23 @@ module RocketJob
|
|
233
233
|
|
234
234
|
# Parse command line options placing results in the corresponding instance variables
|
235
235
|
def parse(argv)
|
236
|
-
parser
|
236
|
+
parser = OptionParser.new do |o|
|
237
237
|
o.on("-n", "--name NAME", "Unique Name of this server (Default: host_name:PID)") do |arg|
|
238
238
|
Config.name = arg
|
239
239
|
end
|
240
240
|
o.on("-w", "--workers COUNT", "Number of workers (threads) to start") do |arg|
|
241
241
|
@max_workers = arg.to_i
|
242
242
|
end
|
243
|
-
o.on("--include REGEXP",
|
243
|
+
o.on("--include REGEXP",
|
244
|
+
'Limit this server to only those job classes that match this regular expression (case-insensitive). Example: "DirmonJob|WeeklyReportJob"') do |arg|
|
244
245
|
@include_filter = Regexp.new(arg, true)
|
245
246
|
end
|
246
|
-
o.on("-
|
247
|
-
|
248
|
-
@include_filter = Regexp.new(arg, true)
|
249
|
-
end
|
250
|
-
o.on("-E", "--exclude REGEXP", 'Prevent this server from working on any job classes that match this regular expression (case-insensitive). Example: "DirmonJob|WeeklyReportJob"') do |arg|
|
247
|
+
o.on("-E", "--exclude REGEXP",
|
248
|
+
'Prevent this server from working on any job classes that match this regular expression (case-insensitive). Example: "DirmonJob|WeeklyReportJob"') do |arg|
|
251
249
|
@exclude_filter = Regexp.new(arg, true)
|
252
250
|
end
|
253
|
-
o.on("-W", "--where JSON",
|
251
|
+
o.on("-W", "--where JSON",
|
252
|
+
"Limit this server instance to the supplied mongo query filter. Supply as a string in JSON format. Example: '{\"priority\":{\"$lte\":25}}'") do |arg|
|
254
253
|
@where_filter = JSON.parse(arg)
|
255
254
|
end
|
256
255
|
o.on("-q", "--quiet", "Do not write to stdout, only to logfile. Necessary when running as a daemon") do
|
@@ -259,7 +258,8 @@ module RocketJob
|
|
259
258
|
o.on("-d", "--dir DIR", "Directory containing Rails app, if not current directory") do |arg|
|
260
259
|
@directory = arg
|
261
260
|
end
|
262
|
-
o.on("-e", "--environment ENVIRONMENT",
|
261
|
+
o.on("-e", "--environment ENVIRONMENT",
|
262
|
+
"The environment to run the app on (Default: RAILS_ENV || RACK_ENV || development)") do |arg|
|
263
263
|
@environment = arg
|
264
264
|
end
|
265
265
|
o.on("-l", "--log_level trace|debug|info|warn|error|fatal", "The log level to use") do |arg|
|
@@ -274,38 +274,46 @@ module RocketJob
|
|
274
274
|
o.on("-m", "--mongo MONGO_CONFIG_FILE_NAME", "Path and filename of config file. Default: config/mongoid.yml") do |arg|
|
275
275
|
@mongo_config = arg
|
276
276
|
end
|
277
|
-
o.on("-s", "--symmetric-encryption SYMMETRIC_ENCRYPTION_CONFIG_FILE_NAME",
|
277
|
+
o.on("-s", "--symmetric-encryption SYMMETRIC_ENCRYPTION_CONFIG_FILE_NAME",
|
278
|
+
"Path and filename of Symmetric Encryption config file. Default: config/symmetric-encryption.yml") do |arg|
|
278
279
|
@symmetric_encryption_config = arg
|
279
280
|
end
|
280
|
-
o.on("--list [FILTER]",
|
281
|
+
o.on("--list [FILTER]",
|
282
|
+
"List active servers. Supply either an exact server name or a partial name as a filter.") do |filter|
|
281
283
|
@quiet = true
|
282
284
|
@server = false
|
283
285
|
@list_servers = filter || :all
|
284
286
|
end
|
285
|
-
o.on("--refresh [SECONDS]",
|
287
|
+
o.on("--refresh [SECONDS]",
|
288
|
+
"When listing active servers, update the list by this number of seconds. Defaults to every 1 second.") do |seconds|
|
286
289
|
@refresh = (seconds || 1).to_s.to_f
|
287
290
|
end
|
288
|
-
o.on("--stop [SERVER_NAME]",
|
291
|
+
o.on("--stop [SERVER_NAME]",
|
292
|
+
"Send event to stop a server once all in-process workers have completed. Optionally supply the complete or partial name of the server(s) to stop. Default: All servers.") do |server_name|
|
289
293
|
@quiet = true
|
290
294
|
@server = false
|
291
295
|
@stop_server = server_name || :all
|
292
296
|
end
|
293
|
-
o.on("--kill [SERVER_NAME]",
|
297
|
+
o.on("--kill [SERVER_NAME]",
|
298
|
+
"Send event to hard kill a server. Optionally supply the complete or partial name of the server(s) to kill. Default: All servers.") do |server_name|
|
294
299
|
@quiet = true
|
295
300
|
@server = false
|
296
301
|
@kill_server = server_name || :all
|
297
302
|
end
|
298
|
-
o.on("--pause [SERVER_NAME]",
|
303
|
+
o.on("--pause [SERVER_NAME]",
|
304
|
+
"Send event to pause a server. Optionally supply the complete or partial name of the server(s) to pause. Default: All servers.") do |server_name|
|
299
305
|
@quiet = true
|
300
306
|
@server = false
|
301
307
|
@pause_server = server_name || :all
|
302
308
|
end
|
303
|
-
o.on("--resume [SERVER_NAME]",
|
309
|
+
o.on("--resume [SERVER_NAME]",
|
310
|
+
"Send event to resume a server. Optionally supply the complete or partial name of the server(s) to resume. Default: All servers.") do |server_name|
|
304
311
|
@quiet = true
|
305
312
|
@server = false
|
306
313
|
@resume_server = server_name || :all
|
307
314
|
end
|
308
|
-
o.on("--dump [SERVER_NAME]",
|
315
|
+
o.on("--dump [SERVER_NAME]",
|
316
|
+
"Send event for a server to send a worker thread dump to its log file. Optionally supply the complete or partial name of the server(s). Default: All servers.") do |server_name|
|
309
317
|
@quiet = true
|
310
318
|
@server = false
|
311
319
|
@thread_dump = server_name || :all
|
@@ -57,20 +57,12 @@ module RocketJob
|
|
57
57
|
# If this DirmonEntry is in the failed state, exception contains the cause
|
58
58
|
embeds_one :exception, class_name: "RocketJob::JobException"
|
59
59
|
|
60
|
-
# The maximum number of files that should ever match during a single poll of the pattern.
|
61
|
-
#
|
62
|
-
# Too many files could be as a result of an invalid pattern specification.
|
63
|
-
# Exceeding this number will result in an exception being logged in a failed Dirmon instance.
|
64
|
-
# Dirmon processing will continue with new instances.
|
65
|
-
# TODO: Implement max_hits
|
66
|
-
# field :max_hits, type: Integer, default: 100
|
67
|
-
|
68
60
|
#
|
69
61
|
# Read-only attributes
|
70
62
|
#
|
71
63
|
|
72
64
|
# Current state, as set by the state machine. Do not modify directly.
|
73
|
-
field :state, type:
|
65
|
+
field :state, type: Mongoid::StringifiedSymbol, default: :pending
|
74
66
|
|
75
67
|
# Unique index on pattern to help prevent two entries from scanning the same files
|
76
68
|
index({pattern: 1}, background: true, unique: true)
|
@@ -181,7 +173,7 @@ module RocketJob
|
|
181
173
|
counts
|
182
174
|
end
|
183
175
|
|
184
|
-
#
|
176
|
+
# Yields [IOStreams::Path] for each file found that matches the current pattern.
|
185
177
|
def each
|
186
178
|
SemanticLogger.named_tagged(dirmon_entry: id.to_s) do
|
187
179
|
# Case insensitive filename matching
|
@@ -240,7 +232,7 @@ module RocketJob
|
|
240
232
|
job_class_name: job_class_name,
|
241
233
|
properties: properties,
|
242
234
|
description: "#{name}: #{iopath.basename}",
|
243
|
-
upload_file_name: archive_path
|
235
|
+
upload_file_name: archive_path,
|
244
236
|
original_file_name: iopath.to_s,
|
245
237
|
job_id: job_id
|
246
238
|
)
|
@@ -249,7 +241,7 @@ module RocketJob
|
|
249
241
|
message: "Created RocketJob::Jobs::UploadFileJob",
|
250
242
|
payload: {
|
251
243
|
dirmon_entry_name: name,
|
252
|
-
upload_file_name: archive_path
|
244
|
+
upload_file_name: archive_path,
|
253
245
|
original_file_name: iopath.to_s,
|
254
246
|
job_class_name: job_class_name,
|
255
247
|
job_id: job_id.to_s,
|
@@ -295,7 +287,25 @@ module RocketJob
|
|
295
287
|
properties.each_pair do |k, _v|
|
296
288
|
next if klass.public_method_defined?("#{k}=".to_sym)
|
297
289
|
|
298
|
-
|
290
|
+
if %i[output_categories input_categories].include?(k)
|
291
|
+
category_class = k == :input_categories ? RocketJob::Category::Input : RocketJob::Category::Output
|
292
|
+
properties[k].each do |category|
|
293
|
+
category.each_pair do |key, _value|
|
294
|
+
next if category_class.public_method_defined?("#{key}=".to_sym)
|
295
|
+
|
296
|
+
errors.add(
|
297
|
+
:properties,
|
298
|
+
"Unknown Property in #{k}: Attempted to set a value for #{key}.#{k} which is not allowed on the job #{job_class_name}"
|
299
|
+
)
|
300
|
+
end
|
301
|
+
end
|
302
|
+
next
|
303
|
+
end
|
304
|
+
|
305
|
+
errors.add(
|
306
|
+
:properties,
|
307
|
+
"Unknown Property: Attempted to set a value for #{k.inspect} which is not allowed on the job #{job_class_name}"
|
308
|
+
)
|
299
309
|
end
|
300
310
|
end
|
301
311
|
end
|
data/lib/rocket_job/event.rb
CHANGED
@@ -0,0 +1,32 @@
|
|
1
|
+
module IOStreams
|
2
|
+
class Path
|
3
|
+
# Converts an object of this instance into a database friendly value.
|
4
|
+
def mongoize
|
5
|
+
to_s
|
6
|
+
end
|
7
|
+
|
8
|
+
# Get the object as it was stored in the database, and instantiate
|
9
|
+
# this custom class from it.
|
10
|
+
def self.demongoize(object)
|
11
|
+
return if object.nil?
|
12
|
+
|
13
|
+
IOStreams.new(object)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Takes any possible object and converts it to how it would be
|
17
|
+
# stored in the database.
|
18
|
+
def self.mongoize(object)
|
19
|
+
return if object.nil?
|
20
|
+
|
21
|
+
object.to_s
|
22
|
+
end
|
23
|
+
|
24
|
+
# Converts the object that was supplied to a criteria and converts it
|
25
|
+
# into a database friendly form.
|
26
|
+
def self.evolve(object)
|
27
|
+
return if object.nil?
|
28
|
+
|
29
|
+
object.to_s
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -4,8 +4,8 @@ module Mongoid
|
|
4
4
|
class Mongo
|
5
5
|
def initialize(criteria)
|
6
6
|
@criteria = criteria
|
7
|
-
@klass
|
8
|
-
@cache
|
7
|
+
@klass = criteria.klass
|
8
|
+
@cache = criteria.options[:cache]
|
9
9
|
# Only line changed is here, get collection name from criteria, not @klass
|
10
10
|
# @collection = @klass.collection
|
11
11
|
@collection = criteria.collection
|
@@ -3,18 +3,10 @@ require "mongoid/factory"
|
|
3
3
|
module RocketJob
|
4
4
|
# Don't convert to Mongoid::Factory since it conflicts with Mongoid use.
|
5
5
|
module MongoidFactory
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
obj
|
11
|
-
end
|
12
|
-
else
|
13
|
-
def from_db(klass, attributes = nil, criteria = nil)
|
14
|
-
obj = super(klass, attributes, criteria)
|
15
|
-
obj.collection_name = criteria.collection_name if criteria
|
16
|
-
obj
|
17
|
-
end
|
6
|
+
def from_db(klass, attributes = nil, criteria = nil, selected_fields = nil)
|
7
|
+
obj = super(klass, attributes, criteria, selected_fields)
|
8
|
+
obj.collection_name = criteria.collection_name if criteria
|
9
|
+
obj
|
18
10
|
end
|
19
11
|
end
|
20
12
|
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# A class which sends values to the database as Strings but returns them to the user as Symbols.
|
4
|
+
module Mongoid
|
5
|
+
class StringifiedSymbol
|
6
|
+
class << self
|
7
|
+
# Convert the object from its mongo friendly ruby type to this type.
|
8
|
+
#
|
9
|
+
# @example Demongoize the object.
|
10
|
+
#   Mongoid::StringifiedSymbol.demongoize(object)
|
11
|
+
#
|
12
|
+
# @param [ Object ] object The object to demongoize.
|
13
|
+
#
|
14
|
+
# @return [ Symbol ] The object.
|
15
|
+
#
|
16
|
+
# @api private
|
17
|
+
def demongoize(object)
|
18
|
+
if object.nil?
|
19
|
+
object
|
20
|
+
else
|
21
|
+
object.to_s.to_sym
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Turn the object from the ruby type we deal with to a Mongo friendly
|
26
|
+
# type.
|
27
|
+
#
|
28
|
+
# @example Mongoize the object.
|
29
|
+
#   Mongoid::StringifiedSymbol.mongoize("123.11")
|
30
|
+
#
|
31
|
+
# @param [ Object ] object The object to mongoize.
|
32
|
+
#
|
33
|
+
# @return [ String ] The object mongoized.
|
34
|
+
#
|
35
|
+
# @api private
|
36
|
+
def mongoize(object)
|
37
|
+
if object.nil?
|
38
|
+
object
|
39
|
+
else
|
40
|
+
object.to_s
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# @api private
|
45
|
+
def evolve(object)
|
46
|
+
mongoize(object)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -55,13 +55,13 @@ module ActiveJob
|
|
55
55
|
# - Completed jobs will not appear in completed since the Active Job adapter
|
56
56
|
# uses the default Rocket Job `destroy_on_completion` of `false`.
|
57
57
|
class RocketJobAdapter
|
58
|
-
def self.enqueue(active_job)
|
58
|
+
def self.enqueue(active_job)
|
59
59
|
job = RocketJob::Jobs::ActiveJob.create!(active_job_params(active_job))
|
60
60
|
active_job.provider_job_id = job.id.to_s if active_job.respond_to?(:provider_job_id=)
|
61
61
|
job
|
62
62
|
end
|
63
63
|
|
64
|
-
def self.enqueue_at(active_job, timestamp)
|
64
|
+
def self.enqueue_at(active_job, timestamp)
|
65
65
|
params = active_job_params(active_job)
|
66
66
|
params[:run_at] = Time.at(timestamp).utc
|
67
67
|
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Convert to and from CSV, JSON, xlsx, and PSV files.
|
2
|
+
#
|
3
|
+
# Example, Convert CSV file to JSON.
|
4
|
+
# job = RocketJob::Jobs::ConversionJob.new
|
5
|
+
# job.input_category.file_name = "data.csv"
|
6
|
+
# job.output_category.file_name = "data.json"
|
7
|
+
# job.save!
|
8
|
+
#
|
9
|
+
# Example, Convert JSON file to PSV and compress it with GZip.
|
10
|
+
# job = RocketJob::Jobs::ConversionJob.new
|
11
|
+
# job.input_category.file_name = "data.json"
|
12
|
+
# job.output_category.file_name = "data.psv.gz"
|
13
|
+
# job.save!
|
14
|
+
#
|
15
|
+
# Example, Read a CSV file that has been zipped from a remote website and then convert it to a GZipped json file.
|
16
|
+
# job = RocketJob::Jobs::ConversionJob.new
|
17
|
+
# job.input_category.file_name = "https://example.org/file.zip"
|
18
|
+
# job.output_category.file_name = "data.json.gz"
|
19
|
+
# job.save!
|
20
|
+
#
|
21
|
+
module RocketJob
|
22
|
+
module Jobs
|
23
|
+
class ConversionJob < RocketJob::Job
|
24
|
+
include RocketJob::Batch
|
25
|
+
|
26
|
+
self.destroy_on_complete = false
|
27
|
+
|
28
|
+
# Detects file extension for its type
|
29
|
+
input_category format: :auto
|
30
|
+
output_category format: :auto
|
31
|
+
|
32
|
+
# Upload the file specified in `input_category.file_name` unless already uploaded.
|
33
|
+
before_batch :upload, unless: :record_count
|
34
|
+
|
35
|
+
# When the job completes it will write the result to `output_category.file_name`.
|
36
|
+
after_batch :cleanup!, :download
|
37
|
+
|
38
|
+
def perform(hash)
|
39
|
+
hash
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -18,7 +18,7 @@ module RocketJob
|
|
18
18
|
# file name of the archived file is passed into the job as either
|
19
19
|
# `upload_file_name` or `full_file_name`.
|
20
20
|
|
21
|
-
#
|
21
|
+
# Notes:
|
22
22
|
# - Jobs that do not implement #upload _must_ have either `upload_file_name` or `full_file_name` as an attribute.
|
23
23
|
#
|
24
24
|
# With RocketJob Pro, the file is automatically uploaded into the job itself
|
@@ -30,59 +30,48 @@ module RocketJob
|
|
30
30
|
#
|
31
31
|
# If another DirmonJob instance is already queued or running, then the create
|
32
32
|
# above will fail with:
|
33
|
-
#
|
33
|
+
# Validation failed: State Another instance of this job is already queued or running
|
34
34
|
#
|
35
35
|
# Or to start DirmonJob and ignore errors if already running
|
36
36
|
# RocketJob::Jobs::DirmonJob.create
|
37
37
|
class DirmonJob < RocketJob::Job
|
38
|
-
|
39
|
-
include RocketJob::Plugins::Singleton
|
40
|
-
# Start a new job when this one completes, fails, or aborts
|
41
|
-
include RocketJob::Plugins::Restart
|
38
|
+
include RocketJob::Plugins::Cron
|
42
39
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
40
|
+
# Runs every 5 minutes by default
|
41
|
+
self.cron_schedule = "*/5 * * * * UTC"
|
42
|
+
self.description = "Directory Monitor"
|
43
|
+
self.priority = 30
|
47
44
|
|
48
45
|
# Hash[file_name, size]
|
49
46
|
field :previous_file_names, type: Hash, default: {}, copy_on_restart: true
|
50
47
|
|
51
|
-
|
52
|
-
|
53
|
-
# Iterate over each Dirmon entry looking for new files
|
54
|
-
# If a new file is found, it is not processed immediately, instead
|
55
|
-
# it is passed to the next run of this job along with the file size.
|
56
|
-
# If the file size has not changed, the Job is kicked off.
|
48
|
+
# Checks the directories for new files, starting jobs if files have not changed since the last run.
|
57
49
|
def perform
|
58
50
|
check_directories
|
59
51
|
end
|
60
52
|
|
61
53
|
private
|
62
54
|
|
63
|
-
#
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
# Checks the directories for new files, starting jobs if files have not changed
|
69
|
-
# since the last run
|
55
|
+
# Iterate over each Dirmon Entry looking for new files
|
56
|
+
# If a new file is found, it is not processed immediately, instead
|
57
|
+
# it is passed to the next run of this job along with the file size.
|
58
|
+
# If the file size has not changed, the Job is kicked off.
|
70
59
|
def check_directories
|
71
60
|
new_file_names = {}
|
72
|
-
DirmonEntry.enabled.each do |
|
73
|
-
|
74
|
-
# S3 files are only visible once completely uploaded.
|
75
|
-
unless
|
76
|
-
logger.info("File: #{
|
77
|
-
|
61
|
+
DirmonEntry.enabled.each do |dirmon_entry|
|
62
|
+
dirmon_entry.each do |path|
|
63
|
+
# Skip file size checking since S3 files are only visible once completely uploaded.
|
64
|
+
unless path.partial_files_visible?
|
65
|
+
logger.info("File: #{path}. Starting: #{dirmon_entry.job_class_name}")
|
66
|
+
dirmon_entry.later(path)
|
78
67
|
next
|
79
68
|
end
|
80
69
|
|
81
70
|
# BSON Keys cannot contain periods
|
82
|
-
key =
|
71
|
+
key = path.to_s.tr(".", "_")
|
83
72
|
previous_size = previous_file_names[key]
|
84
73
|
# Check every few minutes for a file size change before trying to process the file.
|
85
|
-
size
|
74
|
+
size = check_file(dirmon_entry, path, previous_size)
|
86
75
|
new_file_names[key] = size if size
|
87
76
|
end
|
88
77
|
end
|
@@ -91,14 +80,14 @@ module RocketJob
|
|
91
80
|
|
92
81
|
# Checks if a file should result in starting a job
|
93
82
|
# Returns [Integer] file size, or nil if the file started a job
|
94
|
-
def check_file(
|
95
|
-
size =
|
83
|
+
def check_file(dirmon_entry, path, previous_size)
|
84
|
+
size = path.size
|
96
85
|
if previous_size && (previous_size == size)
|
97
|
-
logger.info("File stabilized: #{
|
98
|
-
|
86
|
+
logger.info("File stabilized: #{path}. Starting: #{dirmon_entry.job_class_name}")
|
87
|
+
dirmon_entry.later(path)
|
99
88
|
nil
|
100
89
|
else
|
101
|
-
logger.info("Found file: #{
|
90
|
+
logger.info("Found file: #{path}. File size: #{size}")
|
102
91
|
# Keep for the next run
|
103
92
|
size
|
104
93
|
end
|