rocketjob 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +6 -7
- data/lib/rocket_job/cli.rb +14 -15
- data/lib/rocket_job/concerns/singleton.rb +33 -0
- data/lib/rocket_job/concerns/worker.rb +70 -20
- data/lib/rocket_job/config.rb +3 -1
- data/lib/rocket_job/dirmon_entry.rb +260 -30
- data/lib/rocket_job/heartbeat.rb +3 -0
- data/lib/rocket_job/job.rb +77 -154
- data/lib/rocket_job/job_exception.rb +8 -6
- data/lib/rocket_job/jobs/dirmon_job.rb +26 -102
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +40 -31
- data/lib/rocketjob.rb +26 -9
- data/test/dirmon_entry_test.rb +197 -31
- data/test/dirmon_job_test.rb +91 -188
- data/test/job_test.rb +148 -30
- data/test/job_worker_test.rb +23 -22
- data/test/test_helper.rb +9 -9
- data/test/worker_test.rb +8 -4
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9720cccfb90fd2afdf83b04785f5133c97078fbd
+  data.tar.gz: 352782e7591790ada1f3ab8b584202f1d666b65d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 47cfcc68c411faace343f526240c036e1655c6d55e9de1cdbaee5292ba09400de0b442db667a7a894a6489d96d2494e721d5cbcdec5c9fab0358e158592dfc52
+  data.tar.gz: 431732d7fe08636f15654108b8d6efcc134f588fcedf93cc9b635b5ed7205d2d7add0b80c27f14cb85c3b792b115e9ec2eb39b39756bac2fc0fd09b26c36eb04
data/Rakefile
CHANGED
@@ -1,21 +1,20 @@
 require 'rake/clean'
 require 'rake/testtask'
 
-
-require 'rocket_job/version'
+require_relative 'lib/rocket_job/version'
 
 task :gem do
-  system
+  system 'gem build rocketjob.gemspec'
 end
 
-task :
+task publish: :gem do
   system "git tag -a v#{RocketJob::VERSION} -m 'Tagging #{RocketJob::VERSION}'"
-  system
+  system 'git push --tags'
   system "gem push rocketjob-#{RocketJob::VERSION}.gem"
   system "rm rocketjob-#{RocketJob::VERSION}.gem"
 end
 
-desc
+desc 'Run Test Suite'
 task :test do
   Rake::TestTask.new(:functional) do |t|
     t.test_files = FileList['test/**/*_test.rb']
@@ -25,4 +24,4 @@ task :test do
   Rake::Task['functional'].invoke
 end
 
-task :
+task default: :test
data/lib/rocket_job/cli.rb
CHANGED
@@ -5,25 +5,24 @@ module RocketJob
     attr_reader :name, :threads, :environment, :pidfile, :directory, :quiet
 
     def initialize(argv)
-      @name
-      @threads
-
-      @
-      @
-      @
-      @directory = '.'
+      @name = nil
+      @threads = nil
+      @quiet = false
+      @environment = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
+      @pidfile = nil
+      @directory = '.'
       parse(argv)
     end
 
     # Run a RocketJob::Worker from the command line
     def run
-      SemanticLogger.add_appender(STDOUT,
+      SemanticLogger.add_appender(STDOUT, &SemanticLogger::Appender::Base.colorized_formatter) unless quiet
       boot_rails if defined?(:Rails)
       write_pidfile
 
-      opts
-      opts[:name]
-      opts[:max_threads]
+      opts = {}
+      opts[:name] = name if name
+      opts[:max_threads] = threads if threads
       Worker.run(opts)
     end
 
@@ -33,7 +32,7 @@ module RocketJob
       if Rails.configuration.eager_load
         RocketJob::Worker.logger.benchmark_info('Eager loaded Rails and all Engines') do
           Rails.application.eager_load!
-          Rails::Engine.subclasses.each
+          Rails::Engine.subclasses.each(&:eager_load!)
         end
       end
     end
@@ -41,18 +40,18 @@ module RocketJob
     # Create a PID file if requested
     def write_pidfile
       return unless pidfile
-      pid =
+      pid = $PID
       File.open(pidfile, 'w') { |f| f.puts(pid) }
 
       # Remove pidfile on exit
       at_exit do
-        File.delete(pidfile) if pid ==
+        File.delete(pidfile) if pid == $PID
       end
     end
 
     # Parse command line options placing results in the corresponding instance variables
     def parse(argv)
-      parser
+      parser = OptionParser.new do |o|
         o.on('-n', '--name NAME', 'Unique Name of this worker instance (Default: hostname:PID)') { |arg| @name = arg }
         o.on('-t', '--threads COUNT', 'Number of worker threads to start') { |arg| @threads = arg.to_i }
         o.on('-q', '--quiet', 'Do not write to stdout, only to logfile. Necessary when running as a daemon') { @quiet = true }
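These options are handed straight to Worker.run. As a rough illustration only: the class name RocketJob::CLI is assumed here (this hunk shows only the class body), and a real install would normally pass ARGV from the gem's command-line executable rather than a hand-built array.

require 'rocketjob'

# Hypothetical direct invocation mirroring the OptionParser flags shown above
cli = RocketJob::CLI.new(%w[--name worker1 --threads 5 --quiet])
cli.run  # adds the STDOUT appender unless quiet, boots Rails if present, writes the pidfile, then calls Worker.run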
data/lib/rocket_job/concerns/singleton.rb
ADDED
@@ -0,0 +1,33 @@
+# encoding: UTF-8
+require 'active_support/concern'
+
+# Worker behavior for a job
+module RocketJob
+  module Concerns
+    module Singleton
+      extend ActiveSupport::Concern
+
+      included do
+        # Start the single instance of this job
+        #
+        # Returns true if the job was started
+        # Returns false if the job is already running and doe not need to be started
+        def self.start(*args, &block)
+          # Prevent multiple Jobs of the same class from running at the same time
+          return false if where(state: [:running, :queued]).count > 0
+
+          perform_later(*args, &block)
+          true
+        end
+
+        # TODO Make :perform_later, :perform_now, :perform, :now protected/private
+        # class << self
+        #   # Ensure that only one instance of the job is running.
+        #   protected :perform_later, :perform_now, :perform, :now
+        # end
+        #self.send(:protected, :perform_later)
+
+      end
+    end
+  end
+end
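The new Singleton concern gives a job class a start method that refuses to enqueue a second instance. A minimal sketch of how it could be used, assuming the concern is included into a RocketJob::Job subclass; the NightlyCleanupJob class and its perform body are placeholders for illustration, not code from the gem.

require 'rocketjob'

# Hypothetical job illustrating the concern above
class NightlyCleanupJob < RocketJob::Job
  include RocketJob::Concerns::Singleton

  def perform
    # ... clean up stale records ...
  end
end

NightlyCleanupJob.start  # => true, enqueued via perform_later
NightlyCleanupJob.start  # => false while an instance is still :queued or :running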
data/lib/rocket_job/concerns/worker.rb
CHANGED
@@ -7,9 +7,6 @@ module RocketJob
       def self.included(base)
         base.extend ClassMethods
         base.class_eval do
-          # While working on a slice, the current slice is available via this reader
-          attr_reader :rocket_job_slice
-
           @rocket_job_defaults = nil
         end
       end
@@ -28,7 +25,7 @@ module RocketJob
 
         # Create a job and process it immediately in-line by this thread
         def now(method, *args, &block)
-          job
+          job = build(method, *args, &block)
           worker = RocketJob::Worker.new(name: 'inline')
           worker.started
           job.start
@@ -71,11 +68,61 @@ module RocketJob
           @rocket_job_defaults = block
           self
         end
-        end
 
-
-        #
-
+        # Returns the next job to work on in priority based order
+        # Returns nil if there are currently no queued jobs, or processing batch jobs
+        #   with records that require processing
+        #
+        # Parameters
+        #   worker_name [String]
+        #     Name of the worker that will be processing this job
+        #
+        #   skip_job_ids [Array<BSON::ObjectId>]
+        #     Job ids to exclude when looking for the next job
+        #
+        # Note:
+        #   If a job is in queued state it will be started
+        def next_job(worker_name, skip_job_ids = nil)
+          query = {
+            '$and' => [
+              {
+                '$or' => [
+                  {'state' => 'queued'}, # Jobs
+                  {'state' => 'running', 'sub_state' => :processing} # Slices
+                ]
+              },
+              {
+                '$or' => [
+                  {run_at: {'$exists' => false}},
+                  {run_at: {'$lte' => Time.now}}
+                ]
+              }
+            ]
+          }
+          query['_id'] = {'$nin' => skip_job_ids} if skip_job_ids && skip_job_ids.size > 0
+
+          while (doc = find_and_modify(
+            query:  query,
+            sort:   [['priority', 'asc'], ['created_at', 'asc']],
+            update: {'$set' => {'worker_name' => worker_name, 'state' => 'running'}}
+          ))
+            job = load(doc)
+            if job.running?
+              return job
+            else
+              if job.expired?
+                job.destroy
+                logger.info "Destroyed expired job #{job.class.name}, id:#{job.id}"
+              else
+                # Also update in-memory state and run call-backs
+                job.start
+                job.set(started_at: job.started_at)
+                return job
+              end
+            end
+          end
+        end
+
       end
 
       # Works on this job
@@ -87,22 +134,24 @@ module RocketJob
      #
      # Thread-safe, can be called by multiple threads at the same time
      def work(worker)
-        raise 'Job must be started before calling #work' unless running?
+        raise(ArgumentError, 'Job must be started before calling #work') unless running?
        begin
          # before_perform
          call_method(perform_method, arguments, event: :before, log_level: log_level)
 
          # perform
-          call_method(perform_method, arguments, log_level: log_level)
+          ret = call_method(perform_method, arguments, log_level: log_level)
          if self.collect_output?
-            self.
+            self.result = (ret.is_a?(Hash) || ret.is_a?(BSON::OrderedHash)) ? ret : {result: ret}
          end
 
          # after_perform
          call_method(perform_method, arguments, event: :after, log_level: log_level)
+
          complete!
-        rescue
-
+        rescue StandardError => exc
+          fail!(worker.name, exc) unless failed?
+          logger.error("Exception running #{self.class.name}##{perform_method}", exc)
          raise exc if RocketJob::Config.inline_mode
        end
        false
@@ -131,27 +180,28 @@ module RocketJob
      #     Log level to apply to silence logging during the call
      #     Default: nil ( no change )
      #
-      def call_method(method, arguments, options={})
-        options
-        event
-        log_level
+      def call_method(method, arguments, options = {})
+        options = options.dup
+        event = options.delete(:event)
+        log_level = options.delete(:log_level)
        raise(ArgumentError, "Unknown #{self.class.name}#call_method options: #{options.inspect}") if options.size > 0
 
        the_method = event.nil? ? method : "#{event}_#{method}".to_sym
        if respond_to?(the_method)
          method_name = "#{self.class.name}##{the_method}"
          logger.info "Start #{method_name}"
-          logger.benchmark_info(
+          logger.benchmark_info(
+            "Completed #{method_name}",
            metric: "rocketjob/#{self.class.name.underscore}/#{the_method}",
            log_exception: :full,
            on_exception_level: :error,
            silence: log_level
          ) do
-
+            send(the_method, *arguments)
          end
        end
      end
 
    end
  end
-end
+end
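Taken together, the class-level next_job and the instance-level work above imply a worker loop along the following lines. This is an illustration of the flow only; the loop itself, and the assumption that RocketJob::Job exposes next_job through this concern, are not part of the diff.

worker = RocketJob::Worker.new(name: 'example-worker')
worker.started

# Pull queued jobs in priority order until none remain
while (job = RocketJob::Job.next_job(worker.name))
  job.work(worker)  # runs before_/after_ hooks, stores the result when collect_output?, fails the job on exceptions
end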
data/lib/rocket_job/config.rb
CHANGED
@@ -14,7 +14,7 @@ module RocketJob
     sync_cattr_reader(:instance) do
       begin
         first || create
-      rescue
+      rescue StandardError
         # In case another process has already created the first document
         first
       end
@@ -24,6 +24,7 @@ module RocketJob
     # No worker processes will be created, nor threads created
     sync_cattr_accessor(:inline_mode) { false }
 
+    # @formatter:off
     # The maximum number of worker threads to create on any one worker
     key :max_worker_threads, Integer, default: 10
 
@@ -47,6 +48,7 @@ module RocketJob
     # Limit the number of workers per job class per worker
     #    'class_name' / group => 100
     #key :limits, Hash
+    # @formatter:on
 
     # Replace the MongoMapper default mongo connection for holding jobs
     def self.mongo_connection=(connection)
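Because Concerns::Worker#work re-raises exceptions when RocketJob::Config.inline_mode is set (see the rescue above), and inline mode creates no worker processes or threads, a test helper might enable it along these lines (sketch only).

# In test setup: surface job exceptions directly in the calling thread
RocketJob::Config.inline_mode = true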
data/lib/rocket_job/dirmon_entry.rb
CHANGED
@@ -1,28 +1,39 @@
+require 'thread_safe'
+require 'pathname'
+require 'fileutils'
 module RocketJob
   class DirmonEntry
     include MongoMapper::Document
+    include AASM
 
-    #
-    # in
+    # @formatter:off
+    # User defined name used to identify this DirmonEntry in Mission Control
     key :name, String
 
-    #
+    # Pattern for finding files
     #
-    # Example:
-    #   input_files/process1/*.csv
+    # Example: All files ending in '.csv' in the input_files/process1 directory
+    #   input_files/process1/*.csv
+    #
+    # Example: All files in the input_files/process1 directory and all sub-directories
     #   input_files/process2/**/*
     #
-    #
+    # Example: All files in the input_files/process2 directory with .csv or .txt extensions
+    #   input_files/process2/*.{csv,txt}
+    #
+    # For details on valid pattern values, see: http://ruby-doc.org/core-2.2.2/Dir.html#method-c-glob
     #
     # Note
-    # - If there
-
+    # - If there is no '*' in the pattern then an exact filename match is expected
+    # - The pattern is not validated to ensure the path exists, it will be validated against the
+    #   `whitelist_paths` when processed by DirmonJob
+    key :pattern, String
 
-    # Job to
+    # Job to enqueue for processing for every file that matches the pattern
     #
     # Example:
     #   "ProcessItJob"
-    key :
+    key :job_class_name, String
 
     # Any user supplied arguments for the method invocation
     # All keys must be UTF-8 strings. The values can be any valid BSON type:
@@ -52,46 +63,265 @@ module RocketJob
     #
     # If supplied, the file will be moved to this directory before the job is started
     # If the file was in a sub-directory, the corresponding sub-directory will
-    # be created in the archive directory
-    #   is a relative path. (I.e. Does not start with '/') .
+    # be created in the archive directory.
     key :archive_directory, String
 
-    # Allow a monitoring path to be temporarily disabled
-    key :enabled, Boolean, default: true
-
     # Method to perform on the job, usually :perform
     key :perform_method, Symbol, default: :perform
 
-    #
-
-
+    # If this DirmonEntry is in the failed state, exception contains the cause
+    one :exception, class_name: 'RocketJob::JobException'
+
+    # The maximum number of files that should ever match during a single poll of the pattern.
+    #
+    # Too many files could be as a result of an invalid pattern specification.
+    # Exceeding this number will result in an exception being logged in a failed Dirmon instance.
+    # Dirmon processing will continue with new instances.
+    # TODO: Implement max_hits
+    #key :max_hits, Integer, default: 100
+
+    #
+    # Read-only attributes
+    #
+
+    # Current state, as set by AASM
+    key :state, Symbol, default: :pending
+
+    # State Machine events and transitions
+    #
+    #   :pending -> :enabled  -> :disabled
+    #                         -> :failed
+    #             -> :failed  -> :active
+    #                         -> :disabled
+    #             -> :disabled -> :active
+    aasm column: :state do
+      # DirmonEntry is `pending` until it is approved
+      state :pending, initial: true
+
+      # DirmonEntry is Enabled and will be included by DirmonJob
+      state :enabled
+
+      # DirmonEntry failed during processing and requires manual intervention
+      # See the exception for the reason for failing this entry
+      # For example: access denied, whitelist_path security violation, etc.
+      state :failed
+
+      # DirmonEntry has been manually disabled
+      state :disabled
+
+      event :enable do
+        transitions from: :pending, to: :enabled
+        transitions from: :disabled, to: :enabled
+      end
+
+      event :disable do
+        transitions from: :enabled, to: :disabled
+        transitions from: :failed, to: :disabled
+      end
+
+      event :fail do
+        transitions from: :enabled, to: :failed
+      end
     end
 
-
+    # @formatter:on
+    validates_presence_of :pattern, :job_class_name, :perform_method
 
-    validates_each :
-
-
-        exists = value.nil? ? false : value.constantize.ancestors.include?(RocketJob::Job)
-      rescue NameError => exc
+    validates_each :perform_method do |record, attr, value|
+      if (klass = record.job_class) && !klass.instance_method(value)
+        record.errors.add(attr, "Method not implemented by #{record.job_class_name}")
       end
-
+    end
+
+    validates_each :job_class_name do |record, attr, value|
+      exists =
+        begin
+          value.nil? ? false : record.job_class.ancestors.include?(RocketJob::Job)
+        rescue NameError
+          false
+        end
+      record.errors.add(attr, 'job_class_name must be defined and must be derived from RocketJob::Job') unless exists
     end
 
     validates_each :arguments do |record, attr, value|
-      if klass = record.job_class
+      if (klass = record.job_class)
         count = klass.argument_count(record.perform_method)
-        record.errors.add(attr, "There must be #{count} argument(s)") if
+        record.errors.add(attr, "There must be #{count} argument(s)") if value.size != count
       end
     end
 
     validates_each :properties do |record, attr, value|
-      if record.
-        value.each_pair do |
-          record.errors.add(attr, "Unknown property: #{
+      if record.job_class && (methods = record.job_class.instance_methods)
+        value.each_pair do |k, v|
+          record.errors.add(attr, "Unknown property: #{k.inspect} with value: #{v}") unless methods.include?("#{k}=".to_sym)
         end
       end
     end
 
+    # Create indexes
+    def self.create_indexes
+      # Unique index on pattern to help prevent two entries from scanning the same files
+      ensure_index({pattern: 1}, background: true, unique: true)
+    end
+
+    # Security Settings
+    #
+    # A whitelist of paths from which to process files.
+    # This prevents accidental or malicious `pattern`s from processing files from anywhere
+    # in the system that the user under which Dirmon is running can access.
+    #
+    # All resolved `pattern`s must start with one of the whitelisted path, otherwise they will be rejected
+    #
+    # Note:
+    # - If no whitelist paths have been added, then a whitelist check is _not_ performed
+    # - Relative paths can be used, but are not considered safe since they can be manipulated
+    # - These paths should be assigned in an initializer and not editable via the Web UI to ensure
+    #   that they are not tampered with
+    #
+    # Default: [] ==> Do not enforce whitelists
+    #
+    # Returns [Array<String>] a copy of the whitelisted paths
+    def self.whitelist_paths
+      @@whitelist_paths.dup
+    end
+
+    # Add a path to the whitelist
+    # Raises: Errno::ENOENT: No such file or directory
+    def self.add_whitelist_path(path)
+      # Confirms that path exists
+      path = Pathname.new(path).realpath.to_s
+      @@whitelist_paths << path
+      @@whitelist_paths.uniq!
+      path
+    end
+
+    # Deletes a path from the whitelist paths
+    # Raises: Errno::ENOENT: No such file or directory
+    def self.delete_whitelist_path(path)
+      # Confirms that path exists
+      path = Pathname.new(path).realpath.to_s
+      @@whitelist_paths.delete(path)
+      @@whitelist_paths.uniq!
+      path
+    end
+
+    # The default archive directory that is used when the job being queued does not respond
+    # to #file_store_upload or #upload, and do not have an `archive_directory` specified in this entry
+    cattr_accessor :default_archive_directory
+
+    @@default_archive_directory = '_archive'.freeze
+
+    # Returns [Pathname] the archive_directory if set, otherwise the default_archive_directory
+    def archive_pathname
+      Pathname.new(archive_directory || self.class.default_archive_directory)
+    end
+
+    # Passes each filename [Pathname] found that matches the pattern into the supplied block
+    def each(&block)
+      logger.tagged("DirmonEntry:#{id}") do
+        Pathname.glob(pattern).each do |pathname|
+          next if pathname.directory?
+          pathname = pathname.realpath
+          file_name = pathname.to_s
+
+          # Skip archive directories
+          next if file_name.start_with?(archive_pathname.realpath.to_s)
+
+          # Security check?
+          if (@@whitelist_paths.size > 0) && @@whitelist_paths.none? { |whitepath| file_name.start_with?(whitepath) }
+            logger.warn "Ignoring file: #{file_name} since it is not in any of the whitelisted paths: #{whitelist_paths.join(', ')}"
+            next
+          end
+
+          # File must be writable so it can be removed after processing
+          unless pathname.writable?
+            logger.warn "Ignoring file: #{file_name} since it is not writable by the current user. Must be able to delete/move the file after queueing the job"
+            next
+          end
+          block.call(pathname)
+        end
+      end
+    end
+
+    # Set exception information for this DirmonEntry and fail it
+    def fail_with_exception!(worker_name, exc_or_message)
+      if exc_or_message.is_a?(Exception)
+        self.exception = JobException.from_exception(exc_or_message)
+        exception.worker_name = worker_name
+      else
+        build_exception(
+          class_name: 'RocketJob::DirmonEntryException',
+          message: exc_or_message,
+          backtrace: [],
+          worker_name: worker_name
+        )
+      end
+      fail!
+    end
+
+    @@whitelist_paths = ThreadSafe::Array.new
+
+    # Returns the Job to be queued
+    def job_class
+      return if job_class_name.nil?
+      job_class_name.constantize
+    rescue NameError
+      nil
+    end
+
+    # Queues the job for the supplied pathname
+    def later(pathname)
+      job_class.perform_later(*arguments) do |job|
+        job.perform_method = perform_method
+        # Set properties
+        properties.each_pair { |k, v| job.send("#{k}=".to_sym, v) }
+
+        upload_file(job, pathname)
+      end
+    end
+
+    protected
+
+    # Upload the file to the job
+    def upload_file(job, pathname)
+      if job.respond_to?(:file_store_upload)
+        # Allow the job to determine what to do with the file
+        # Pass the pathname as a string, not a Pathname (IO) instance
+        # so that it can read the file directly
+        job.file_store_upload(pathname.to_s)
+        archive_directory ? archive_file(job, pathname) : pathname.unlink
+      elsif job.respond_to?(:upload)
+        # With RocketJob Pro the file can be uploaded directly into the Job itself
+        job.upload(pathname.to_s)
+        archive_directory ? archive_file(job, pathname) : pathname.unlink
+      else
+        upload_default(job, pathname)
+      end
+    end
+
+    # Archives the file for a job where there was no #file_store_upload or #upload method
+    def upload_default(job, pathname)
+      # The first argument must be a hash
+      job.arguments << {} if job.arguments.size == 0
+      job.arguments.first[:full_file_name] = archive_file(job, pathname)
+    end
+
+    # Move the file to the archive directory
+    #
+    # The archived file name is prefixed with the job id
+    #
+    # Returns [String] the fully qualified archived file name
+    #
+    # Note:
+    # - Works across partitions when the file and the archive are on different partitions
+    def archive_file(job, pathname)
+      target_path = archive_pathname
+      target_path.mkpath
+      target_file_name = target_path.join("#{job.id}_#{pathname.basename}")
+      # In case the file is being moved across partitions
+      FileUtils.move(pathname.to_s, target_file_name.to_s)
+      target_file_name.to_s
+    end
+
   end
 end
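A sketch of how the expanded DirmonEntry API above could be wired up. The directory is a placeholder, ProcessItJob is the example class name from the comments, and the save/approval workflow around the state machine is assumed rather than shown in this diff.

# Restrict scanning to a known directory (the path must already exist)
RocketJob::DirmonEntry.add_whitelist_path('/data/inbound')

entry = RocketJob::DirmonEntry.new(
  name:           'Inbound CSV files',
  pattern:        '/data/inbound/**/*.csv',
  job_class_name: 'ProcessItJob'   # must descend from RocketJob::Job to pass validation
)
entry.enable   # :pending -> :enabled, so DirmonJob will start picking it up
entry.save!

# Roughly what DirmonJob does for each matching file on a poll
entry.each { |pathname| entry.later(pathname) }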