rocketjob 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +6 -7
- data/lib/rocket_job/cli.rb +14 -15
- data/lib/rocket_job/concerns/singleton.rb +33 -0
- data/lib/rocket_job/concerns/worker.rb +70 -20
- data/lib/rocket_job/config.rb +3 -1
- data/lib/rocket_job/dirmon_entry.rb +260 -30
- data/lib/rocket_job/heartbeat.rb +3 -0
- data/lib/rocket_job/job.rb +77 -154
- data/lib/rocket_job/job_exception.rb +8 -6
- data/lib/rocket_job/jobs/dirmon_job.rb +26 -102
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +40 -31
- data/lib/rocketjob.rb +26 -9
- data/test/dirmon_entry_test.rb +197 -31
- data/test/dirmon_job_test.rb +91 -188
- data/test/job_test.rb +148 -30
- data/test/job_worker_test.rb +23 -22
- data/test/test_helper.rb +9 -9
- data/test/worker_test.rb +8 -4
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9720cccfb90fd2afdf83b04785f5133c97078fbd
|
4
|
+
data.tar.gz: 352782e7591790ada1f3ab8b584202f1d666b65d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 47cfcc68c411faace343f526240c036e1655c6d55e9de1cdbaee5292ba09400de0b442db667a7a894a6489d96d2494e721d5cbcdec5c9fab0358e158592dfc52
|
7
|
+
data.tar.gz: 431732d7fe08636f15654108b8d6efcc134f588fcedf93cc9b635b5ed7205d2d7add0b80c27f14cb85c3b792b115e9ec2eb39b39756bac2fc0fd09b26c36eb04
|
data/Rakefile
CHANGED
@@ -1,21 +1,20 @@
|
|
1
1
|
require 'rake/clean'
|
2
2
|
require 'rake/testtask'
|
3
3
|
|
4
|
-
|
5
|
-
require 'rocket_job/version'
|
4
|
+
require_relative 'lib/rocket_job/version'
|
6
5
|
|
7
6
|
task :gem do
|
8
|
-
system
|
7
|
+
system 'gem build rocketjob.gemspec'
|
9
8
|
end
|
10
9
|
|
11
|
-
task :
|
10
|
+
task publish: :gem do
|
12
11
|
system "git tag -a v#{RocketJob::VERSION} -m 'Tagging #{RocketJob::VERSION}'"
|
13
|
-
system
|
12
|
+
system 'git push --tags'
|
14
13
|
system "gem push rocketjob-#{RocketJob::VERSION}.gem"
|
15
14
|
system "rm rocketjob-#{RocketJob::VERSION}.gem"
|
16
15
|
end
|
17
16
|
|
18
|
-
desc
|
17
|
+
desc 'Run Test Suite'
|
19
18
|
task :test do
|
20
19
|
Rake::TestTask.new(:functional) do |t|
|
21
20
|
t.test_files = FileList['test/**/*_test.rb']
|
@@ -25,4 +24,4 @@ task :test do
|
|
25
24
|
Rake::Task['functional'].invoke
|
26
25
|
end
|
27
26
|
|
28
|
-
task :
|
27
|
+
task default: :test
|
data/lib/rocket_job/cli.rb
CHANGED
@@ -5,25 +5,24 @@ module RocketJob
|
|
5
5
|
attr_reader :name, :threads, :environment, :pidfile, :directory, :quiet
|
6
6
|
|
7
7
|
def initialize(argv)
|
8
|
-
@name
|
9
|
-
@threads
|
10
|
-
|
11
|
-
@
|
12
|
-
@
|
13
|
-
@
|
14
|
-
@directory = '.'
|
8
|
+
@name = nil
|
9
|
+
@threads = nil
|
10
|
+
@quiet = false
|
11
|
+
@environment = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
|
12
|
+
@pidfile = nil
|
13
|
+
@directory = '.'
|
15
14
|
parse(argv)
|
16
15
|
end
|
17
16
|
|
18
17
|
# Run a RocketJob::Worker from the command line
|
19
18
|
def run
|
20
|
-
SemanticLogger.add_appender(STDOUT,
|
19
|
+
SemanticLogger.add_appender(STDOUT, &SemanticLogger::Appender::Base.colorized_formatter) unless quiet
|
21
20
|
boot_rails if defined?(:Rails)
|
22
21
|
write_pidfile
|
23
22
|
|
24
|
-
opts
|
25
|
-
opts[:name]
|
26
|
-
opts[:max_threads]
|
23
|
+
opts = {}
|
24
|
+
opts[:name] = name if name
|
25
|
+
opts[:max_threads] = threads if threads
|
27
26
|
Worker.run(opts)
|
28
27
|
end
|
29
28
|
|
@@ -33,7 +32,7 @@ module RocketJob
|
|
33
32
|
if Rails.configuration.eager_load
|
34
33
|
RocketJob::Worker.logger.benchmark_info('Eager loaded Rails and all Engines') do
|
35
34
|
Rails.application.eager_load!
|
36
|
-
Rails::Engine.subclasses.each
|
35
|
+
Rails::Engine.subclasses.each(&:eager_load!)
|
37
36
|
end
|
38
37
|
end
|
39
38
|
end
|
@@ -41,18 +40,18 @@ module RocketJob
|
|
41
40
|
# Create a PID file if requested
|
42
41
|
def write_pidfile
|
43
42
|
return unless pidfile
|
44
|
-
pid =
|
43
|
+
pid = $PID
|
45
44
|
File.open(pidfile, 'w') { |f| f.puts(pid) }
|
46
45
|
|
47
46
|
# Remove pidfile on exit
|
48
47
|
at_exit do
|
49
|
-
File.delete(pidfile) if pid ==
|
48
|
+
File.delete(pidfile) if pid == $PID
|
50
49
|
end
|
51
50
|
end
|
52
51
|
|
53
52
|
# Parse command line options placing results in the corresponding instance variables
|
54
53
|
def parse(argv)
|
55
|
-
parser
|
54
|
+
parser = OptionParser.new do |o|
|
56
55
|
o.on('-n', '--name NAME', 'Unique Name of this worker instance (Default: hostname:PID)') { |arg| @name = arg }
|
57
56
|
o.on('-t', '--threads COUNT', 'Number of worker threads to start') { |arg| @threads = arg.to_i }
|
58
57
|
o.on('-q', '--quiet', 'Do not write to stdout, only to logfile. Necessary when running as a daemon') { @quiet = true }
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'active_support/concern'
|
3
|
+
|
4
|
+
# Singleton behavior for a job: only one instance of the job may be queued or running at a time
|
5
|
+
module RocketJob
|
6
|
+
module Concerns
|
7
|
+
module Singleton
|
8
|
+
extend ActiveSupport::Concern
|
9
|
+
|
10
|
+
included do
|
11
|
+
# Start the single instance of this job
|
12
|
+
#
|
13
|
+
# Returns true if the job was started
|
14
|
+
# Returns false if the job is already running and does not need to be started
|
15
|
+
def self.start(*args, &block)
|
16
|
+
# Prevent multiple Jobs of the same class from running at the same time
|
17
|
+
return false if where(state: [:running, :queued]).count > 0
|
18
|
+
|
19
|
+
perform_later(*args, &block)
|
20
|
+
true
|
21
|
+
end
|
22
|
+
|
23
|
+
# TODO Make :perform_later, :perform_now, :perform, :now protected/private
|
24
|
+
# class << self
|
25
|
+
# # Ensure that only one instance of the job is running.
|
26
|
+
# protected :perform_later, :perform_now, :perform, :now
|
27
|
+
# end
|
28
|
+
#self.send(:protected, :perform_later)
|
29
|
+
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -7,9 +7,6 @@ module RocketJob
|
|
7
7
|
def self.included(base)
|
8
8
|
base.extend ClassMethods
|
9
9
|
base.class_eval do
|
10
|
-
# While working on a slice, the current slice is available via this reader
|
11
|
-
attr_reader :rocket_job_slice
|
12
|
-
|
13
10
|
@rocket_job_defaults = nil
|
14
11
|
end
|
15
12
|
end
|
@@ -28,7 +25,7 @@ module RocketJob
|
|
28
25
|
|
29
26
|
# Create a job and process it immediately in-line by this thread
|
30
27
|
def now(method, *args, &block)
|
31
|
-
job
|
28
|
+
job = build(method, *args, &block)
|
32
29
|
worker = RocketJob::Worker.new(name: 'inline')
|
33
30
|
worker.started
|
34
31
|
job.start
|
@@ -71,11 +68,61 @@ module RocketJob
|
|
71
68
|
@rocket_job_defaults = block
|
72
69
|
self
|
73
70
|
end
|
74
|
-
end
|
75
71
|
|
76
|
-
|
77
|
-
#
|
78
|
-
|
72
|
+
# Returns the next job to work on in priority based order
|
73
|
+
# Returns nil if there are currently no queued jobs, or processing batch jobs
|
74
|
+
# with records that require processing
|
75
|
+
#
|
76
|
+
# Parameters
|
77
|
+
# worker_name [String]
|
78
|
+
# Name of the worker that will be processing this job
|
79
|
+
#
|
80
|
+
# skip_job_ids [Array<BSON::ObjectId>]
|
81
|
+
# Job ids to exclude when looking for the next job
|
82
|
+
#
|
83
|
+
# Note:
|
84
|
+
# If a job is in queued state it will be started
|
85
|
+
def next_job(worker_name, skip_job_ids = nil)
|
86
|
+
query = {
|
87
|
+
'$and' => [
|
88
|
+
{
|
89
|
+
'$or' => [
|
90
|
+
{'state' => 'queued'}, # Jobs
|
91
|
+
{'state' => 'running', 'sub_state' => :processing} # Slices
|
92
|
+
]
|
93
|
+
},
|
94
|
+
{
|
95
|
+
'$or' => [
|
96
|
+
{run_at: {'$exists' => false}},
|
97
|
+
{run_at: {'$lte' => Time.now}}
|
98
|
+
]
|
99
|
+
}
|
100
|
+
]
|
101
|
+
}
|
102
|
+
query['_id'] = {'$nin' => skip_job_ids} if skip_job_ids && skip_job_ids.size > 0
|
103
|
+
|
104
|
+
while (doc = find_and_modify(
|
105
|
+
query: query,
|
106
|
+
sort: [['priority', 'asc'], ['created_at', 'asc']],
|
107
|
+
update: {'$set' => {'worker_name' => worker_name, 'state' => 'running'}}
|
108
|
+
))
|
109
|
+
job = load(doc)
|
110
|
+
if job.running?
|
111
|
+
return job
|
112
|
+
else
|
113
|
+
if job.expired?
|
114
|
+
job.destroy
|
115
|
+
logger.info "Destroyed expired job #{job.class.name}, id:#{job.id}"
|
116
|
+
else
|
117
|
+
# Also update in-memory state and run call-backs
|
118
|
+
job.start
|
119
|
+
job.set(started_at: job.started_at)
|
120
|
+
return job
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
79
126
|
end
|
80
127
|
|
81
128
|
# Works on this job
|
@@ -87,22 +134,24 @@ module RocketJob
|
|
87
134
|
#
|
88
135
|
# Thread-safe, can be called by multiple threads at the same time
|
89
136
|
def work(worker)
|
90
|
-
raise 'Job must be started before calling #work' unless running?
|
137
|
+
raise(ArgumentError, 'Job must be started before calling #work') unless running?
|
91
138
|
begin
|
92
139
|
# before_perform
|
93
140
|
call_method(perform_method, arguments, event: :before, log_level: log_level)
|
94
141
|
|
95
142
|
# perform
|
96
|
-
call_method(perform_method, arguments, log_level: log_level)
|
143
|
+
ret = call_method(perform_method, arguments, log_level: log_level)
|
97
144
|
if self.collect_output?
|
98
|
-
self.
|
145
|
+
self.result = (ret.is_a?(Hash) || ret.is_a?(BSON::OrderedHash)) ? ret : {result: ret}
|
99
146
|
end
|
100
147
|
|
101
148
|
# after_perform
|
102
149
|
call_method(perform_method, arguments, event: :after, log_level: log_level)
|
150
|
+
|
103
151
|
complete!
|
104
|
-
rescue
|
105
|
-
|
152
|
+
rescue StandardError => exc
|
153
|
+
fail!(worker.name, exc) unless failed?
|
154
|
+
logger.error("Exception running #{self.class.name}##{perform_method}", exc)
|
106
155
|
raise exc if RocketJob::Config.inline_mode
|
107
156
|
end
|
108
157
|
false
|
@@ -131,27 +180,28 @@ module RocketJob
|
|
131
180
|
# Log level to apply to silence logging during the call
|
132
181
|
# Default: nil ( no change )
|
133
182
|
#
|
134
|
-
def call_method(method, arguments, options={})
|
135
|
-
options
|
136
|
-
event
|
137
|
-
log_level
|
183
|
+
def call_method(method, arguments, options = {})
|
184
|
+
options = options.dup
|
185
|
+
event = options.delete(:event)
|
186
|
+
log_level = options.delete(:log_level)
|
138
187
|
raise(ArgumentError, "Unknown #{self.class.name}#call_method options: #{options.inspect}") if options.size > 0
|
139
188
|
|
140
189
|
the_method = event.nil? ? method : "#{event}_#{method}".to_sym
|
141
190
|
if respond_to?(the_method)
|
142
191
|
method_name = "#{self.class.name}##{the_method}"
|
143
192
|
logger.info "Start #{method_name}"
|
144
|
-
logger.benchmark_info(
|
193
|
+
logger.benchmark_info(
|
194
|
+
"Completed #{method_name}",
|
145
195
|
metric: "rocketjob/#{self.class.name.underscore}/#{the_method}",
|
146
196
|
log_exception: :full,
|
147
197
|
on_exception_level: :error,
|
148
198
|
silence: log_level
|
149
199
|
) do
|
150
|
-
|
200
|
+
send(the_method, *arguments)
|
151
201
|
end
|
152
202
|
end
|
153
203
|
end
|
154
204
|
|
155
205
|
end
|
156
206
|
end
|
157
|
-
end
|
207
|
+
end
|
data/lib/rocket_job/config.rb
CHANGED
@@ -14,7 +14,7 @@ module RocketJob
|
|
14
14
|
sync_cattr_reader(:instance) do
|
15
15
|
begin
|
16
16
|
first || create
|
17
|
-
rescue
|
17
|
+
rescue StandardError
|
18
18
|
# In case another process has already created the first document
|
19
19
|
first
|
20
20
|
end
|
@@ -24,6 +24,7 @@ module RocketJob
|
|
24
24
|
# No worker processes will be created, nor threads created
|
25
25
|
sync_cattr_accessor(:inline_mode) { false }
|
26
26
|
|
27
|
+
# @formatter:off
|
27
28
|
# The maximum number of worker threads to create on any one worker
|
28
29
|
key :max_worker_threads, Integer, default: 10
|
29
30
|
|
@@ -47,6 +48,7 @@ module RocketJob
|
|
47
48
|
# Limit the number of workers per job class per worker
|
48
49
|
# 'class_name' / group => 100
|
49
50
|
#key :limits, Hash
|
51
|
+
# @formatter:on
|
50
52
|
|
51
53
|
# Replace the MongoMapper default mongo connection for holding jobs
|
52
54
|
def self.mongo_connection=(connection)
|
@@ -1,28 +1,39 @@
|
|
1
|
+
require 'thread_safe'
|
2
|
+
require 'pathname'
|
3
|
+
require 'fileutils'
|
1
4
|
module RocketJob
|
2
5
|
class DirmonEntry
|
3
6
|
include MongoMapper::Document
|
7
|
+
include AASM
|
4
8
|
|
5
|
-
#
|
6
|
-
# in
|
9
|
+
# @formatter:off
|
10
|
+
# User defined name used to identify this DirmonEntry in Mission Control
|
7
11
|
key :name, String
|
8
12
|
|
9
|
-
#
|
13
|
+
# Pattern for finding files
|
10
14
|
#
|
11
|
-
# Example:
|
12
|
-
# input_files/process1/*.csv
|
15
|
+
# Example: All files ending in '.csv' in the input_files/process1 directory
|
16
|
+
# input_files/process1/*.csv
|
17
|
+
#
|
18
|
+
# Example: All files in the input_files/process1 directory and all sub-directories
|
13
19
|
# input_files/process2/**/*
|
14
20
|
#
|
15
|
-
#
|
21
|
+
# Example: All files in the input_files/process2 directory with .csv or .txt extensions
|
22
|
+
# input_files/process2/*.{csv,txt}
|
23
|
+
#
|
24
|
+
# For details on valid pattern values, see: http://ruby-doc.org/core-2.2.2/Dir.html#method-c-glob
|
16
25
|
#
|
17
26
|
# Note
|
18
|
-
# - If there
|
19
|
-
|
27
|
+
# - If there is no '*' in the pattern then an exact filename match is expected
|
28
|
+
# - The pattern is not validated to ensure the path exists, it will be validated against the
|
29
|
+
# `whitelist_paths` when processed by DirmonJob
|
30
|
+
key :pattern, String
|
20
31
|
|
21
|
-
# Job to
|
32
|
+
# Job to enqueue for processing for every file that matches the pattern
|
22
33
|
#
|
23
34
|
# Example:
|
24
35
|
# "ProcessItJob"
|
25
|
-
key :
|
36
|
+
key :job_class_name, String
|
26
37
|
|
27
38
|
# Any user supplied arguments for the method invocation
|
28
39
|
# All keys must be UTF-8 strings. The values can be any valid BSON type:
|
@@ -52,46 +63,265 @@ module RocketJob
|
|
52
63
|
#
|
53
64
|
# If supplied, the file will be moved to this directory before the job is started
|
54
65
|
# If the file was in a sub-directory, the corresponding sub-directory will
|
55
|
-
# be created in the archive directory
|
56
|
-
# is a relative path. (I.e. Does not start with '/') .
|
66
|
+
# be created in the archive directory.
|
57
67
|
key :archive_directory, String
|
58
68
|
|
59
|
-
# Allow a monitoring path to be temporarily disabled
|
60
|
-
key :enabled, Boolean, default: true
|
61
|
-
|
62
69
|
# Method to perform on the job, usually :perform
|
63
70
|
key :perform_method, Symbol, default: :perform
|
64
71
|
|
65
|
-
#
|
66
|
-
|
67
|
-
|
72
|
+
# If this DirmonEntry is in the failed state, exception contains the cause
|
73
|
+
one :exception, class_name: 'RocketJob::JobException'
|
74
|
+
|
75
|
+
# The maximum number of files that should ever match during a single poll of the pattern.
|
76
|
+
#
|
77
|
+
# Too many files could be the result of an invalid pattern specification.
|
78
|
+
# Exceeding this number will result in an exception being logged in a failed Dirmon instance.
|
79
|
+
# Dirmon processing will continue with new instances.
|
80
|
+
# TODO: Implement max_hits
|
81
|
+
#key :max_hits, Integer, default: 100
|
82
|
+
|
83
|
+
#
|
84
|
+
# Read-only attributes
|
85
|
+
#
|
86
|
+
|
87
|
+
# Current state, as set by AASM
|
88
|
+
key :state, Symbol, default: :pending
|
89
|
+
|
90
|
+
# State Machine events and transitions
|
91
|
+
#
|
92
|
+
# :pending -> :enabled -> :disabled
|
93
|
+
# -> :failed
|
94
|
+
# -> :failed -> :active
|
95
|
+
# -> :disabled
|
96
|
+
# -> :disabled -> :active
|
97
|
+
aasm column: :state do
|
98
|
+
# DirmonEntry is `pending` until it is approved
|
99
|
+
state :pending, initial: true
|
100
|
+
|
101
|
+
# DirmonEntry is Enabled and will be included by DirmonJob
|
102
|
+
state :enabled
|
103
|
+
|
104
|
+
# DirmonEntry failed during processing and requires manual intervention
|
105
|
+
# See the exception for the reason for failing this entry
|
106
|
+
# For example: access denied, whitelist_path security violation, etc.
|
107
|
+
state :failed
|
108
|
+
|
109
|
+
# DirmonEntry has been manually disabled
|
110
|
+
state :disabled
|
111
|
+
|
112
|
+
event :enable do
|
113
|
+
transitions from: :pending, to: :enabled
|
114
|
+
transitions from: :disabled, to: :enabled
|
115
|
+
end
|
116
|
+
|
117
|
+
event :disable do
|
118
|
+
transitions from: :enabled, to: :disabled
|
119
|
+
transitions from: :failed, to: :disabled
|
120
|
+
end
|
121
|
+
|
122
|
+
event :fail do
|
123
|
+
transitions from: :enabled, to: :failed
|
124
|
+
end
|
68
125
|
end
|
69
126
|
|
70
|
-
|
127
|
+
# @formatter:on
|
128
|
+
validates_presence_of :pattern, :job_class_name, :perform_method
|
71
129
|
|
72
|
-
validates_each :
|
73
|
-
|
74
|
-
|
75
|
-
exists = value.nil? ? false : value.constantize.ancestors.include?(RocketJob::Job)
|
76
|
-
rescue NameError => exc
|
130
|
+
validates_each :perform_method do |record, attr, value|
|
131
|
+
if (klass = record.job_class) && !klass.instance_method(value)
|
132
|
+
record.errors.add(attr, "Method not implemented by #{record.job_class_name}")
|
77
133
|
end
|
78
|
-
|
134
|
+
end
|
135
|
+
|
136
|
+
validates_each :job_class_name do |record, attr, value|
|
137
|
+
exists =
|
138
|
+
begin
|
139
|
+
value.nil? ? false : record.job_class.ancestors.include?(RocketJob::Job)
|
140
|
+
rescue NameError
|
141
|
+
false
|
142
|
+
end
|
143
|
+
record.errors.add(attr, 'job_class_name must be defined and must be derived from RocketJob::Job') unless exists
|
79
144
|
end
|
80
145
|
|
81
146
|
validates_each :arguments do |record, attr, value|
|
82
|
-
if klass = record.job_class
|
147
|
+
if (klass = record.job_class)
|
83
148
|
count = klass.argument_count(record.perform_method)
|
84
|
-
record.errors.add(attr, "There must be #{count} argument(s)") if
|
149
|
+
record.errors.add(attr, "There must be #{count} argument(s)") if value.size != count
|
85
150
|
end
|
86
151
|
end
|
87
152
|
|
88
153
|
validates_each :properties do |record, attr, value|
|
89
|
-
if record.
|
90
|
-
value.each_pair do |
|
91
|
-
record.errors.add(attr, "Unknown property: #{
|
154
|
+
if record.job_class && (methods = record.job_class.instance_methods)
|
155
|
+
value.each_pair do |k, v|
|
156
|
+
record.errors.add(attr, "Unknown property: #{k.inspect} with value: #{v}") unless methods.include?("#{k}=".to_sym)
|
92
157
|
end
|
93
158
|
end
|
94
159
|
end
|
95
160
|
|
161
|
+
# Create indexes
|
162
|
+
def self.create_indexes
|
163
|
+
# Unique index on pattern to help prevent two entries from scanning the same files
|
164
|
+
ensure_index({pattern: 1}, background: true, unique: true)
|
165
|
+
end
|
166
|
+
|
167
|
+
# Security Settings
|
168
|
+
#
|
169
|
+
# A whitelist of paths from which to process files.
|
170
|
+
# This prevents accidental or malicious `pattern`s from processing files from anywhere
|
171
|
+
# in the system that the user under which Dirmon is running can access.
|
172
|
+
#
|
173
|
+
# All resolved `pattern`s must start with one of the whitelisted paths, otherwise they will be rejected
|
174
|
+
#
|
175
|
+
# Note:
|
176
|
+
# - If no whitelist paths have been added, then a whitelist check is _not_ performed
|
177
|
+
# - Relative paths can be used, but are not considered safe since they can be manipulated
|
178
|
+
# - These paths should be assigned in an initializer and not editable via the Web UI to ensure
|
179
|
+
# that they are not tampered with
|
180
|
+
#
|
181
|
+
# Default: [] ==> Do not enforce whitelists
|
182
|
+
#
|
183
|
+
# Returns [Array<String>] a copy of the whitelisted paths
|
184
|
+
def self.whitelist_paths
|
185
|
+
@@whitelist_paths.dup
|
186
|
+
end
|
187
|
+
|
188
|
+
# Add a path to the whitelist
|
189
|
+
# Raises: Errno::ENOENT: No such file or directory
|
190
|
+
def self.add_whitelist_path(path)
|
191
|
+
# Confirms that path exists
|
192
|
+
path = Pathname.new(path).realpath.to_s
|
193
|
+
@@whitelist_paths << path
|
194
|
+
@@whitelist_paths.uniq!
|
195
|
+
path
|
196
|
+
end
|
197
|
+
|
198
|
+
# Deletes a path from the whitelist paths
|
199
|
+
# Raises: Errno::ENOENT: No such file or directory
|
200
|
+
def self.delete_whitelist_path(path)
|
201
|
+
# Confirms that path exists
|
202
|
+
path = Pathname.new(path).realpath.to_s
|
203
|
+
@@whitelist_paths.delete(path)
|
204
|
+
@@whitelist_paths.uniq!
|
205
|
+
path
|
206
|
+
end
|
207
|
+
|
208
|
+
# The default archive directory that is used when the job being queued does not respond
|
209
|
+
# to #file_store_upload or #upload, and does not have an `archive_directory` specified in this entry
|
210
|
+
cattr_accessor :default_archive_directory
|
211
|
+
|
212
|
+
@@default_archive_directory = '_archive'.freeze
|
213
|
+
|
214
|
+
# Returns [Pathname] the archive_directory if set, otherwise the default_archive_directory
|
215
|
+
def archive_pathname
|
216
|
+
Pathname.new(archive_directory || self.class.default_archive_directory)
|
217
|
+
end
|
218
|
+
|
219
|
+
# Passes each filename [Pathname] found that matches the pattern into the supplied block
|
220
|
+
def each(&block)
|
221
|
+
logger.tagged("DirmonEntry:#{id}") do
|
222
|
+
Pathname.glob(pattern).each do |pathname|
|
223
|
+
next if pathname.directory?
|
224
|
+
pathname = pathname.realpath
|
225
|
+
file_name = pathname.to_s
|
226
|
+
|
227
|
+
# Skip archive directories
|
228
|
+
next if file_name.start_with?(archive_pathname.realpath.to_s)
|
229
|
+
|
230
|
+
# Security check?
|
231
|
+
if (@@whitelist_paths.size > 0) && @@whitelist_paths.none? { |whitepath| file_name.start_with?(whitepath) }
|
232
|
+
logger.warn "Ignoring file: #{file_name} since it is not in any of the whitelisted paths: #{whitelist_paths.join(', ')}"
|
233
|
+
next
|
234
|
+
end
|
235
|
+
|
236
|
+
# File must be writable so it can be removed after processing
|
237
|
+
unless pathname.writable?
|
238
|
+
logger.warn "Ignoring file: #{file_name} since it is not writable by the current user. Must be able to delete/move the file after queueing the job"
|
239
|
+
next
|
240
|
+
end
|
241
|
+
block.call(pathname)
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
# Set exception information for this DirmonEntry and fail it
|
247
|
+
def fail_with_exception!(worker_name, exc_or_message)
|
248
|
+
if exc_or_message.is_a?(Exception)
|
249
|
+
self.exception = JobException.from_exception(exc_or_message)
|
250
|
+
exception.worker_name = worker_name
|
251
|
+
else
|
252
|
+
build_exception(
|
253
|
+
class_name: 'RocketJob::DirmonEntryException',
|
254
|
+
message: exc_or_message,
|
255
|
+
backtrace: [],
|
256
|
+
worker_name: worker_name
|
257
|
+
)
|
258
|
+
end
|
259
|
+
fail!
|
260
|
+
end
|
261
|
+
|
262
|
+
@@whitelist_paths = ThreadSafe::Array.new
|
263
|
+
|
264
|
+
# Returns the Job to be queued
|
265
|
+
def job_class
|
266
|
+
return if job_class_name.nil?
|
267
|
+
job_class_name.constantize
|
268
|
+
rescue NameError
|
269
|
+
nil
|
270
|
+
end
|
271
|
+
|
272
|
+
# Queues the job for the supplied pathname
|
273
|
+
def later(pathname)
|
274
|
+
job_class.perform_later(*arguments) do |job|
|
275
|
+
job.perform_method = perform_method
|
276
|
+
# Set properties
|
277
|
+
properties.each_pair { |k, v| job.send("#{k}=".to_sym, v) }
|
278
|
+
|
279
|
+
upload_file(job, pathname)
|
280
|
+
end
|
281
|
+
end
|
282
|
+
|
283
|
+
protected
|
284
|
+
|
285
|
+
# Upload the file to the job
|
286
|
+
def upload_file(job, pathname)
|
287
|
+
if job.respond_to?(:file_store_upload)
|
288
|
+
# Allow the job to determine what to do with the file
|
289
|
+
# Pass the pathname as a string, not a Pathname (IO) instance
|
290
|
+
# so that it can read the file directly
|
291
|
+
job.file_store_upload(pathname.to_s)
|
292
|
+
archive_directory ? archive_file(job, pathname) : pathname.unlink
|
293
|
+
elsif job.respond_to?(:upload)
|
294
|
+
# With RocketJob Pro the file can be uploaded directly into the Job itself
|
295
|
+
job.upload(pathname.to_s)
|
296
|
+
archive_directory ? archive_file(job, pathname) : pathname.unlink
|
297
|
+
else
|
298
|
+
upload_default(job, pathname)
|
299
|
+
end
|
300
|
+
end
|
301
|
+
|
302
|
+
# Archives the file for a job where there was no #file_store_upload or #upload method
|
303
|
+
def upload_default(job, pathname)
|
304
|
+
# The first argument must be a hash
|
305
|
+
job.arguments << {} if job.arguments.size == 0
|
306
|
+
job.arguments.first[:full_file_name] = archive_file(job, pathname)
|
307
|
+
end
|
308
|
+
|
309
|
+
# Move the file to the archive directory
|
310
|
+
#
|
311
|
+
# The archived file name is prefixed with the job id
|
312
|
+
#
|
313
|
+
# Returns [String] the fully qualified archived file name
|
314
|
+
#
|
315
|
+
# Note:
|
316
|
+
# - Works across partitions when the file and the archive are on different partitions
|
317
|
+
def archive_file(job, pathname)
|
318
|
+
target_path = archive_pathname
|
319
|
+
target_path.mkpath
|
320
|
+
target_file_name = target_path.join("#{job.id}_#{pathname.basename}")
|
321
|
+
# In case the file is being moved across partitions
|
322
|
+
FileUtils.move(pathname.to_s, target_file_name.to_s)
|
323
|
+
target_file_name.to_s
|
324
|
+
end
|
325
|
+
|
96
326
|
end
|
97
327
|
end
|