sidejob 3.0.1 → 4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -5
- data/README.md +10 -14
- data/lib/sidejob.rb +29 -23
- data/lib/sidejob/job.rb +183 -213
- data/lib/sidejob/port.rb +112 -80
- data/lib/sidejob/server_middleware.rb +56 -50
- data/lib/sidejob/testing.rb +0 -2
- data/lib/sidejob/version.rb +1 -1
- data/lib/sidejob/worker.rb +28 -46
- data/spec/integration/fib_spec.rb +8 -4
- data/spec/integration/sum_spec.rb +0 -1
- data/spec/sidejob/job_spec.rb +323 -241
- data/spec/sidejob/port_spec.rb +152 -138
- data/spec/sidejob/server_middleware_spec.rb +27 -47
- data/spec/sidejob/worker_spec.rb +16 -84
- data/spec/sidejob_spec.rb +39 -16
- data/web/Gemfile +6 -0
- data/web/Gemfile.lock +43 -0
- data/web/app.rb +205 -0
- data/web/config.ru +14 -0
- metadata +6 -2
data/lib/sidejob/port.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
module SideJob
|
2
2
|
# Represents an input or output port from a Job
|
3
3
|
class Port
|
4
|
+
# Returned by {#read} and {#default} to indicate no data
|
5
|
+
class None < Object; end
|
6
|
+
|
4
7
|
attr_reader :job, :type, :name
|
5
8
|
|
6
9
|
# @param job [SideJob::Job, SideJob::Worker]
|
@@ -10,7 +13,8 @@ module SideJob
|
|
10
13
|
@job = job
|
11
14
|
@type = type.to_sym
|
12
15
|
@name = name.to_sym
|
13
|
-
raise "Invalid port name: #{@name}" if @name !~ /^[a-zA-Z0-9_]+$/
|
16
|
+
raise "Invalid port name: #{@name}" if @name !~ /^[a-zA-Z0-9_]+$/
|
17
|
+
check_exists
|
14
18
|
end
|
15
19
|
|
16
20
|
# @return [Boolean] True if two ports are equal
|
@@ -23,38 +27,6 @@ module SideJob
|
|
23
27
|
self == other
|
24
28
|
end
|
25
29
|
|
26
|
-
# Returns the port options. Currently supported options are mode and default.
|
27
|
-
# @return [Hash] Port options
|
28
|
-
def options
|
29
|
-
opts = {mode: mode}
|
30
|
-
|
31
|
-
default = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", @name)
|
32
|
-
opts[:default] = parse_json(default) if default
|
33
|
-
|
34
|
-
opts
|
35
|
-
end
|
36
|
-
|
37
|
-
# Reset the port options. Currently supported options are mode and default.
|
38
|
-
# @param options [Hash] New port options
|
39
|
-
def options=(options)
|
40
|
-
options = options.symbolize_keys
|
41
|
-
SideJob.redis.multi do |multi|
|
42
|
-
multi.hset "#{@job.redis_key}:#{type}ports:mode", @name, options[:mode] || :queue
|
43
|
-
if options.has_key?(:default)
|
44
|
-
multi.hset "#{@job.redis_key}:#{type}ports:default", @name, options[:default].to_json
|
45
|
-
else
|
46
|
-
multi.hdel "#{@job.redis_key}:#{type}ports:default", @name
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
# @return [Symbol, nil] The port mode or nil if the port is invalid
|
52
|
-
def mode
|
53
|
-
mode = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:mode", @name)
|
54
|
-
mode = mode.to_sym if mode
|
55
|
-
mode
|
56
|
-
end
|
57
|
-
|
58
30
|
# Returns the number of items waiting on this port.
|
59
31
|
# @return [Fixnum]
|
60
32
|
def size
|
@@ -68,9 +40,10 @@ module SideJob
|
|
68
40
|
end
|
69
41
|
|
70
42
|
# Returns the port default value. To distinguish a null default value vs no default, use {#default?}.
|
71
|
-
# @return [Object,
|
43
|
+
# @return [Object, None] The default value on the port or {SideJob::Port::None} if none
|
72
44
|
def default
|
73
|
-
|
45
|
+
val = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", @name)
|
46
|
+
val ? parse_json(val) : None
|
74
47
|
end
|
75
48
|
|
76
49
|
# Returns if the port has a default value.
|
@@ -79,27 +52,31 @@ module SideJob
|
|
79
52
|
SideJob.redis.hexists("#{@job.redis_key}:#{type}ports:default", @name)
|
80
53
|
end
|
81
54
|
|
55
|
+
# Sets the port default value.
|
56
|
+
# @param val [Object, None] New JSON encodable default value or None to clear the default
|
57
|
+
def default=(val)
|
58
|
+
if val == None
|
59
|
+
SideJob.redis.hdel "#{@job.redis_key}:#{type}ports:default", @name
|
60
|
+
else
|
61
|
+
SideJob.redis.hset "#{@job.redis_key}:#{type}ports:default", @name, val.to_json
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
82
65
|
# Write data to the port. If the port is an input port, runs the job.
|
83
|
-
# The default operating mode for a port is :queue which means packets are read/written as a FIFO queue.
|
84
|
-
# In :memory mode, writes do not enter the queue and instead overwrite the default port value.
|
85
66
|
# @param data [Object] JSON encodable data to write to the port
|
86
67
|
def write(data)
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
SideJob.redis.hset "#{@job.redis_key}:#{type}ports:default", @name, data.to_json
|
93
|
-
else
|
94
|
-
raise "Missing port #{@name} or invalid mode #{mode}"
|
68
|
+
# For {SideJob::Worker#for_inputs}, if this is set, we instead set the port default on writes
|
69
|
+
if Thread.current[:sidejob_port_write_default]
|
70
|
+
self.default = data
|
71
|
+
else
|
72
|
+
SideJob.redis.rpush redis_key, data.to_json
|
95
73
|
end
|
96
|
-
|
97
|
-
|
74
|
+
@job.run(parent: type != :in) # run job if inport otherwise run parent
|
75
|
+
log(write: [ { port: self, data: [data] } ])
|
98
76
|
end
|
99
77
|
|
100
78
|
# Reads the oldest data from the port. Returns the default value if no data and there is a default.
|
101
|
-
# @return [Object] First data from port
|
102
|
-
# @raise [EOFError] Error raised if no data to be read
|
79
|
+
# @return [Object, None] First data from port or {SideJob::Port::None} if there is no data and no default.
|
103
80
|
def read
|
104
81
|
data = SideJob.redis.lpop(redis_key)
|
105
82
|
if data
|
@@ -107,10 +84,10 @@ module SideJob
|
|
107
84
|
elsif default?
|
108
85
|
data = default
|
109
86
|
else
|
110
|
-
|
87
|
+
return None
|
111
88
|
end
|
112
89
|
|
113
|
-
|
90
|
+
log(read: [ { port: self, data: [data] } ])
|
114
91
|
|
115
92
|
data
|
116
93
|
end
|
@@ -119,48 +96,57 @@ module SideJob
|
|
119
96
|
# All data is read from the current port and written to the destination ports.
|
120
97
|
# If the current port has a default value, the default is copied to all destination ports.
|
121
98
|
# @param ports [Array<SideJob::Port>, SideJob::Port] Destination port(s)
|
122
|
-
# @param metadata [Hash] If provided, the metadata is merged into the log entry
|
123
99
|
# @return [Array<Object>] Returns all data on current port
|
124
|
-
def connect_to(ports
|
100
|
+
def connect_to(ports)
|
125
101
|
ports = [ports] unless ports.is_a?(Array)
|
126
|
-
ports_by_mode = ports.group_by {|port| port.mode}
|
127
102
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
103
|
+
# Get source port data and default
|
104
|
+
(default, data, trash) = result = SideJob.redis.multi do |multi|
|
105
|
+
multi.hget("#{@job.redis_key}:#{@type}ports:default", @name)
|
106
|
+
# get all and empty the port of all data
|
132
107
|
multi.lrange redis_key, 0, -1
|
133
108
|
multi.del redis_key
|
134
|
-
end
|
109
|
+
end
|
110
|
+
|
111
|
+
default = result[0]
|
112
|
+
data = result[1]
|
135
113
|
|
136
|
-
|
114
|
+
return data unless data.length > 0 || default
|
115
|
+
|
116
|
+
# Get destination port defaults
|
117
|
+
port_defaults = SideJob.redis.multi do |multi|
|
118
|
+
# port defaults
|
119
|
+
ports.each { |port| multi.hget("#{port.job.redis_key}:#{port.type}ports:default", port.name) }
|
120
|
+
end
|
137
121
|
|
138
122
|
SideJob.redis.multi do |multi|
|
139
123
|
if data.length > 0
|
140
|
-
|
124
|
+
ports.each_with_index do |port, i|
|
141
125
|
multi.rpush port.redis_key, data
|
142
|
-
to_run.add port.job if port.type == :in
|
143
|
-
end
|
144
|
-
if ! default
|
145
|
-
(ports_by_mode[:memory] || []).each do |port|
|
146
|
-
multi.hset "#{port.job.redis_key}:#{port.type}ports:default", port.name, data.last
|
147
|
-
end
|
148
126
|
end
|
149
127
|
end
|
150
128
|
|
151
129
|
if default
|
152
|
-
ports.
|
153
|
-
|
130
|
+
ports.each_with_index do |port, i|
|
131
|
+
if default != port_defaults[i]
|
132
|
+
multi.hset "#{port.job.redis_key}:#{port.type}ports:default", port.name, default
|
133
|
+
end
|
154
134
|
end
|
155
135
|
end
|
156
136
|
end
|
157
137
|
|
158
138
|
data.map! {|x| parse_json x}
|
159
139
|
if data.length > 0
|
160
|
-
|
140
|
+
log(read: [{ port: self, data: data }], write: ports.map { |port| {port: port, data: data} })
|
141
|
+
end
|
142
|
+
|
143
|
+
# Run the port job or parent only if something was changed
|
144
|
+
ports.each_with_index do |port, i|
|
145
|
+
if data.length > 0 || default != port_defaults[i]
|
146
|
+
port.job.run(parent: port.type != :in)
|
147
|
+
end
|
161
148
|
end
|
162
149
|
|
163
|
-
to_run.each { |job| job.run }
|
164
150
|
data
|
165
151
|
end
|
166
152
|
|
@@ -171,7 +157,6 @@ module SideJob
|
|
171
157
|
while size > 0 do
|
172
158
|
yield read
|
173
159
|
end
|
174
|
-
rescue EOFError
|
175
160
|
end
|
176
161
|
|
177
162
|
# Returns the redis key used for storing inputs or outputs from a port name
|
@@ -186,21 +171,68 @@ module SideJob
|
|
186
171
|
redis_key.hash
|
187
172
|
end
|
188
173
|
|
174
|
+
# Groups all port reads and writes within the block into a single logged event.
|
175
|
+
def self.log_group(&block)
|
176
|
+
outermost = ! Thread.current[:sidejob_port_group]
|
177
|
+
Thread.current[:sidejob_port_group] ||= {read: {}, write: {}} # port -> [data]
|
178
|
+
yield
|
179
|
+
ensure
|
180
|
+
if outermost
|
181
|
+
self._really_log Thread.current[:sidejob_port_group]
|
182
|
+
Thread.current[:sidejob_port_group] = nil
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
189
186
|
private
|
190
187
|
|
191
|
-
def
|
192
|
-
|
193
|
-
|
194
|
-
|
188
|
+
def self._really_log(entry)
|
189
|
+
return unless entry && (entry[:read].length > 0 || entry[:write].length > 0)
|
190
|
+
|
191
|
+
log_entry = {}
|
192
|
+
%i{read write}.each do |type|
|
193
|
+
log_entry[type] = entry[type].map do |port, data|
|
194
|
+
x = {job: port.job.id, data: data}
|
195
|
+
x[:"#{port.type}port"] = port.name
|
196
|
+
x
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
SideJob.log log_entry
|
201
|
+
end
|
202
|
+
|
203
|
+
def log(data)
|
204
|
+
entry = Thread.current[:sidejob_port_group] ? Thread.current[:sidejob_port_group] : {read: {}, write: {}}
|
205
|
+
%i{read write}.each do |type|
|
206
|
+
(data[type] || []).each do |x|
|
207
|
+
entry[type][x[:port]] ||= []
|
208
|
+
entry[type][x[:port]].concat JSON.parse(x[:data].to_json) # serialize/deserialize to do a deep copy
|
209
|
+
end
|
210
|
+
end
|
211
|
+
if ! Thread.current[:sidejob_port_group]
|
212
|
+
self.class._really_log(entry)
|
213
|
+
end
|
195
214
|
end
|
196
215
|
|
197
216
|
# Wrapper around JSON.parse to also handle primitive types.
|
198
|
-
# @param data [String
|
217
|
+
# @param data [String] Data to parse
|
199
218
|
# @return [Object, nil]
|
200
219
|
def parse_json(data)
|
201
|
-
|
202
|
-
|
203
|
-
|
220
|
+
JSON.parse("[#{data}]")[0]
|
221
|
+
end
|
222
|
+
|
223
|
+
# Check if the port exists, dynamically creating it if it does not exist and a * port exists for the job
|
224
|
+
# @raise [RuntimeError] Error raised if port does not exist
|
225
|
+
def check_exists
|
226
|
+
return if SideJob.redis.sismember "#{@job.redis_key}:#{type}ports", @name
|
227
|
+
dynamic = SideJob.redis.sismember("#{@job.redis_key}:#{type}ports", '*')
|
228
|
+
raise "Job #{@job.id} does not have #{@type}port #{@name}!" unless dynamic
|
229
|
+
dynamic_default = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", '*')
|
230
|
+
SideJob.redis.multi do |multi|
|
231
|
+
multi.sadd "#{@job.redis_key}:#{type}ports", @name
|
232
|
+
if dynamic_default
|
233
|
+
multi.hset "#{@job.redis_key}:#{type}ports:default", @name, dynamic_default
|
234
|
+
end
|
235
|
+
end
|
204
236
|
end
|
205
237
|
end
|
206
238
|
end
|
@@ -5,13 +5,6 @@ module SideJob
|
|
5
5
|
# For simplicity, a job is allowed to be queued multiple times in the Sidekiq queue
|
6
6
|
# Only when it gets pulled out to be run, i.e. here, we decide if we want to actually run it
|
7
7
|
class ServerMiddleware
|
8
|
-
# Configuration parameters for running workers
|
9
|
-
CONFIGURATION = {
|
10
|
-
lock_expiration: 86400, # the worker should not run longer than this number of seconds
|
11
|
-
max_depth: 20, # the job should not be nested more than this number of levels
|
12
|
-
max_runs_per_minute: 60, # generate error if the job is run more often than this
|
13
|
-
}
|
14
|
-
|
15
8
|
# Called by sidekiq as a server middleware to handle running a worker
|
16
9
|
# @param worker [SideJob::Worker]
|
17
10
|
# @param msg [Hash] Sidekiq message format
|
@@ -20,12 +13,45 @@ module SideJob
|
|
20
13
|
@worker = worker
|
21
14
|
return unless @worker.exists? # make sure the job has not been deleted
|
22
15
|
|
16
|
+
# only run if status is queued or terminating
|
23
17
|
case @worker.status
|
24
|
-
when 'queued'
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
18
|
+
when 'queued', 'terminating'
|
19
|
+
else
|
20
|
+
return
|
21
|
+
end
|
22
|
+
|
23
|
+
# We use the presence of this lock:worker key to indicate that a worker is trying to get the job lock.
|
24
|
+
# No other worker needs to also wait and no calls to {SideJob::Job#run} need to queue a new run.
|
25
|
+
return unless SideJob.redis.set("#{@worker.redis_key}:lock:worker", 1, {nx: true, ex: 2})
|
26
|
+
|
27
|
+
# Obtain a lock to allow only one worker to run at a time so that workers do not have to deal with concurrency
|
28
|
+
token = @worker.lock(CONFIGURATION[:lock_expiration])
|
29
|
+
if token
|
30
|
+
begin
|
31
|
+
SideJob.redis.del "#{@worker.redis_key}:lock:worker"
|
32
|
+
SideJob.log_context(job: @worker.id) do
|
33
|
+
case @worker.status
|
34
|
+
when 'queued'
|
35
|
+
run_worker { yield }
|
36
|
+
when 'terminating'
|
37
|
+
terminate_worker
|
38
|
+
# else the job no longer needs running
|
39
|
+
end
|
40
|
+
end
|
41
|
+
ensure
|
42
|
+
@worker.unlock(token)
|
43
|
+
@worker.run(parent: true) # run the parent every time worker runs
|
44
|
+
end
|
45
|
+
else
|
46
|
+
SideJob.redis.del "#{@worker.redis_key}:lock:worker"
|
47
|
+
# Unable to obtain job lock which may indicate another worker thread is running
|
48
|
+
# Schedule another run
|
49
|
+
# Note that the actual time before requeue depends on sidekiq poll_interval (default 15 seconds)
|
50
|
+
case @worker.status
|
51
|
+
when 'queued', 'terminating'
|
52
|
+
@worker.run(wait: 1)
|
53
|
+
# else the job no longer needs running
|
54
|
+
end
|
29
55
|
end
|
30
56
|
end
|
31
57
|
|
@@ -39,64 +65,44 @@ module SideJob
|
|
39
65
|
add_exception e
|
40
66
|
ensure
|
41
67
|
@worker.status = 'terminated'
|
42
|
-
@worker.parent.run if @worker.parent
|
43
68
|
end
|
44
69
|
|
45
70
|
def run_worker(&block)
|
46
71
|
# prevent each job from being called too many times per minute
|
47
|
-
#
|
48
|
-
# this is to help prevent bad coding that leads to recursive busy loops
|
72
|
+
# this is to help prevent bad coding that leads to infinite looping
|
49
73
|
# Uses Rate limiter 1 pattern from http://redis.io/commands/INCR
|
50
74
|
rate_key = "#{@worker.redis_key}:rate:#{Time.now.to_i / 60}"
|
51
75
|
rate = SideJob.redis.multi do |multi|
|
52
76
|
multi.incr rate_key
|
53
|
-
multi.expire rate_key,
|
77
|
+
multi.expire rate_key, 60
|
54
78
|
end[0]
|
55
79
|
|
56
80
|
if rate.to_i > CONFIGURATION[:max_runs_per_minute]
|
57
|
-
SideJob.log({
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
return @worker.terminate
|
62
|
-
end
|
63
|
-
|
64
|
-
# if another thread is already running this job, we don't run the job now
|
65
|
-
# this simplifies workers from having to deal with thread safety
|
66
|
-
# we will requeue the job in the other thread
|
67
|
-
lock = "#{@worker.redis_key}:lock"
|
68
|
-
now = Time.now.to_f
|
69
|
-
val = SideJob.redis.multi do |multi|
|
70
|
-
multi.get(lock)
|
71
|
-
multi.set(lock, now, {ex: CONFIGURATION[:lock_expiration]}) # add an expiration just in case the lock becomes stale
|
72
|
-
end[0]
|
73
|
-
|
74
|
-
return if val # only run if lock key was not set
|
75
|
-
|
76
|
-
begin
|
81
|
+
SideJob.log({ error: 'Job was terminated due to being called too rapidly' })
|
82
|
+
@worker.terminate
|
83
|
+
else
|
84
|
+
# normal run
|
77
85
|
@worker.set ran_at: SideJob.timestamp
|
78
86
|
@worker.status = 'running'
|
79
87
|
yield
|
80
88
|
@worker.status = 'completed' if @worker.status == 'running'
|
81
|
-
rescue SideJob::Worker::Suspended
|
82
|
-
@worker.status = 'suspended' if @worker.status == 'running'
|
83
|
-
rescue => e
|
84
|
-
@worker.status = 'failed' if @worker.status == 'running'
|
85
|
-
add_exception e
|
86
|
-
ensure
|
87
|
-
val = SideJob.redis.multi do |multi|
|
88
|
-
multi.get lock
|
89
|
-
multi.del lock
|
90
|
-
end[0]
|
91
|
-
|
92
|
-
@worker.run if val && val.to_f != now # run it again if the lock key changed
|
93
|
-
@worker.parent.run if @worker.parent
|
94
89
|
end
|
90
|
+
rescue SideJob::Worker::Suspended
|
91
|
+
@worker.status = 'suspended' if @worker.status == 'running'
|
92
|
+
rescue => e
|
93
|
+
# only set failed if not terminating/terminated
|
94
|
+
case @worker.status
|
95
|
+
when 'terminating', 'terminated'
|
96
|
+
else
|
97
|
+
@worker.status = 'failed'
|
98
|
+
end
|
99
|
+
|
100
|
+
add_exception e
|
95
101
|
end
|
96
102
|
|
97
103
|
def add_exception(exception)
|
98
104
|
# only store the backtrace until the first sidekiq line
|
99
|
-
SideJob.log({
|
105
|
+
SideJob.log({ error: exception.message, backtrace: exception.backtrace.take_while {|l| l !~ /sidekiq/}.join("\n") })
|
100
106
|
end
|
101
107
|
end
|
102
108
|
end
|
data/lib/sidejob/testing.rb
CHANGED
data/lib/sidejob/version.rb
CHANGED