sidejob 3.0.1 → 4.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/sidejob/port.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  module SideJob
2
2
  # Represents an input or output port from a Job
3
3
  class Port
4
+ # Returned by {#read} and {#default} to indicate no data
5
+ class None < Object; end
6
+
4
7
  attr_reader :job, :type, :name
5
8
 
6
9
  # @param job [SideJob::Job, SideJob::Worker]
@@ -10,7 +13,8 @@ module SideJob
10
13
  @job = job
11
14
  @type = type.to_sym
12
15
  @name = name.to_sym
13
- raise "Invalid port name: #{@name}" if @name !~ /^[a-zA-Z0-9_]+$/ && name != '*'
16
+ raise "Invalid port name: #{@name}" if @name !~ /^[a-zA-Z0-9_]+$/
17
+ check_exists
14
18
  end
15
19
 
16
20
  # @return [Boolean] True if two ports are equal
@@ -23,38 +27,6 @@ module SideJob
23
27
  self == other
24
28
  end
25
29
 
26
- # Returns the port options. Currently supported options are mode and default.
27
- # @return [Hash] Port options
28
- def options
29
- opts = {mode: mode}
30
-
31
- default = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", @name)
32
- opts[:default] = parse_json(default) if default
33
-
34
- opts
35
- end
36
-
37
- # Reset the port options. Currently supported options are mode and default.
38
- # @param options [Hash] New port options
39
- def options=(options)
40
- options = options.symbolize_keys
41
- SideJob.redis.multi do |multi|
42
- multi.hset "#{@job.redis_key}:#{type}ports:mode", @name, options[:mode] || :queue
43
- if options.has_key?(:default)
44
- multi.hset "#{@job.redis_key}:#{type}ports:default", @name, options[:default].to_json
45
- else
46
- multi.hdel "#{@job.redis_key}:#{type}ports:default", @name
47
- end
48
- end
49
- end
50
-
51
- # @return [Symbol, nil] The port mode or nil if the port is invalid
52
- def mode
53
- mode = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:mode", @name)
54
- mode = mode.to_sym if mode
55
- mode
56
- end
57
-
58
30
  # Returns the number of items waiting on this port.
59
31
  # @return [Fixnum]
60
32
  def size
@@ -68,9 +40,10 @@ module SideJob
68
40
  end
69
41
 
70
42
  # Returns the port default value. To distinguish a null default value vs no default, use {#default?}.
71
- # @return [Object, nil] The default value on the port or nil if none
43
+ # @return [Object, None] The default value on the port or {SideJob::Port::None} if none
72
44
  def default
73
- parse_json SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", @name)
45
+ val = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", @name)
46
+ val ? parse_json(val) : None
74
47
  end
75
48
 
76
49
  # Returns if the port has a default value.
@@ -79,27 +52,31 @@ module SideJob
79
52
  SideJob.redis.hexists("#{@job.redis_key}:#{type}ports:default", @name)
80
53
  end
81
54
 
55
+ # Sets the port default value.
56
+ # @param val [Object, None] New JSON encodable default value or None to clear the default
57
+ def default=(val)
58
+ if val == None
59
+ SideJob.redis.hdel "#{@job.redis_key}:#{type}ports:default", @name
60
+ else
61
+ SideJob.redis.hset "#{@job.redis_key}:#{type}ports:default", @name, val.to_json
62
+ end
63
+ end
64
+
82
65
  # Write data to the port. If the port is an input port, runs the job.
83
- # The default operating mode for a port is :queue which means packets are read/written as a FIFO queue.
84
- # In :memory mode, writes do not enter the queue and instead overwrite the default port value.
85
66
  # @param data [Object] JSON encodable data to write to the port
86
67
  def write(data)
87
- case mode
88
- when :queue
89
- SideJob.redis.rpush redis_key, data.to_json
90
- @job.run if type == :in
91
- when :memory
92
- SideJob.redis.hset "#{@job.redis_key}:#{type}ports:default", @name, data.to_json
93
- else
94
- raise "Missing port #{@name} or invalid mode #{mode}"
68
+ # For {SideJob::Worker#for_inputs}, if this is set, we instead set the port default on writes
69
+ if Thread.current[:sidejob_port_write_default]
70
+ self.default = data
71
+ else
72
+ SideJob.redis.rpush redis_key, data.to_json
95
73
  end
96
-
97
- @job.log({read: [], write: [log_port_data(self, [data])]})
74
+ @job.run(parent: type != :in) # run job if inport otherwise run parent
75
+ log(write: [ { port: self, data: [data] } ])
98
76
  end
99
77
 
100
78
  # Reads the oldest data from the port. Returns the default value if no data and there is a default.
101
- # @return [Object] First data from port
102
- # @raise [EOFError] Error raised if no data to be read
79
+ # @return [Object, None] First data from port or {SideJob::Port::None} if there is no data and no default.
103
80
  def read
104
81
  data = SideJob.redis.lpop(redis_key)
105
82
  if data
@@ -107,10 +84,10 @@ module SideJob
107
84
  elsif default?
108
85
  data = default
109
86
  else
110
- raise EOFError unless data
87
+ return None
111
88
  end
112
89
 
113
- @job.log({read: [log_port_data(self, [data])], write: []})
90
+ log(read: [ { port: self, data: [data] } ])
114
91
 
115
92
  data
116
93
  end
@@ -119,48 +96,57 @@ module SideJob
119
96
  # All data is read from the current port and written to the destination ports.
120
97
  # If the current port has a default value, the default is copied to all destination ports.
121
98
  # @param ports [Array<SideJob::Port>, SideJob::Port] Destination port(s)
122
- # @param metadata [Hash] If provided, the metadata is merged into the log entry
123
99
  # @return [Array<Object>] Returns all data on current port
124
- def connect_to(ports, metadata={})
100
+ def connect_to(ports)
125
101
  ports = [ports] unless ports.is_a?(Array)
126
- ports_by_mode = ports.group_by {|port| port.mode}
127
102
 
128
- default = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", @name)
129
-
130
- # empty the port of all data
131
- data = SideJob.redis.multi do |multi|
103
+ # Get source port data and default
104
+ (default, data, trash) = result = SideJob.redis.multi do |multi|
105
+ multi.hget("#{@job.redis_key}:#{@type}ports:default", @name)
106
+ # get all and empty the port of all data
132
107
  multi.lrange redis_key, 0, -1
133
108
  multi.del redis_key
134
- end[0]
109
+ end
110
+
111
+ default = result[0]
112
+ data = result[1]
135
113
 
136
- to_run = Set.new
114
+ return data unless data.length > 0 || default
115
+
116
+ # Get destination port defaults
117
+ port_defaults = SideJob.redis.multi do |multi|
118
+ # port defaults
119
+ ports.each { |port| multi.hget("#{port.job.redis_key}:#{port.type}ports:default", port.name) }
120
+ end
137
121
 
138
122
  SideJob.redis.multi do |multi|
139
123
  if data.length > 0
140
- (ports_by_mode[:queue] || []).each do |port|
124
+ ports.each_with_index do |port, i|
141
125
  multi.rpush port.redis_key, data
142
- to_run.add port.job if port.type == :in
143
- end
144
- if ! default
145
- (ports_by_mode[:memory] || []).each do |port|
146
- multi.hset "#{port.job.redis_key}:#{port.type}ports:default", port.name, data.last
147
- end
148
126
  end
149
127
  end
150
128
 
151
129
  if default
152
- ports.each do |port|
153
- multi.hset "#{port.job.redis_key}:#{port.type}ports:default", port.name, default
130
+ ports.each_with_index do |port, i|
131
+ if default != port_defaults[i]
132
+ multi.hset "#{port.job.redis_key}:#{port.type}ports:default", port.name, default
133
+ end
154
134
  end
155
135
  end
156
136
  end
157
137
 
158
138
  data.map! {|x| parse_json x}
159
139
  if data.length > 0
160
- SideJob.log metadata.merge({read: [log_port_data(self, data)], write: ports.map { |port| log_port_data(port, data)}})
140
+ log(read: [{ port: self, data: data }], write: ports.map { |port| {port: port, data: data} })
141
+ end
142
+
143
+ # Run the port job or parent only if something was changed
144
+ ports.each_with_index do |port, i|
145
+ if data.length > 0 || default != port_defaults[i]
146
+ port.job.run(parent: port.type != :in)
147
+ end
161
148
  end
162
149
 
163
- to_run.each { |job| job.run }
164
150
  data
165
151
  end
166
152
 
@@ -171,7 +157,6 @@ module SideJob
171
157
  while size > 0 do
172
158
  yield read
173
159
  end
174
- rescue EOFError
175
160
  end
176
161
 
177
162
  # Returns the redis key used for storing inputs or outputs from a port name
@@ -186,21 +171,68 @@ module SideJob
186
171
  redis_key.hash
187
172
  end
188
173
 
174
+ # Groups all port reads and writes within the block into a single logged event.
175
+ def self.log_group(&block)
176
+ outermost = ! Thread.current[:sidejob_port_group]
177
+ Thread.current[:sidejob_port_group] ||= {read: {}, write: {}} # port -> [data]
178
+ yield
179
+ ensure
180
+ if outermost
181
+ self._really_log Thread.current[:sidejob_port_group]
182
+ Thread.current[:sidejob_port_group] = nil
183
+ end
184
+ end
185
+
189
186
  private
190
187
 
191
- def log_port_data(port, data)
192
- x = {job: port.job.id, data: data}
193
- x[:"#{port.type}port"] = port.name
194
- x
188
+ def self._really_log(entry)
189
+ return unless entry && (entry[:read].length > 0 || entry[:write].length > 0)
190
+
191
+ log_entry = {}
192
+ %i{read write}.each do |type|
193
+ log_entry[type] = entry[type].map do |port, data|
194
+ x = {job: port.job.id, data: data}
195
+ x[:"#{port.type}port"] = port.name
196
+ x
197
+ end
198
+ end
199
+
200
+ SideJob.log log_entry
201
+ end
202
+
203
+ def log(data)
204
+ entry = Thread.current[:sidejob_port_group] ? Thread.current[:sidejob_port_group] : {read: {}, write: {}}
205
+ %i{read write}.each do |type|
206
+ (data[type] || []).each do |x|
207
+ entry[type][x[:port]] ||= []
208
+ entry[type][x[:port]].concat JSON.parse(x[:data].to_json) # serialize/deserialize to do a deep copy
209
+ end
210
+ end
211
+ if ! Thread.current[:sidejob_port_group]
212
+ self.class._really_log(entry)
213
+ end
195
214
  end
196
215
 
197
216
  # Wrapper around JSON.parse to also handle primitive types.
198
- # @param data [String, nil] Data to parse
217
+ # @param data [String] Data to parse
199
218
  # @return [Object, nil]
200
219
  def parse_json(data)
201
- raise "Invalid json #{data}" if data && ! data.is_a?(String)
202
- data = JSON.parse("[#{data}]")[0] if data
203
- data
220
+ JSON.parse("[#{data}]")[0]
221
+ end
222
+
223
+ # Check if the port exists, dynamically creating it if it does not exist and a * port exists for the job
224
+ # @raise [RuntimeError] Error raised if port does not exist
225
+ def check_exists
226
+ return if SideJob.redis.sismember "#{@job.redis_key}:#{type}ports", @name
227
+ dynamic = SideJob.redis.sismember("#{@job.redis_key}:#{type}ports", '*')
228
+ raise "Job #{@job.id} does not have #{@type}port #{@name}!" unless dynamic
229
+ dynamic_default = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", '*')
230
+ SideJob.redis.multi do |multi|
231
+ multi.sadd "#{@job.redis_key}:#{type}ports", @name
232
+ if dynamic_default
233
+ multi.hset "#{@job.redis_key}:#{type}ports:default", @name, dynamic_default
234
+ end
235
+ end
204
236
  end
205
237
  end
206
238
  end
@@ -5,13 +5,6 @@ module SideJob
5
5
  # For simplicity, a job is allowed to be queued multiple times in the Sidekiq queue
6
6
  # Only when it gets pulled out to be run, i.e. here, we decide if we want to actually run it
7
7
  class ServerMiddleware
8
- # Configuration parameters for running workers
9
- CONFIGURATION = {
10
- lock_expiration: 86400, # the worker should not run longer than this number of seconds
11
- max_depth: 20, # the job should not be nested more than this number of levels
12
- max_runs_per_minute: 60, # generate error if the job is run more often than this
13
- }
14
-
15
8
  # Called by sidekiq as a server middleware to handle running a worker
16
9
  # @param worker [SideJob::Worker]
17
10
  # @param msg [Hash] Sidekiq message format
@@ -20,12 +13,45 @@ module SideJob
20
13
  @worker = worker
21
14
  return unless @worker.exists? # make sure the job has not been deleted
22
15
 
16
+ # only run if status is queued or terminating
23
17
  case @worker.status
24
- when 'queued'
25
- run_worker { yield }
26
- when 'terminating'
27
- terminate_worker
28
- # for any other status, we assume this worker does not need to be run
18
+ when 'queued', 'terminating'
19
+ else
20
+ return
21
+ end
22
+
23
+ # We use the presence of this lock:worker key to indicate that a worker is trying to get the job lock.
24
+ # No other worker needs to also wait and no calls to {SideJob::Job#run} need to queue a new run.
25
+ return unless SideJob.redis.set("#{@worker.redis_key}:lock:worker", 1, {nx: true, ex: 2})
26
+
27
+ # Obtain a lock so that only one worker runs at a time, sparing workers from having to deal with concurrency
28
+ token = @worker.lock(CONFIGURATION[:lock_expiration])
29
+ if token
30
+ begin
31
+ SideJob.redis.del "#{@worker.redis_key}:lock:worker"
32
+ SideJob.log_context(job: @worker.id) do
33
+ case @worker.status
34
+ when 'queued'
35
+ run_worker { yield }
36
+ when 'terminating'
37
+ terminate_worker
38
+ # else no longer need running
39
+ end
40
+ end
41
+ ensure
42
+ @worker.unlock(token)
43
+ @worker.run(parent: true) # run the parent every time worker runs
44
+ end
45
+ else
46
+ SideJob.redis.del "#{@worker.redis_key}:lock:worker"
47
+ # Unable to obtain job lock which may indicate another worker thread is running
48
+ # Schedule another run
49
+ # Note that the actual time before requeue depends on sidekiq poll_interval (default 15 seconds)
50
+ case @worker.status
51
+ when 'queued', 'terminating'
52
+ @worker.run(wait: 1)
53
+ # else no longer need running
54
+ end
29
55
  end
30
56
  end
31
57
 
@@ -39,64 +65,44 @@ module SideJob
39
65
  add_exception e
40
66
  ensure
41
67
  @worker.status = 'terminated'
42
- @worker.parent.run if @worker.parent
43
68
  end
44
69
 
45
70
  def run_worker(&block)
46
71
  # limit each job from being called too many times per minute
47
- # or too deep of a job tree
48
- # this is to help prevent bad coding that leads to recursive busy loops
72
+ # this is to help prevent bad coding that leads to infinite looping
49
73
  # Uses Rate limiter 1 pattern from http://redis.io/commands/INCR
50
74
  rate_key = "#{@worker.redis_key}:rate:#{Time.now.to_i / 60}"
51
75
  rate = SideJob.redis.multi do |multi|
52
76
  multi.incr rate_key
53
- multi.expire rate_key, 300 # 5 minutes
77
+ multi.expire rate_key, 60
54
78
  end[0]
55
79
 
56
80
  if rate.to_i > CONFIGURATION[:max_runs_per_minute]
57
- SideJob.log({ job: @worker.id, error: 'Job was terminated due to being called too rapidly' })
58
- return @worker.terminate
59
- elsif SideJob.redis.llen("#{@worker.redis_key}:ancestors") > CONFIGURATION[:max_depth]
60
- SideJob.log({ job: @worker.id, error: 'Job was terminated due to being too deep' })
61
- return @worker.terminate
62
- end
63
-
64
- # if another thread is already running this job, we don't run the job now
65
- # this simplifies workers from having to deal with thread safety
66
- # we will requeue the job in the other thread
67
- lock = "#{@worker.redis_key}:lock"
68
- now = Time.now.to_f
69
- val = SideJob.redis.multi do |multi|
70
- multi.get(lock)
71
- multi.set(lock, now, {ex: CONFIGURATION[:lock_expiration]}) # add an expiration just in case the lock becomes stale
72
- end[0]
73
-
74
- return if val # only run if lock key was not set
75
-
76
- begin
81
+ SideJob.log({ error: 'Job was terminated due to being called too rapidly' })
82
+ @worker.terminate
83
+ else
84
+ # normal run
77
85
  @worker.set ran_at: SideJob.timestamp
78
86
  @worker.status = 'running'
79
87
  yield
80
88
  @worker.status = 'completed' if @worker.status == 'running'
81
- rescue SideJob::Worker::Suspended
82
- @worker.status = 'suspended' if @worker.status == 'running'
83
- rescue => e
84
- @worker.status = 'failed' if @worker.status == 'running'
85
- add_exception e
86
- ensure
87
- val = SideJob.redis.multi do |multi|
88
- multi.get lock
89
- multi.del lock
90
- end[0]
91
-
92
- @worker.run if val && val.to_f != now # run it again if the lock key changed
93
- @worker.parent.run if @worker.parent
94
89
  end
90
+ rescue SideJob::Worker::Suspended
91
+ @worker.status = 'suspended' if @worker.status == 'running'
92
+ rescue => e
93
+ # only set failed if not terminating/terminated
94
+ case @worker.status
95
+ when 'terminating', 'terminated'
96
+ else
97
+ @worker.status = 'failed'
98
+ end
99
+
100
+ add_exception e
95
101
  end
96
102
 
97
103
  def add_exception(exception)
98
104
  # only store the backtrace until the first sidekiq line
99
- SideJob.log({ job: @worker.id, error: exception.message, backtrace: exception.backtrace.take_while {|l| l !~ /sidekiq/}.join("\n") })
105
+ SideJob.log({ error: exception.message, backtrace: exception.backtrace.take_while {|l| l !~ /sidekiq/}.join("\n") })
100
106
  end
101
107
  end
102
108
  end
@@ -37,8 +37,6 @@ module SideJob
37
37
  worker.perform(*args)
38
38
  end
39
39
 
40
- reload
41
-
42
40
  if errors && status == 'failed'
43
41
  SideJob.logs.each do |event|
44
42
  if event['error']
@@ -1,4 +1,4 @@
1
1
  module SideJob
2
2
  # The current SideJob version
3
- VERSION = '3.0.1'
3
+ VERSION = '4.0.1'
4
4
  end