sidejob 3.0.1 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sidejob/port.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  module SideJob
2
2
  # Represents an input or output port from a Job
3
3
  class Port
4
+ # Returned by {#read} and {#default} to indicate no data
5
+ class None < Object; end
6
+
4
7
  attr_reader :job, :type, :name
5
8
 
6
9
  # @param job [SideJob::Job, SideJob::Worker]
@@ -10,7 +13,8 @@ module SideJob
10
13
  @job = job
11
14
  @type = type.to_sym
12
15
  @name = name.to_sym
13
- raise "Invalid port name: #{@name}" if @name !~ /^[a-zA-Z0-9_]+$/ && name != '*'
16
+ raise "Invalid port name: #{@name}" if @name !~ /^[a-zA-Z0-9_]+$/
17
+ check_exists
14
18
  end
15
19
 
16
20
  # @return [Boolean] True if two ports are equal
@@ -23,38 +27,6 @@ module SideJob
23
27
  self == other
24
28
  end
25
29
 
26
- # Returns the port options. Currently supported options are mode and default.
27
- # @return [Hash] Port options
28
- def options
29
- opts = {mode: mode}
30
-
31
- default = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", @name)
32
- opts[:default] = parse_json(default) if default
33
-
34
- opts
35
- end
36
-
37
- # Reset the port options. Currently supported options are mode and default.
38
- # @param options [Hash] New port options
39
- def options=(options)
40
- options = options.symbolize_keys
41
- SideJob.redis.multi do |multi|
42
- multi.hset "#{@job.redis_key}:#{type}ports:mode", @name, options[:mode] || :queue
43
- if options.has_key?(:default)
44
- multi.hset "#{@job.redis_key}:#{type}ports:default", @name, options[:default].to_json
45
- else
46
- multi.hdel "#{@job.redis_key}:#{type}ports:default", @name
47
- end
48
- end
49
- end
50
-
51
- # @return [Symbol, nil] The port mode or nil if the port is invalid
52
- def mode
53
- mode = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:mode", @name)
54
- mode = mode.to_sym if mode
55
- mode
56
- end
57
-
58
30
  # Returns the number of items waiting on this port.
59
31
  # @return [Fixnum]
60
32
  def size
@@ -68,9 +40,10 @@ module SideJob
68
40
  end
69
41
 
70
42
  # Returns the port default value. To distinguish a null default value vs no default, use {#default?}.
71
- # @return [Object, nil] The default value on the port or nil if none
43
+ # @return [Object, None] The default value on the port or {SideJob::Port::None} if none
72
44
  def default
73
- parse_json SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", @name)
45
+ val = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", @name)
46
+ val ? parse_json(val) : None
74
47
  end
75
48
 
76
49
  # Returns if the port has a default value.
@@ -79,27 +52,31 @@ module SideJob
79
52
  SideJob.redis.hexists("#{@job.redis_key}:#{type}ports:default", @name)
80
53
  end
81
54
 
55
+ # Sets the port default value.
56
+ # @param val [Object, None] New JSON encodable default value or None to clear the default
57
+ def default=(val)
58
+ if val == None
59
+ SideJob.redis.hdel "#{@job.redis_key}:#{type}ports:default", @name
60
+ else
61
+ SideJob.redis.hset "#{@job.redis_key}:#{type}ports:default", @name, val.to_json
62
+ end
63
+ end
64
+
82
65
  # Write data to the port. If the port is an input port, runs the job.
83
- # The default operating mode for a port is :queue which means packets are read/written as a FIFO queue.
84
- # In :memory mode, writes do not enter the queue and instead overwrite the default port value.
85
66
  # @param data [Object] JSON encodable data to write to the port
86
67
  def write(data)
87
- case mode
88
- when :queue
89
- SideJob.redis.rpush redis_key, data.to_json
90
- @job.run if type == :in
91
- when :memory
92
- SideJob.redis.hset "#{@job.redis_key}:#{type}ports:default", @name, data.to_json
93
- else
94
- raise "Missing port #{@name} or invalid mode #{mode}"
68
+ # For {SideJob::Worker#for_inputs}, if this is set, we instead set the port default on writes
69
+ if Thread.current[:sidejob_port_write_default]
70
+ self.default = data
71
+ else
72
+ SideJob.redis.rpush redis_key, data.to_json
95
73
  end
96
-
97
- @job.log({read: [], write: [log_port_data(self, [data])]})
74
+ @job.run(parent: type != :in) # run job if inport otherwise run parent
75
+ log(write: [ { port: self, data: [data] } ])
98
76
  end
99
77
 
100
78
  # Reads the oldest data from the port. Returns the default value if no data and there is a default.
101
- # @return [Object] First data from port
102
- # @raise [EOFError] Error raised if no data to be read
79
+ # @return [Object, None] First data from port or {SideJob::Port::None} if there is no data and no default.
103
80
  def read
104
81
  data = SideJob.redis.lpop(redis_key)
105
82
  if data
@@ -107,10 +84,10 @@ module SideJob
107
84
  elsif default?
108
85
  data = default
109
86
  else
110
- raise EOFError unless data
87
+ return None
111
88
  end
112
89
 
113
- @job.log({read: [log_port_data(self, [data])], write: []})
90
+ log(read: [ { port: self, data: [data] } ])
114
91
 
115
92
  data
116
93
  end
@@ -119,48 +96,57 @@ module SideJob
119
96
  # All data is read from the current port and written to the destination ports.
120
97
  # If the current port has a default value, the default is copied to all destination ports.
121
98
  # @param ports [Array<SideJob::Port>, SideJob::Port] Destination port(s)
122
- # @param metadata [Hash] If provided, the metadata is merged into the log entry
123
99
  # @return [Array<Object>] Returns all data on current port
124
- def connect_to(ports, metadata={})
100
+ def connect_to(ports)
125
101
  ports = [ports] unless ports.is_a?(Array)
126
- ports_by_mode = ports.group_by {|port| port.mode}
127
102
 
128
- default = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", @name)
129
-
130
- # empty the port of all data
131
- data = SideJob.redis.multi do |multi|
103
+ # Get source port data and default
104
+ (default, data, trash) = result = SideJob.redis.multi do |multi|
105
+ multi.hget("#{@job.redis_key}:#{@type}ports:default", @name)
106
+ # get all and empty the port of all data
132
107
  multi.lrange redis_key, 0, -1
133
108
  multi.del redis_key
134
- end[0]
109
+ end
110
+
111
+ default = result[0]
112
+ data = result[1]
135
113
 
136
- to_run = Set.new
114
+ return data unless data.length > 0 || default
115
+
116
+ # Get destination port defaults
117
+ port_defaults = SideJob.redis.multi do |multi|
118
+ # port defaults
119
+ ports.each { |port| multi.hget("#{port.job.redis_key}:#{port.type}ports:default", port.name) }
120
+ end
137
121
 
138
122
  SideJob.redis.multi do |multi|
139
123
  if data.length > 0
140
- (ports_by_mode[:queue] || []).each do |port|
124
+ ports.each_with_index do |port, i|
141
125
  multi.rpush port.redis_key, data
142
- to_run.add port.job if port.type == :in
143
- end
144
- if ! default
145
- (ports_by_mode[:memory] || []).each do |port|
146
- multi.hset "#{port.job.redis_key}:#{port.type}ports:default", port.name, data.last
147
- end
148
126
  end
149
127
  end
150
128
 
151
129
  if default
152
- ports.each do |port|
153
- multi.hset "#{port.job.redis_key}:#{port.type}ports:default", port.name, default
130
+ ports.each_with_index do |port, i|
131
+ if default != port_defaults[i]
132
+ multi.hset "#{port.job.redis_key}:#{port.type}ports:default", port.name, default
133
+ end
154
134
  end
155
135
  end
156
136
  end
157
137
 
158
138
  data.map! {|x| parse_json x}
159
139
  if data.length > 0
160
- SideJob.log metadata.merge({read: [log_port_data(self, data)], write: ports.map { |port| log_port_data(port, data)}})
140
+ log(read: [{ port: self, data: data }], write: ports.map { |port| {port: port, data: data} })
141
+ end
142
+
143
+ # Run the port job or parent only if something was changed
144
+ ports.each_with_index do |port, i|
145
+ if data.length > 0 || default != port_defaults[i]
146
+ port.job.run(parent: port.type != :in)
147
+ end
161
148
  end
162
149
 
163
- to_run.each { |job| job.run }
164
150
  data
165
151
  end
166
152
 
@@ -171,7 +157,6 @@ module SideJob
171
157
  while size > 0 do
172
158
  yield read
173
159
  end
174
- rescue EOFError
175
160
  end
176
161
 
177
162
  # Returns the redis key used for storing inputs or outputs from a port name
@@ -186,21 +171,68 @@ module SideJob
186
171
  redis_key.hash
187
172
  end
188
173
 
174
+ # Groups all port reads and writes within the block into a single logged event.
175
+ def self.log_group(&block)
176
+ outermost = ! Thread.current[:sidejob_port_group]
177
+ Thread.current[:sidejob_port_group] ||= {read: {}, write: {}} # port -> [data]
178
+ yield
179
+ ensure
180
+ if outermost
181
+ self._really_log Thread.current[:sidejob_port_group]
182
+ Thread.current[:sidejob_port_group] = nil
183
+ end
184
+ end
185
+
189
186
  private
190
187
 
191
- def log_port_data(port, data)
192
- x = {job: port.job.id, data: data}
193
- x[:"#{port.type}port"] = port.name
194
- x
188
+ def self._really_log(entry)
189
+ return unless entry && (entry[:read].length > 0 || entry[:write].length > 0)
190
+
191
+ log_entry = {}
192
+ %i{read write}.each do |type|
193
+ log_entry[type] = entry[type].map do |port, data|
194
+ x = {job: port.job.id, data: data}
195
+ x[:"#{port.type}port"] = port.name
196
+ x
197
+ end
198
+ end
199
+
200
+ SideJob.log log_entry
201
+ end
202
+
203
+ def log(data)
204
+ entry = Thread.current[:sidejob_port_group] ? Thread.current[:sidejob_port_group] : {read: {}, write: {}}
205
+ %i{read write}.each do |type|
206
+ (data[type] || []).each do |x|
207
+ entry[type][x[:port]] ||= []
208
+ entry[type][x[:port]].concat JSON.parse(x[:data].to_json) # serialize/deserialize to do a deep copy
209
+ end
210
+ end
211
+ if ! Thread.current[:sidejob_port_group]
212
+ self.class._really_log(entry)
213
+ end
195
214
  end
196
215
 
197
216
  # Wrapper around JSON.parse to also handle primitive types.
198
- # @param data [String, nil] Data to parse
217
+ # @param data [String] Data to parse
199
218
  # @return [Object, nil]
200
219
  def parse_json(data)
201
- raise "Invalid json #{data}" if data && ! data.is_a?(String)
202
- data = JSON.parse("[#{data}]")[0] if data
203
- data
220
+ JSON.parse("[#{data}]")[0]
221
+ end
222
+
223
+ # Check if the port exists, dynamically creating it if it does not exist and a * port exists for the job
224
+ # @raise [RuntimeError] Error raised if port does not exist
225
+ def check_exists
226
+ return if SideJob.redis.sismember "#{@job.redis_key}:#{type}ports", @name
227
+ dynamic = SideJob.redis.sismember("#{@job.redis_key}:#{type}ports", '*')
228
+ raise "Job #{@job.id} does not have #{@type}port #{@name}!" unless dynamic
229
+ dynamic_default = SideJob.redis.hget("#{@job.redis_key}:#{type}ports:default", '*')
230
+ SideJob.redis.multi do |multi|
231
+ multi.sadd "#{@job.redis_key}:#{type}ports", @name
232
+ if dynamic_default
233
+ multi.hset "#{@job.redis_key}:#{type}ports:default", @name, dynamic_default
234
+ end
235
+ end
204
236
  end
205
237
  end
206
238
  end
@@ -5,13 +5,6 @@ module SideJob
5
5
  # For simplicity, a job is allowed to be queued multiple times in the Sidekiq queue
6
6
  # Only when it gets pulled out to be run, i.e. here, we decide if we want to actually run it
7
7
  class ServerMiddleware
8
- # Configuration parameters for running workers
9
- CONFIGURATION = {
10
- lock_expiration: 86400, # the worker should not run longer than this number of seconds
11
- max_depth: 20, # the job should not be nested more than this number of levels
12
- max_runs_per_minute: 60, # generate error if the job is run more often than this
13
- }
14
-
15
8
  # Called by sidekiq as a server middleware to handle running a worker
16
9
  # @param worker [SideJob::Worker]
17
10
  # @param msg [Hash] Sidekiq message format
@@ -20,12 +13,45 @@ module SideJob
20
13
  @worker = worker
21
14
  return unless @worker.exists? # make sure the job has not been deleted
22
15
 
16
+ # only run if status is queued or terminating
23
17
  case @worker.status
24
- when 'queued'
25
- run_worker { yield }
26
- when 'terminating'
27
- terminate_worker
28
- # for any other status, we assume this worker does not need to be run
18
+ when 'queued', 'terminating'
19
+ else
20
+ return
21
+ end
22
+
23
+ # We use the presence of this lock:worker key to indicate that a worker is trying to get the job lock.
24
+ # No other worker needs to also wait and no calls to {SideJob::Job#run} need to queue a new run.
25
+ return unless SideJob.redis.set("#{@worker.redis_key}:lock:worker", 1, {nx: true, ex: 2})
26
+
27
+ # Obtain a lock so that only one worker runs at a time, which spares workers from having to deal with concurrency
28
+ token = @worker.lock(CONFIGURATION[:lock_expiration])
29
+ if token
30
+ begin
31
+ SideJob.redis.del "#{@worker.redis_key}:lock:worker"
32
+ SideJob.log_context(job: @worker.id) do
33
+ case @worker.status
34
+ when 'queued'
35
+ run_worker { yield }
36
+ when 'terminating'
37
+ terminate_worker
38
+ # else no longer need running
39
+ end
40
+ end
41
+ ensure
42
+ @worker.unlock(token)
43
+ @worker.run(parent: true) # run the parent every time worker runs
44
+ end
45
+ else
46
+ SideJob.redis.del "#{@worker.redis_key}:lock:worker"
47
+ # Unable to obtain job lock which may indicate another worker thread is running
48
+ # Schedule another run
49
+ # Note that the actual time before requeue depends on sidekiq poll_interval (default 15 seconds)
50
+ case @worker.status
51
+ when 'queued', 'terminating'
52
+ @worker.run(wait: 1)
53
+ # else no longer need running
54
+ end
29
55
  end
30
56
  end
31
57
 
@@ -39,64 +65,44 @@ module SideJob
39
65
  add_exception e
40
66
  ensure
41
67
  @worker.status = 'terminated'
42
- @worker.parent.run if @worker.parent
43
68
  end
44
69
 
45
70
  def run_worker(&block)
46
71
  # keep each job from being called too many times per minute
47
- # or too deep of a job tree
48
- # this is to help prevent bad coding that leads to recursive busy loops
72
+ # this is to help prevent bad coding that leads to infinite looping
49
73
  # Uses Rate limiter 1 pattern from http://redis.io/commands/INCR
50
74
  rate_key = "#{@worker.redis_key}:rate:#{Time.now.to_i / 60}"
51
75
  rate = SideJob.redis.multi do |multi|
52
76
  multi.incr rate_key
53
- multi.expire rate_key, 300 # 5 minutes
77
+ multi.expire rate_key, 60
54
78
  end[0]
55
79
 
56
80
  if rate.to_i > CONFIGURATION[:max_runs_per_minute]
57
- SideJob.log({ job: @worker.id, error: 'Job was terminated due to being called too rapidly' })
58
- return @worker.terminate
59
- elsif SideJob.redis.llen("#{@worker.redis_key}:ancestors") > CONFIGURATION[:max_depth]
60
- SideJob.log({ job: @worker.id, error: 'Job was terminated due to being too deep' })
61
- return @worker.terminate
62
- end
63
-
64
- # if another thread is already running this job, we don't run the job now
65
- # this simplifies workers from having to deal with thread safety
66
- # we will requeue the job in the other thread
67
- lock = "#{@worker.redis_key}:lock"
68
- now = Time.now.to_f
69
- val = SideJob.redis.multi do |multi|
70
- multi.get(lock)
71
- multi.set(lock, now, {ex: CONFIGURATION[:lock_expiration]}) # add an expiration just in case the lock becomes stale
72
- end[0]
73
-
74
- return if val # only run if lock key was not set
75
-
76
- begin
81
+ SideJob.log({ error: 'Job was terminated due to being called too rapidly' })
82
+ @worker.terminate
83
+ else
84
+ # normal run
77
85
  @worker.set ran_at: SideJob.timestamp
78
86
  @worker.status = 'running'
79
87
  yield
80
88
  @worker.status = 'completed' if @worker.status == 'running'
81
- rescue SideJob::Worker::Suspended
82
- @worker.status = 'suspended' if @worker.status == 'running'
83
- rescue => e
84
- @worker.status = 'failed' if @worker.status == 'running'
85
- add_exception e
86
- ensure
87
- val = SideJob.redis.multi do |multi|
88
- multi.get lock
89
- multi.del lock
90
- end[0]
91
-
92
- @worker.run if val && val.to_f != now # run it again if the lock key changed
93
- @worker.parent.run if @worker.parent
94
89
  end
90
+ rescue SideJob::Worker::Suspended
91
+ @worker.status = 'suspended' if @worker.status == 'running'
92
+ rescue => e
93
+ # only set failed if not terminating/terminated
94
+ case @worker.status
95
+ when 'terminating', 'terminated'
96
+ else
97
+ @worker.status = 'failed'
98
+ end
99
+
100
+ add_exception e
95
101
  end
96
102
 
97
103
  def add_exception(exception)
98
104
  # only store the backtrace until the first sidekiq line
99
- SideJob.log({ job: @worker.id, error: exception.message, backtrace: exception.backtrace.take_while {|l| l !~ /sidekiq/}.join("\n") })
105
+ SideJob.log({ error: exception.message, backtrace: exception.backtrace.take_while {|l| l !~ /sidekiq/}.join("\n") })
100
106
  end
101
107
  end
102
108
  end
@@ -37,8 +37,6 @@ module SideJob
37
37
  worker.perform(*args)
38
38
  end
39
39
 
40
- reload
41
-
42
40
  if errors && status == 'failed'
43
41
  SideJob.logs.each do |event|
44
42
  if event['error']
@@ -1,4 +1,4 @@
1
1
  module SideJob
2
2
  # The current SideJob version
3
- VERSION = '3.0.1'
3
+ VERSION = '4.0.1'
4
4
  end