ultravisor 0.0.0.3.g8cf10dc
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.yardopts +1 -0
- data/LICENCE +674 -0
- data/README.md +451 -0
- data/lib/ultravisor.rb +216 -0
- data/lib/ultravisor/child.rb +481 -0
- data/lib/ultravisor/child/call.rb +16 -0
- data/lib/ultravisor/child/call_receiver.rb +13 -0
- data/lib/ultravisor/child/cast.rb +15 -0
- data/lib/ultravisor/child/cast_receiver.rb +10 -0
- data/lib/ultravisor/child/process_cast_call.rb +38 -0
- data/lib/ultravisor/error.rb +24 -0
- data/lib/ultravisor/logging_helpers.rb +32 -0
- data/ultravisor.gemspec +36 -0
- metadata +188 -0
@@ -0,0 +1,481 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "./logging_helpers"
|
4
|
+
|
5
|
+
class Ultravisor
|
6
|
+
class Child
|
7
|
+
include LoggingHelpers
|
8
|
+
|
9
|
+
attr_reader :id
|
10
|
+
|
11
|
+
# Build the description of a supervised child.
#
# Each setting is stored and then immediately checked by the matching
# `validate_*` method, so an invalid specification is rejected at
# construction time rather than when the child is first spawned.
#
# @param id the identifier exposed through {#id} and used in log messages
# @param klass [Class] the class instantiated (with `args`) on every spawn
# @param args [Array] arguments passed to `klass.new`
# @param method [Symbol] the instance method invoked on the new instance
#   inside the child's thread
# @param restart [Symbol] one of `:never`, `:on_failure` or `:always`
# @param restart_policy [Hash] may contain `:period`, `:max` and `:delay`
# @param shutdown [Hash] may contain `:method` and `:timeout`
# @param logger [Logger] where log output goes; the default discards
#   everything by writing to the platform's null device (`File::NULL`)
#   instead of a hard-coded "/dev/null", which does not exist everywhere
# @param enable_castcall [Boolean] truthy to make {#cast} and {#call} usable
# @param access [Symbol, nil] `nil`, or `:unsafe` to permit {#unsafe_instance}
#
# @raise [ArgumentError] (or a library-specific error) from the
#   `validate_*` helpers when a setting is not acceptable
def initialize(id:,
               klass:,
               args: [],
               method:,
               restart: :always,
               restart_policy: {
                 period: 5,
                 max: 3,
                 delay: 1,
               },
               shutdown: {
                 method: nil,
                 timeout: 1,
               },
               logger: Logger.new(File::NULL),
               enable_castcall: false,
               access: nil
              )

  @logger = logger
  @id = id

  @klass, @args, @method = klass, args, method
  validate_kam

  @restart = restart
  validate_restart

  @restart_policy = restart_policy
  validate_restart_policy

  # Must run after @klass is set: the shutdown method is looked up on it.
  @shutdown_spec = shutdown
  validate_shutdown_spec

  @access = access
  validate_access

  @enable_castcall = enable_castcall

  # Most-recent-first list of how long each past run lasted, in seconds.
  @runtime_history = []

  # @spawn_m guards the per-spawn state (@thread, @instance, @spawn_id, ...);
  # @spawn_cv is signalled whenever that state changes.
  @spawn_m = Mutex.new
  @spawn_cv = ConditionVariable.new

  # Serialises concurrent calls to #shutdown.
  @shutdown_m = Mutex.new
end
|
57
|
+
|
58
|
+
# Create a fresh instance of the child's class and run its entry method
# in a new thread.
#
# @param term_queue the queue handed to `termination_cleanup` when the
#   thread finishes
# @return [Ultravisor::Child] self
def spawn(term_queue)
  @spawn_m.synchronize do
    @value = nil
    @exception = nil
    @start_time = Time.now
    @instance = new_instance

    # A per-spawn token: the thread only cleans up if it is still the
    # current spawn when it finishes.
    generation = rand
    @spawn_id = generation

    # Interrupts are masked around thread creation and re-enabled only
    # around the call into the child's own code.
    Thread.handle_interrupt(::Exception => :never, ::Numeric => :never) do
      @thread = Thread.new do
        Thread.current.name = "Ultravisor::Child(#{@id})"
        logger.debug(logloc) { "Spawning new instance of #{@id}" }

        begin
          Thread.handle_interrupt(::Exception => :immediate, ::Numeric => :immediate) do
            logger.debug(logloc) { "Calling #{@klass}##{@method} to start #{@id} running" }
            @value = @instance.public_send(@method)
          end
        rescue Exception => err
          @exception = err
        ensure
          # ensure blocks still run when a thread is terminated with
          # Thread#kill, so this cleanup happens on every exit path.
          @spawn_m.synchronize do
            termination_cleanup(term_queue) if @spawn_id == generation
          end
        end
      end
    end

    @spawn_cv.broadcast
  end

  self
end
|
95
|
+
|
96
|
+
# Stop the child's thread.
#
# When a shutdown method is configured and `force` is false, that method
# is called on the instance; otherwise (or if that call raises) the thread
# is killed. The thread is then given the configured timeout to finish
# before being killed, and a final short grace period before being
# abandoned.
#
# Does nothing if no thread is running, or if called from the child's own
# thread.
#
# @param force [Boolean] skip the configured shutdown method and kill the
#   thread straight away
def shutdown(force: false)
  @shutdown_m.synchronize do
    worker = nil
    generation = nil

    @spawn_m.synchronize do
      return if @thread.nil? || @thread == Thread.current

      # Keep our own reference to the running thread so that we do not
      # have to re-acquire @spawn_m for every operation on it -- the
      # terminating thread itself needs that lock to clean up, so holding
      # it while we wait on the thread would collide.
      worker = @thread
      generation = @spawn_id

      # Flag that a shutdown is in progress
      @shutting_down = true
    end

    stop_method = @shutdown_spec[:method]

    if force || !stop_method
      worker.kill
    else
      begin
        @instance.public_send(stop_method)
      rescue Exception => err
        log_exception(err) { "Unhandled exception when calling #{@shutdown_spec[:method].inspect} on child #{id}" }
        worker.kill
      end
    end

    if worker.join(@shutdown_spec[:timeout]).nil?
      logger.info(logloc) { "Child instance for #{self.id} did not cleanly shutdown within #{@shutdown_spec[:timeout]} seconds; force-killing the thread" }
      worker.kill
    end

    # One final, short grace period
    if worker.join(0.1).nil?
      logger.error(logloc) { "Child thread for #{self.id} appears hung; abandoning thread #{worker}" }

      # The thread never reached the ensure block in #spawn, so the
      # cleanup it would have performed has to be done here instead.
      @spawn_m.synchronize do
        termination_cleanup if @spawn_id == generation
      end
    end
  end
end
|
145
|
+
|
146
|
+
# Block the caller until the child has no running thread.
#
# Sleeps on the spawn condition variable for as long as @thread is set.
def wait
  @spawn_m.synchronize do
    while @thread
      @spawn_cv.wait(@spawn_m)
    end
  end
end
|
151
|
+
|
152
|
+
# Wait until the child has no running thread, then report the exception
# recorded for the most recent run.
#
# @return [Exception, nil] the value of @exception
def termination_exception
  @spawn_m.synchronize do
    while @thread
      @spawn_cv.wait(@spawn_m)
    end

    @exception
  end
end
|
158
|
+
|
159
|
+
# Wait until the child has no running thread, then report the value
# recorded for the most recent run.
#
# @return the value of @value
def termination_value
  @spawn_m.synchronize do
    while @thread
      @spawn_cv.wait(@spawn_m)
    end

    @value
  end
end
|
165
|
+
|
166
|
+
# Work out how long to pause before restarting the child.
#
# A numeric `:delay` is used as-is; a range yields a random point between
# its first and last elements. The result is never less than zero.
#
# @return [Numeric] the delay from the restart policy, floored at zero
def restart_delay
  spec = @restart_policy[:delay]

  seconds =
    case spec
    when Numeric
      spec
    when Range
      lower = spec.first
      lower + (spec.last - lower) * rand
    end

  [0, seconds].max
end
|
178
|
+
|
179
|
+
# Decide whether the child should be started again.
#
# @return [Boolean] true when the restart mode is `:always`, or when it is
#   `:on_failure` and the last run recorded an exception
# @raise [BlownRestartPolicyError] if `blown_policy?` reports that the
#   restart policy has been exceeded
def restart?
  if blown_policy?
    raise BlownRestartPolicyError,
          "Child #{id} has restarted more than #{@restart_policy[:max]} times in #{@restart_policy[:period]} seconds."
  end

  case @restart
  when :always
    true
  when :on_failure
    # Only consult the termination exception in this mode.
    termination_exception ? true : false
  else
    false
  end
end
|
187
|
+
|
188
|
+
# Hand back the live instance, provided the child was declared with
# `access: :unsafe`.
#
# @return the result of `current_instance`
# @raise [Ultravisor::ThreadSafetyError] if the child's access setting is
#   anything other than `:unsafe`
def unsafe_instance
  if @access != :unsafe
    raise Ultravisor::ThreadSafetyError,
          "#unsafe_instance called on a child not declared with access: :unsafe"
  end

  current_instance
end
|
196
|
+
|
197
|
+
# Build a receiver through which a method call can be queued on the
# child's instance.
#
# The block given to `CastReceiver.new` waits for an instance to exist,
# checks that it responds to the requested method, pushes the request onto
# the instance's castcall queue and then writes a byte to the castcall
# pipe.
#
# @return [CastReceiver]
# @raise [NoMethodError] if castcall was not enabled for this child, or
#   (from within the receiver's block) if the instance does not respond to
#   the requested method
def cast
  unless castcall_enabled?
    raise NoMethodError,
          "undefined method `cast' for #{self}"
  end

  CastReceiver.new do |request|
    @spawn_m.synchronize do
      while @instance.nil?
        #:nocov:
        @spawn_cv.wait(@spawn_m)
        #:nocov:
      end

      target = @instance

      if !target.respond_to?(request.method_name)
        raise NoMethodError,
              "undefined method `#{request.method_name}' for #{target}"
      end

      pending = target.instance_variable_get(:@ultravisor_child_castcall_queue)

      begin
        pending << request
      rescue ClosedQueueError
        # A cast carries no guarantee that it will ever run, so silently
        # dropping it once the instance's queue has closed is acceptable.
      end

      @castcall_fd_writer.putc "!"
    end
  end
end
|
227
|
+
|
228
|
+
# Build a receiver through which a method call can be queued on the
# child's instance.
#
# The block given to `CallReceiver.new` waits for an instance to exist,
# checks that it responds to the requested method, pushes the request onto
# the instance's castcall queue and then writes a byte to the castcall
# pipe.
#
# @return [CallReceiver]
# @raise [NoMethodError] if castcall was not enabled for this child, or
#   (from within the receiver's block) if the instance does not respond to
#   the requested method
# @raise [ChildRestartedError] (from within the receiver's block) if the
#   instance's queue has already been closed
def call
  unless castcall_enabled?
    raise NoMethodError,
          "undefined method `call' for #{self}"
  end

  CallReceiver.new do |request|
    @spawn_m.synchronize do
      while @instance.nil?
        #:nocov:
        @spawn_cv.wait(@spawn_m)
        #:nocov:
      end

      target = @instance

      if !target.respond_to?(request.method_name)
        raise NoMethodError,
              "undefined method `#{request.method_name}' for #{target}"
      end

      pending = target.instance_variable_get(:@ultravisor_child_castcall_queue)

      begin
        pending << request
      rescue ClosedQueueError
        raise ChildRestartedError
      end

      @castcall_fd_writer.putc "!"
    end
  end
end
|
257
|
+
|
258
|
+
private
|
259
|
+
|
260
|
+
# Check the class / args / method triple given to the constructor.
#
# @raise [InvalidKAMError] if the class's initializer takes no arguments
#   but args were supplied, if the entry method does not exist on the
#   class, or if the entry method's arity is not zero
def validate_kam
  ctor_takes_nothing = @klass.instance_method(:initialize).arity == 0

  if ctor_takes_nothing && @args != []
    raise InvalidKAMError,
          "#{@klass.to_s}.new takes no arguments, but args not empty."
  end

  entry_arity =
    begin
      @klass.instance_method(@method).arity
    rescue NameError
      raise InvalidKAMError,
            "#{@klass.to_s} has no instance method #{@method}"
    end

  unless entry_arity == 0
    raise InvalidKAMError,
          "#{@klass.to_s}##{@method} must not take arguments"
  end
end
|
276
|
+
|
277
|
+
# Check that the restart mode is one of the recognised symbols.
#
# @raise [ArgumentError] unless @restart is `:never`, `:on_failure` or
#   `:always`
def validate_restart
  return if %i{never on_failure always}.include?(@restart)

  raise ArgumentError,
        "Invalid value for restart: #{@restart.inspect}"
end
|
283
|
+
|
284
|
+
# Check the restart policy and fill in any settings the caller left out.
#
# After this runs, @restart_policy always has `:period`, `:max` and
# `:delay` keys (defaults: 5, 3 and 1).
#
# @raise [ArgumentError] if the policy is not a Hash, contains unknown
#   keys, has a non-positive or non-numeric `:period`, a negative or
#   non-numeric `:max`, or a `:delay` that is neither a non-negative number
#   nor an increasing range starting at a non-negative value
def validate_restart_policy
  unless @restart_policy.is_a?(Hash)
    raise ArgumentError,
          "restart_policy must be a hash (got #{@restart_policy.inspect})"
  end

  bad_keys = @restart_policy.keys - %i{period max delay}
  unless bad_keys.empty?
    raise ArgumentError,
          "Invalid key(s) in restart_policy: #{bad_keys.inspect}"
  end

  # Restore any missing defaults
  @restart_policy = { period: 5, max: 3, delay: 1 }.merge(@restart_policy)

  unless @restart_policy[:period].is_a?(Numeric) && @restart_policy[:period].positive?
    raise ArgumentError,
          "Invalid restart_policy period #{@restart_policy[:period].inspect}: must be positive integer"
  end

  unless @restart_policy[:max].is_a?(Numeric) && !@restart_policy[:max].negative?
    # Report the offending :max value (this previously printed :period).
    raise ArgumentError,
          "Invalid restart_policy max #{@restart_policy[:max].inspect}: must be non-negative integer"
  end

  case @restart_policy[:delay]
  when Numeric
    if @restart_policy[:delay].negative?
      raise ArgumentError,
            "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: must be non-negative integer or range"
    end
  when Range
    if @restart_policy[:delay].first >= @restart_policy[:delay].last
      raise ArgumentError,
            "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: must be non-negative integer or increasing range"
    end

    if @restart_policy[:delay].first.negative?
      raise ArgumentError,
            "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: range must not be negative"
    end
  else
    raise ArgumentError,
          "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: must be non-negative integer or range"
  end
end
|
330
|
+
|
331
|
+
# Check the shutdown specification and fill in any settings the caller
# left out.
#
# After this runs, @shutdown_spec always has `:method` and `:timeout` keys
# (defaults: nil and 1).
#
# @raise [ArgumentError] if the specification is not a Hash, contains
#   unknown keys, names a method that @klass does not define or that takes
#   arguments, or has a negative or non-numeric `:timeout`
def validate_shutdown_spec
  unless @shutdown_spec.is_a?(Hash)
    raise ArgumentError,
          "shutdown must be a hash (got #{@shutdown_spec.inspect})"
  end

  unknown = @shutdown_spec.keys - %i{method timeout}
  if unknown.any?
    raise ArgumentError,
          "Invalid key(s) in shutdown specification: #{unknown.inspect}"
  end

  # Fill in whatever the caller did not specify
  @shutdown_spec = { method: nil, timeout: 1 }.merge(@shutdown_spec)

  stop_method = @shutdown_spec[:method]
  if stop_method
    stop_arity =
      begin
        @klass.instance_method(stop_method).arity
      rescue NameError
        raise ArgumentError,
              "Shutdown method #{@klass.to_s}##{@shutdown_spec[:method]} is not defined"
      end

    if stop_arity != 0
      raise ArgumentError,
            "Shutdown method #{@klass.to_s}##{@shutdown_spec[:method]} must not take any arguments"
    end
  end

  timeout = @shutdown_spec[:timeout]
  if !timeout.is_a?(Numeric) || timeout.negative?
    raise ArgumentError,
          "Invalid shutdown timeout #{@shutdown_spec[:timeout].inspect}: must be non-negative integer"
  end
end
|
363
|
+
|
364
|
+
# Check the instance access setting: only `nil` and `:unsafe` are allowed.
#
# @raise [ArgumentError] for any other value of @access
def validate_access
  case @access
  when nil, :unsafe
    nil
  else
    raise ArgumentError,
          "Invalid instance access specification: #{@access.inspect}"
  end
end
|
372
|
+
|
373
|
+
# Whether cast/call support was requested for this child.
#
# @return [Boolean] true when @enable_castcall is truthy, false otherwise
def castcall_enabled?
  @enable_castcall ? true : false
end
|
376
|
+
|
377
|
+
# Instantiate the child's class, wiring up cast/call plumbing if enabled.
#
# When castcall is enabled the new object gets the ProcessCastCall module
# prepended to its singleton class, a fresh queue, and the read end of a
# new pipe; the write end is kept in @castcall_fd_writer.
#
# @return the newly created instance of @klass
def new_instance
  # A class whose initializer takes nothing is constructed with an
  # explicitly empty argument list rather than by splatting @args.
  instance =
    if @klass.instance_method(:initialize).arity == 0
      @klass.new
    else
      @klass.new(*@args)
    end

  if castcall_enabled?
    instance.singleton_class.prepend(Ultravisor::Child::ProcessCastCall)
    instance.instance_variable_set(:@ultravisor_child_castcall_queue, Queue.new)

    reader, @castcall_fd_writer = IO.pipe
    instance.instance_variable_set(:@ultravisor_child_castcall_fd, reader)
  end

  instance
end
|
396
|
+
|
397
|
+
# Fetch the live instance, sleeping on the spawn condition variable until
# one exists.
#
# @return the value of @instance once it is non-nil
def current_instance
  @spawn_m.synchronize do
    @spawn_cv.wait(@spawn_m) while @instance.nil?

    @instance
  end
end
|
406
|
+
|
407
|
+
# Work out whether the child has restarted too often, too quickly.
#
# Walks the runtime history (most recent run first), counting the runs
# that fit within the policy's period when their runtimes are accumulated.
# If the policy is not blown, the history is trimmed before returning.
#
# @return [Boolean] true if the count of recent restarts exceeds the
#   policy's `:max`
def blown_policy?
  window = @restart_policy[:period]
  total_runtime = 0

  # The count begins at one: this is only consulted while restarting, so
  # at least one recent restart has necessarily happened.
  restarts_in_window = 1 + @runtime_history.count do |runtime|
    total_runtime += runtime
    total_runtime < window
  end

  logger.debug(logloc) { "@runtime_history: #{@runtime_history.inspect}, cumulative_runtime: #{total_runtime}, recent_restart_count: #{restarts_in_window}, restart_policy: #{@restart_policy.inspect}" }

  return true if restarts_in_window > @restart_policy[:max]

  @runtime_history = @runtime_history[0..restarts_in_window]

  false
end
|
431
|
+
|
432
|
+
# Reset per-spawn state after the child's thread has finished (or been
# abandoned). Must be called with @spawn_m held.
#
# Records how long the run lasted, optionally announces the termination on
# `term_queue`, tears down the cast/call plumbing (notifying every queued
# request via `child_restarted!`), and clears @instance, @thread and
# @spawn_id, waking anything waiting on @spawn_cv.
#
# @param term_queue [#<<, nil] if given, and the child is not being shut
#   down, `self` is pushed onto it
# @raise [ThreadSafetyError] if the calling thread does not own @spawn_m
def termination_cleanup(term_queue = nil)
  unless @spawn_m.owned?
    #:nocov:
    raise ThreadSafetyError,
          "termination_cleanup must be called while holding the @spawn_m lock"
    #:nocov:
  end

  if @start_time
    # Newest run goes at the front of the history.
    @runtime_history.unshift(Time.now.to_f - @start_time.to_f)
    @start_time = nil
  end

  term_queue << self if term_queue && !@shutting_down

  if castcall_enabled?
    cc_q = @instance.instance_variable_get(:@ultravisor_child_castcall_queue)
    cc_q.close

    # Drain whatever is still queued; a non-blocking pop signals an empty
    # queue by raising ThreadError, which ends the loop.
    begin
      loop do
        cc_q.pop(true).child_restarted!
      end
    rescue ThreadError => ex
      raise unless ex.message == "queue empty"
    end

    @instance.instance_variable_get(:@ultravisor_child_castcall_fd).close
    @instance.instance_variable_set(:@ultravisor_child_castcall_fd, nil)
    @castcall_fd_writer.close
    @castcall_fd_writer = nil
  end

  @instance = nil

  if @thread
    @thread = nil
    @spawn_cv.broadcast
  end

  @spawn_id = nil
end
|
474
|
+
end
|
475
|
+
end
|
476
|
+
|
477
|
+
require_relative "./child/call"
|
478
|
+
require_relative "./child/call_receiver"
|
479
|
+
require_relative "./child/cast"
|
480
|
+
require_relative "./child/cast_receiver"
|
481
|
+
require_relative "./child/process_cast_call"
|