ultravisor 0.0.0.3.g8cf10dc

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,481 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "./logging_helpers"
4
+
5
+ class Ultravisor
6
+ class Child
7
+ include LoggingHelpers
8
+
9
+ attr_reader :id
10
+
11
# Build the definition of a supervised child.
#
# Each group of settings is stored and then checked straight away by the
# matching validate_* method; after that the bookkeeping state used by the
# rest of the class (runtime history, mutexes, condition variable) is
# created.
#
# id:              identifier, exposed through #id and used in log messages
#                  and the child's thread name
# klass:           class that is instantiated every time the child is spawned
# args:            arguments handed to klass.new
# method:          name of the instance method that is called to run the child
# restart:         one of :never, :on_failure or :always
# restart_policy:  hash with optional :period, :max and :delay keys
# shutdown:        hash with optional :method and :timeout keys
# logger:          destination for log output
# enable_castcall: truthy to allow #cast and #call
# access:          nil, or :unsafe to allow #unsafe_instance
def initialize(
  id:,
  klass:,
  args: [],
  method:,
  restart: :always,
  restart_policy: { period: 5, max: 3, delay: 1 },
  shutdown: { method: nil, timeout: 1 },
  logger: Logger.new("/dev/null"),
  enable_castcall: false,
  access: nil
)
  @logger = logger
  @id     = id

  # "kam" = klass, args, method
  @klass  = klass
  @args   = args
  @method = method
  validate_kam

  @restart = restart
  validate_restart

  @restart_policy = restart_policy
  validate_restart_policy

  # validate_shutdown_spec looks up the shutdown method on @klass, so it
  # has to run after the klass has been stored.
  @shutdown_spec = shutdown
  validate_shutdown_spec

  @access = access
  validate_access

  @enable_castcall = enable_castcall

  # Durations of previous runs, most recent first (see termination_cleanup).
  @runtime_history = []

  @spawn_m    = Mutex.new
  @spawn_cv   = ConditionVariable.new
  @shutdown_m = Mutex.new
end
57
+
58
# Start a fresh instance of the child running in its own thread.
#
# While holding @spawn_m this clears the result of any previous run,
# records the start time, builds a new instance and launches a thread that
# calls the configured method on it.  The method's return value is kept in
# @value, and anything it raises is kept in @exception.  Once the thread
# has been created, everyone waiting on @spawn_cv is woken.
#
# term_queue is passed on to termination_cleanup when the thread finishes.
#
# Returns self.
def spawn(term_queue)
  deferred  = { ::Exception => :never, ::Numeric => :never }
  immediate = { ::Exception => :immediate, ::Numeric => :immediate }

  @spawn_m.synchronize do
    @value      = nil
    @exception  = nil
    @start_time = Time.now
    @instance   = new_instance

    # Random token identifying this particular run of the child.
    generation = rand
    @spawn_id  = generation

    # Interrupts are deferred around thread creation and the bookkeeping in
    # the thread body; they are re-enabled only around the call into the
    # child instance itself.
    Thread.handle_interrupt(deferred) do
      @thread = Thread.new do
        Thread.current.name = "Ultravisor::Child(#{@id})"
        logger.debug(logloc) { "Spawning new instance of #{@id}" }

        begin
          Thread.handle_interrupt(immediate) do
            logger.debug(logloc) { "Calling #{@klass}##{@method} to start #{@id} running" }
            @value = @instance.public_send(@method)
          end
        rescue Exception => err
          @exception = err
        ensure
          # ensure blocks still run when a thread is terminated with
          # Thread#kill, so this is where a finished run gets tidied up.
          @spawn_m.synchronize do
            # Skip the cleanup if the spawn id recorded for this run is no
            # longer the current one.
            termination_cleanup(term_queue) if @spawn_id == generation
          end
        end
      end
    end

    @spawn_cv.broadcast
  end

  self
end
95
+
96
# Stop the running child thread, if there is one.
#
# Does nothing when no thread is recorded or when called from the child's
# own thread.  Otherwise the child is asked to stop by calling the
# configured shutdown method on the instance; if no such method is
# configured, `force` is true, or that call raises, the thread is killed
# instead.  A thread that has not finished within the configured timeout is
# killed, and if it still has not finished 0.1 seconds later it is abandoned
# and the cleanup is performed here on its behalf.
#
# force: when true, skip the shutdown method and kill the thread directly.
def shutdown(force: false)
  @shutdown_m.synchronize do
    # Take a reference to the running thread (and its spawn id) once, so we
    # don't need to keep acquiring @spawn_m every time we want to do
    # something with it -- the terminating thread is itself trying to
    # acquire that same lock so that it can clean up.
    th, sid = @spawn_m.synchronize do
      return if @thread.nil? || @thread == Thread.current

      # Let everyone know we're in shutdown mode
      @shutting_down = true

      [@thread, @spawn_id]
    end

    if force || !@shutdown_spec[:method]
      th.kill
    else
      begin
        @instance.public_send(@shutdown_spec[:method])
      rescue Exception => err
        log_exception(err) { "Unhandled exception when calling #{@shutdown_spec[:method].inspect} on child #{id}" }
        th.kill
      end
    end

    # Thread#join returns nil when the time limit expires first.
    if th.join(@shutdown_spec[:timeout]).nil?
      logger.info(logloc) { "Child instance for #{self.id} did not cleanly shutdown within #{@shutdown_spec[:timeout]} seconds; force-killing the thread" }
      th.kill
    end

    # Final, short grace period.
    if th.join(0.1).nil?
      logger.error(logloc) { "Child thread for #{self.id} appears hung; abandoning thread #{th}" }

      # The worker has seized up and the cleanup in the ensure block of
      # `spawn` has not triggered, so do that cleanup here instead -- but
      # only if the child has not been respawned in the meantime.
      @spawn_m.synchronize do
        termination_cleanup if @spawn_id == sid
      end
    end
  end
end
145
+
146
# Block the caller until no child thread is recorded (@thread is nil).
# Sleeps on @spawn_cv between checks.
def wait
  @spawn_m.synchronize do
    while @thread
      @spawn_cv.wait(@spawn_m)
    end
  end
end
151
+
152
# Wait until no child thread is recorded, then return the exception stored
# for the last run (nil if none was stored).
def termination_exception
  @spawn_m.synchronize do
    while @thread
      @spawn_cv.wait(@spawn_m)
    end

    @exception
  end
end
158
+
159
# Wait until no child thread is recorded, then return the value stored for
# the last run (nil if none was stored).
def termination_value
  @spawn_m.synchronize do
    while @thread
      @spawn_cv.wait(@spawn_m)
    end

    @value
  end
end
165
+
166
# How long to wait before restarting the child.
#
# A Numeric :delay in the restart policy is used as-is; a Range :delay
# yields a random point between the range's first and last values.  The
# result is never below zero.
def restart_delay
  delay =
    case @restart_policy[:delay]
    when Numeric
      @restart_policy[:delay]
    when Range
      low  = @restart_policy[:delay].first
      high = @restart_policy[:delay].last
      low + (high - low) * rand
    end

  [0, delay].max
end
178
+
179
# Should the child be started again after terminating?
#
# Raises BlownRestartPolicyError when blown_policy? reports that the
# restart policy has been exceeded.  Otherwise returns true for
# restart: :always, true for restart: :on_failure when the last run ended
# with an exception, and false in every other case.
def restart?
  if blown_policy?
    raise BlownRestartPolicyError,
          "Child #{self.id} has restarted more than #{@restart_policy[:max]} times in #{@restart_policy[:period]} seconds."
  end

  case @restart
  when :always
    true
  when :on_failure
    termination_exception ? true : false
  else
    false
  end
end
187
+
188
# Return the current child instance directly.
#
# Only permitted for children created with access: :unsafe; any other
# child raises Ultravisor::ThreadSafetyError.
def unsafe_instance
  if @access != :unsafe
    raise Ultravisor::ThreadSafetyError,
          "#unsafe_instance called on a child not declared with access: :unsafe"
  end

  current_instance
end
196
+
197
# Build a CastReceiver that queues requests on the child instance.
#
# Raises NoMethodError when cast/call support is not enabled for this
# child.  The block given to CastReceiver waits (under @spawn_m) until an
# instance exists, checks that the instance responds to the requested
# method, appends the request to the instance's cast/call queue and then
# writes one byte to the cast/call pipe.
def cast
  raise NoMethodError, "undefined method `cast' for #{self}" unless castcall_enabled?

  CastReceiver.new do |castback|
    @spawn_m.synchronize do
      #:nocov:
      @spawn_cv.wait(@spawn_m) while @instance.nil?
      #:nocov:

      if !@instance.respond_to?(castback.method_name)
        raise NoMethodError, "undefined method `#{castback.method_name}' for #{@instance}"
      end

      pending = @instance.instance_variable_get(:@ultravisor_child_castcall_queue)

      begin
        pending << castback
      rescue ClosedQueueError
        # casts aren't guaranteed to ever execute, so dropping this one
        # when the instance's queue has been closed is perfectly valid
      end

      @castcall_fd_writer.putc "!"
    end
  end
end
227
+
228
# Build a CallReceiver that queues requests on the child instance.
#
# Raises NoMethodError when cast/call support is not enabled for this
# child.  The block given to CallReceiver waits (under @spawn_m) until an
# instance exists, checks that the instance responds to the requested
# method, appends the request to the instance's cast/call queue and then
# writes one byte to the cast/call pipe.  If the queue has already been
# closed, ChildRestartedError is raised instead.
def call
  raise NoMethodError, "undefined method `call' for #{self}" unless castcall_enabled?

  CallReceiver.new do |callback|
    @spawn_m.synchronize do
      #:nocov:
      @spawn_cv.wait(@spawn_m) while @instance.nil?
      #:nocov:

      if !@instance.respond_to?(callback.method_name)
        raise NoMethodError, "undefined method `#{callback.method_name}' for #{@instance}"
      end

      pending = @instance.instance_variable_get(:@ultravisor_child_castcall_queue)

      begin
        pending << callback
      rescue ClosedQueueError
        raise ChildRestartedError
      end

      @castcall_fd_writer.putc "!"
    end
  end
end
257
+
258
+ private
259
+
260
# Check the klass / args / method triple.
#
# Raises InvalidKAMError when:
#   * klass#initialize takes no arguments but args is not empty,
#   * the run method takes arguments, or
#   * klass has no instance method with the given name.
def validate_kam
  no_arg_constructor = @klass.instance_method(:initialize).arity == 0
  if no_arg_constructor && @args != []
    raise InvalidKAMError, "#{@klass.to_s}.new takes no arguments, but args not empty."
  end

  begin
    run_method = @klass.instance_method(@method)
    unless run_method.arity == 0
      raise InvalidKAMError, "#{@klass.to_s}##{@method} must not take arguments"
    end
  rescue NameError
    # instance_method raises NameError for an unknown method name
    raise InvalidKAMError, "#{@klass.to_s} has no instance method #{@method}"
  end
end
276
+
277
# Check that the restart setting is one of :never, :on_failure or :always;
# raises ArgumentError otherwise.
def validate_restart
  return if %i[never on_failure always].include?(@restart)

  raise ArgumentError, "Invalid value for restart: #{@restart.inspect}"
end
283
+
284
# Check the restart policy and fill in any keys that were left out.
#
# The policy must be a Hash containing only :period, :max and :delay keys.
# Missing keys receive the defaults (period: 5, max: 3, delay: 1) and the
# merged hash replaces @restart_policy.  After merging:
#
#   * :period must be a positive Numeric
#   * :max    must be a non-negative Numeric
#   * :delay  must be a non-negative Numeric, or a Range whose first value
#             is non-negative and strictly less than its last value
#
# Raises ArgumentError describing the first problem found.
def validate_restart_policy
  unless @restart_policy.is_a?(Hash)
    raise ArgumentError,
          "restart_policy must be a hash (got #{@restart_policy.inspect})"
  end

  bad_keys = @restart_policy.keys - %i[period max delay]
  unless bad_keys.empty?
    raise ArgumentError,
          "Invalid key(s) in restart_policy: #{bad_keys.inspect}"
  end

  # Restore any missing defaults
  @restart_policy = { period: 5, max: 3, delay: 1 }.merge(@restart_policy)

  unless @restart_policy[:period].is_a?(Numeric) && @restart_policy[:period].positive?
    raise ArgumentError,
          "Invalid restart_policy period #{@restart_policy[:period].inspect}: must be positive integer"
  end

  unless @restart_policy[:max].is_a?(Numeric) && !@restart_policy[:max].negative?
    # Report the offending :max value (this message previously showed the
    # :period value by mistake).
    raise ArgumentError,
          "Invalid restart_policy max #{@restart_policy[:max].inspect}: must be non-negative integer"
  end

  case @restart_policy[:delay]
  when Numeric
    if @restart_policy[:delay].negative?
      raise ArgumentError,
            "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: must be non-negative integer or range"
    end
  when Range
    if @restart_policy[:delay].first >= @restart_policy[:delay].last
      raise ArgumentError,
            "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: must be non-negative integer or increasing range"
    end

    if @restart_policy[:delay].first.negative?
      raise ArgumentError,
            "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: range must not be negative"
    end
  else
    raise ArgumentError,
          "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: must be non-negative integer or range"
  end
end
330
+
331
# Check the shutdown specification and fill in any keys that were left out.
#
# The specification must be a Hash containing only :method and :timeout
# keys; missing keys receive the defaults (method: nil, timeout: 1) and the
# merged hash replaces @shutdown_spec.  When a :method is given it must name
# an instance method of @klass that takes no arguments.  :timeout must be a
# non-negative Numeric.
#
# Raises ArgumentError describing the first problem found.
def validate_shutdown_spec
  if !@shutdown_spec.is_a?(Hash)
    raise ArgumentError, "shutdown must be a hash (got #{@shutdown_spec.inspect})"
  end

  bad_keys = @shutdown_spec.keys - %i[method timeout]
  if !bad_keys.empty?
    raise ArgumentError, "Invalid key(s) in shutdown specification: #{bad_keys.inspect}"
  end

  # Restore any missing defaults
  @shutdown_spec = { method: nil, timeout: 1 }.merge(@shutdown_spec)

  if @shutdown_spec[:method]
    takes_no_args =
      begin
        @klass.instance_method(@shutdown_spec[:method]).arity == 0
      rescue NameError
        # instance_method raises NameError for an unknown method name
        raise ArgumentError,
              "Shutdown method #{@klass.to_s}##{@shutdown_spec[:method]} is not defined"
      end

    if !takes_no_args
      raise ArgumentError,
            "Shutdown method #{@klass.to_s}##{@shutdown_spec[:method]} must not take any arguments"
    end
  end

  if !@shutdown_spec[:timeout].is_a?(Numeric) || @shutdown_spec[:timeout].negative?
    raise ArgumentError,
          "Invalid shutdown timeout #{@shutdown_spec[:timeout].inspect}: must be non-negative integer"
  end
end
363
+
364
# Check the access setting: it may be left as nil, and otherwise must be
# :unsafe.  Anything else raises ArgumentError.
def validate_access
  return if @access.nil?
  return if %i[unsafe].include?(@access)

  raise ArgumentError, "Invalid instance access specification: #{@access.inspect}"
end
372
+
373
# Whether cast/call support was requested for this child, as a strict
# true/false value.
def castcall_enabled?
  @enable_castcall ? true : false
end
376
+
377
# Create a new instance of the child's class.
#
# A class whose initialize takes no arguments is constructed with an
# explicitly empty argument list; any other class receives @args.  When
# cast/call support is enabled the instance additionally gets the
# ProcessCastCall module prepended to its singleton class, a fresh request
# queue, and the read end of a new pipe (the write end is kept in
# @castcall_fd_writer).
#
# Returns the new instance.
def new_instance
  takes_no_args = @klass.instance_method(:initialize).arity == 0

  instance =
    if takes_no_args
      @klass.new()
    else
      @klass.new(*@args)
    end

  return instance unless castcall_enabled?

  instance.singleton_class.prepend(Ultravisor::Child::ProcessCastCall)
  instance.instance_variable_set(:@ultravisor_child_castcall_queue, Queue.new)

  reader, @castcall_fd_writer = IO.pipe
  instance.instance_variable_set(:@ultravisor_child_castcall_fd, reader)

  instance
end
396
+
397
# Return the child instance, waiting on @spawn_cv (under @spawn_m) until
# one exists.
def current_instance
  @spawn_m.synchronize do
    @spawn_cv.wait(@spawn_m) while @instance.nil?

    @instance
  end
end
406
+
407
# Has the child restarted too often?
#
# Walks the runtime history from the front, keeping a running total, and
# counts the entries whose running total is still below the policy's
# :period.  That count, plus one, is the number of recent restarts.
# Returns true when it exceeds the policy's :max; otherwise trims the
# history and returns false.
def blown_policy?
  cumulative_runtime = 0

  within_period = @runtime_history.count do |runtime|
    cumulative_runtime += runtime
    cumulative_runtime < @restart_policy[:period]
  end

  # This is only checked during a restart, so by definition there has been
  # at least one recent restart on top of those found in the history.
  recent_restart_count = within_period + 1

  logger.debug(logloc) { "@runtime_history: #{@runtime_history.inspect}, cumulative_runtime: #{cumulative_runtime}, recent_restart_count: #{recent_restart_count}, restart_policy: #{@restart_policy.inspect}" }

  return true if recent_restart_count > @restart_policy[:max]

  @runtime_history = @runtime_history[0..recent_restart_count]

  false
end
431
+
432
# Tidy up after a child run has ended.
#
# Must be called while holding @spawn_m; raises ThreadSafetyError
# otherwise.  In order, this:
#
#   * records how long the run lasted at the front of @runtime_history,
#   * pushes self onto term_queue (if one was given) unless a shutdown is
#     in progress,
#   * when cast/call support is enabled, closes the instance's request
#     queue, calls child_restarted! on every request still in it, and
#     closes both ends of the cast/call pipe,
#   * forgets the instance, the thread (waking waiters on @spawn_cv) and
#     the spawn id.
#
# term_queue: optional queue that receives self when the child ends.
def termination_cleanup(term_queue = nil)
  unless @spawn_m.owned?
    #:nocov:
    raise ThreadSafetyError,
          "termination_cleanup must be called while holding the @spawn_m lock"
    #:nocov:
  end

  if @start_time
    @runtime_history.unshift(Time.now.to_f - @start_time.to_f)
    @start_time = nil
  end

  term_queue << self if term_queue && !@shutting_down

  if castcall_enabled?
    cc_q = @instance.instance_variable_get(:@ultravisor_child_castcall_queue)
    cc_q.close

    begin
      # A non-blocking pop raises ThreadError once the queue has been
      # drained, which is what ends this loop.
      loop do
        cc_q.pop(true).child_restarted!
      end
    rescue ThreadError => ex
      raise unless ex.message == "queue empty"
    end

    @instance.instance_variable_get(:@ultravisor_child_castcall_fd).close
    @instance.instance_variable_set(:@ultravisor_child_castcall_fd, nil)
    @castcall_fd_writer.close
    @castcall_fd_writer = nil
  end

  @instance = nil

  if @thread
    @thread = nil
    @spawn_cv.broadcast
  end

  @spawn_id = nil
end
474
+ end
475
+ end
476
+
477
+ require_relative "./child/call"
478
+ require_relative "./child/call_receiver"
479
+ require_relative "./child/cast"
480
+ require_relative "./child/cast_receiver"
481
+ require_relative "./child/process_cast_call"