ultravisor 0.0.0.3.g8cf10dc

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,481 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "./logging_helpers"
4
+
5
+ class Ultravisor
6
+ class Child
7
+ include LoggingHelpers
8
+
9
+ attr_reader :id
10
+
11
# Set up a supervised child: record what to run and how to restart and stop
# it, validating each group of settings as soon as it has been stored.
#
# @param id identifier exposed through #id and used in log messages
# @param klass [Class] class instantiated for every spawn
# @param args [Array] arguments handed to +klass.new+
# @param method [Symbol] instance method invoked on the new instance to run it
# @param restart [Symbol] one of :never, :on_failure or :always
# @param restart_policy [Hash] restart limits (:period, :max, :delay)
# @param shutdown [Hash] shutdown settings (:method, :timeout)
# @param logger [Logger] destination for log output
# @param enable_castcall [Boolean] whether #cast and #call are available
# @param access [Symbol, nil] nil, or :unsafe to permit #unsafe_instance
# @raise [InvalidKAMError, ArgumentError] when a setting fails validation
def initialize(
  id:,
  klass:,
  args: [],
  method:,
  restart: :always,
  restart_policy: { period: 5, max: 3, delay: 1 },
  shutdown: { method: nil, timeout: 1 },
  logger: Logger.new("/dev/null"),
  enable_castcall: false,
  access: nil
)
  @logger = logger
  @id     = id

  # What to run: class, constructor arguments, entry-point method.
  @klass  = klass
  @args   = args
  @method = method
  validate_kam

  # When to restart.
  @restart = restart
  validate_restart

  # How often restarts are tolerated.
  @restart_policy = restart_policy
  validate_restart_policy

  # How to stop.
  @shutdown_spec = shutdown
  validate_shutdown_spec

  # Who may touch the live instance.
  @access = access
  validate_access

  @enable_castcall = enable_castcall

  # Runtimes of previous incarnations, most recent first.
  @runtime_history = []

  # @spawn_m/@spawn_cv guard and signal changes to the running
  # thread/instance; @shutdown_m serialises #shutdown calls.
  @spawn_m    = Mutex.new
  @spawn_cv   = ConditionVariable.new
  @shutdown_m = Mutex.new
end
57
+
58
# Start a fresh instance of the child in its own thread.
#
# The thread runs the configured entry-point method on a newly built
# instance, capturing either its return value or the exception that ended
# it, and then runs termination_cleanup (unless a different spawn has taken
# over in the meantime).
#
# @param term_queue queue handed to termination_cleanup when the thread ends
# @return [self]
def spawn(term_queue)
  interrupts_off = { ::Exception => :never, ::Numeric => :never }
  interrupts_on  = { ::Exception => :immediate, ::Numeric => :immediate }

  @spawn_m.synchronize do
    @value      = nil
    @exception  = nil
    @start_time = Time.now
    @instance   = new_instance

    # Token identifying this particular incarnation, so that a stale
    # thread's cleanup cannot clobber a newer spawn.
    token = rand
    @spawn_id = token

    # The thread is created with interrupts masked; they are only allowed
    # while the child's own code is running.
    Thread.handle_interrupt(interrupts_off) do
      @thread = Thread.new do
        Thread.current.name = "Ultravisor::Child(#{@id})"
        logger.debug(logloc) { "Spawning new instance of #{@id}" }

        begin
          Thread.handle_interrupt(interrupts_on) do
            logger.debug(logloc) { "Calling #{@klass}##{@method} to start #{@id} running" }
            @value = @instance.public_send(@method)
          end
        rescue Exception => e
          @exception = e
        ensure
          # ensure blocks still run when the thread is terminated with
          # Thread#kill, so cleanup happens on that path too.
          @spawn_m.synchronize do
            termination_cleanup(term_queue) if @spawn_id == token
          end
        end
      end
    end

    # Wake anyone waiting for an instance to become available.
    @spawn_cv.broadcast
  end

  self
end
95
+
96
# Stop the running child thread, escalating until it is gone.
#
# Unless +force+ is set, the configured shutdown method (if any) is called
# on the instance first; otherwise, or if that call raises, the thread is
# killed. A thread that has not finished within the shutdown timeout is
# killed, and one that still will not die is abandoned after cleaning up on
# its behalf. Does nothing when no thread is running or when called from the
# child's own thread.
#
# @param force [Boolean] skip the shutdown method and kill straight away
def shutdown(force: false)
  @shutdown_m.synchronize do
    victim = nil
    spawn_token = nil

    @spawn_m.synchronize do
      return if @thread.nil? || @thread == Thread.current

      # Hold our own references to the running thread and its spawn token,
      # so @spawn_m need not be re-acquired for every step below -- doing so
      # would collide with the terminating thread, which is itself trying to
      # take the same lock in order to clean up.
      victim = @thread
      spawn_token = @spawn_id

      # Let everyone know we're in shutdown mode
      @shutting_down = true
    end

    graceful = @shutdown_spec[:method] && !force

    if graceful
      begin
        @instance.public_send(@shutdown_spec[:method])
      rescue Exception => e
        log_exception(e) { "Unhandled exception when calling #{@shutdown_spec[:method].inspect} on child #{id}" }
        victim.kill
      end
    else
      victim.kill
    end

    unless victim.join(@shutdown_spec[:timeout])
      logger.info(logloc) { "Child instance for #{self.id} did not cleanly shutdown within #{@shutdown_spec[:timeout]} seconds; force-killing the thread" }
      victim.kill
    end

    # Final grace period before giving up on the thread altogether.
    unless victim.join(0.1)
      logger.error(logloc) { "Child thread for #{self.id} appears hung; abandoning thread #{victim}" }

      # The worker has seized up and the ensure block in `spawn` has not
      # fired, so perform the cleanup here instead.
      @spawn_m.synchronize do
        termination_cleanup if @spawn_id == spawn_token
      end
    end
  end
end
145
+
146
# Block the caller until no child thread is running (@thread is unset).
def wait
  @spawn_m.synchronize do
    loop do
      break unless @thread

      @spawn_cv.wait(@spawn_m)
    end
  end
end
151
+
152
# Wait until no child thread is running, then report the exception recorded
# for the most recent run.
#
# @return [Exception, nil] the stored exception, or nil if none was recorded
def termination_exception
  @spawn_m.synchronize do
    while @thread
      @spawn_cv.wait(@spawn_m)
    end

    @exception
  end
end
158
+
159
# Wait until no child thread is running, then report the value recorded for
# the most recent run.
#
# @return [Object, nil] the stored return value of the child's entry point
def termination_value
  @spawn_m.synchronize do
    while @thread
      @spawn_cv.wait(@spawn_m)
    end

    @value
  end
end
165
+
166
# Work out how long to pause before the next restart.
#
# A numeric :delay is used as-is; a Range :delay yields a random point
# between its first and last values. The result is never below zero.
#
# @return [Numeric] the delay taken from the restart policy
def restart_delay
  spec = @restart_policy[:delay]

  chosen =
    case spec
    when Numeric then spec
    when Range   then spec.first + (spec.last - spec.first) * rand
    end

  [0, chosen].max
end
178
+
179
# Decide whether the child should be started again.
#
# @return [Boolean] true for restart: :always, or for restart: :on_failure
#   when the last run ended with an exception; false otherwise
# @raise [BlownRestartPolicyError] when blown_policy? reports that the
#   restart policy has been exceeded
def restart?
  raise BlownRestartPolicyError, "Child #{id} has restarted more than #{@restart_policy[:max]} times in #{@restart_policy[:period]} seconds." if blown_policy?

  return true if @restart == :always
  return false unless @restart == :on_failure

  termination_exception ? true : false
end
187
+
188
# Hand out the live child instance directly.
#
# Only permitted for children declared with access: :unsafe.
#
# @return [Object] the current instance (waits until one exists)
# @raise [Ultravisor::ThreadSafetyError] for any other access setting
def unsafe_instance
  return current_instance if @access == :unsafe

  raise Ultravisor::ThreadSafetyError, "#unsafe_instance called on a child not declared with access: :unsafe"
end
196
+
197
# Queue a method invocation on the child instance without waiting for it.
#
# Returns a CastReceiver whose block, once given the cast to deliver, waits
# for an instance to exist, checks the instance responds to the requested
# method, pushes the cast onto the instance's castcall queue, and writes a
# byte to the castcall pipe.
#
# @return [CastReceiver]
# @raise [NoMethodError] when castcall is not enabled for this child, or
#   (from the receiver's block) when the instance lacks the method
def cast
  raise NoMethodError, "undefined method `cast' for #{self}" unless castcall_enabled?

  CastReceiver.new do |castback|
    @spawn_m.synchronize do
      #:nocov:
      @spawn_cv.wait(@spawn_m) while @instance.nil?
      #:nocov:

      wanted = castback.method_name
      unless @instance.respond_to?(wanted)
        raise NoMethodError, "undefined method `#{wanted}' for #{@instance}"
      end

      queue = @instance.instance_variable_get(:@ultravisor_child_castcall_queue)
      begin
        queue << castback
      rescue ClosedQueueError
        # A cast carries no delivery guarantee, so silently discarding it
        # once the instance's queue has been closed is acceptable.
      end

      @castcall_fd_writer.putc "!"
    end
  end
end
227
+
228
# Queue a method invocation on the child instance for which the caller
# expects a result.
#
# Returns a CallReceiver whose block, once given the call to deliver, waits
# for an instance to exist, checks the instance responds to the requested
# method, pushes the call onto the instance's castcall queue, and writes a
# byte to the castcall pipe.
#
# @return [CallReceiver]
# @raise [NoMethodError] when castcall is not enabled for this child, or
#   (from the receiver's block) when the instance lacks the method
# @raise [ChildRestartedError] (from the receiver's block) when the
#   instance's queue has already been closed
def call
  raise NoMethodError, "undefined method `call' for #{self}" unless castcall_enabled?

  CallReceiver.new do |callback|
    @spawn_m.synchronize do
      #:nocov:
      @spawn_cv.wait(@spawn_m) while @instance.nil?
      #:nocov:

      wanted = callback.method_name
      unless @instance.respond_to?(wanted)
        raise NoMethodError, "undefined method `#{wanted}' for #{@instance}"
      end

      queue = @instance.instance_variable_get(:@ultravisor_child_castcall_queue)
      begin
        queue << callback
      rescue ClosedQueueError
        # Unlike a cast, the caller is waiting on an answer, so tell them.
        raise ChildRestartedError
      end

      @castcall_fd_writer.putc "!"
    end
  end
end
257
+
258
+ private
259
+
260
# Check the klass/args/method triple is runnable: a zero-arity constructor
# must not be given args, and the entry-point method must exist and have
# arity zero.
#
# @raise [InvalidKAMError] when any of those checks fails
def validate_kam
  constructor = @klass.instance_method(:initialize)
  if constructor.arity.zero? && @args != []
    raise InvalidKAMError, "#{@klass}.new takes no arguments, but args not empty."
  end

  begin
    entry_point = @klass.instance_method(@method)
    raise InvalidKAMError, "#{@klass}##{@method} must not take arguments" unless entry_point.arity.zero?
  rescue NameError
    # instance_method raises NameError for a method the class lacks.
    raise InvalidKAMError, "#{@klass} has no instance method #{@method}"
  end
end
276
+
277
# Ensure the restart setting is one of :never, :on_failure or :always.
#
# @raise [ArgumentError] for any other value
def validate_restart
  return if %i{never on_failure always}.include?(@restart)

  raise ArgumentError, "Invalid value for restart: #{@restart.inspect}"
end
283
+
284
# Validate @restart_policy and fill in any keys the caller left out.
#
# Accepted keys are :period (positive number), :max (non-negative number)
# and :delay (non-negative number, or an increasing Range whose first value
# is not negative). Missing keys take the defaults period: 5, max: 3,
# delay: 1.
#
# @raise [ArgumentError] when the policy is not a Hash, contains unknown
#   keys, or holds an unacceptable value
def validate_restart_policy
  unless @restart_policy.is_a?(Hash)
    raise ArgumentError,
          "restart_policy must be a hash (got #{@restart_policy.inspect})"
  end

  bad_keys = @restart_policy.keys - %i{period max delay}
  unless bad_keys.empty?
    raise ArgumentError,
          "Invalid key(s) in restart_policy: #{bad_keys.inspect}"
  end

  # Restore any missing defaults
  @restart_policy = { period: 5, max: 3, delay: 1 }.merge(@restart_policy)

  unless @restart_policy[:period].is_a?(Numeric) && @restart_policy[:period].positive?
    raise ArgumentError,
          "Invalid restart_policy period #{@restart_policy[:period].inspect}: must be positive integer"
  end

  unless @restart_policy[:max].is_a?(Numeric) && !@restart_policy[:max].negative?
    # Report the offending :max value (this message previously echoed
    # :period by mistake).
    raise ArgumentError,
          "Invalid restart_policy max #{@restart_policy[:max].inspect}: must be non-negative integer"
  end

  case @restart_policy[:delay]
  when Numeric
    if @restart_policy[:delay].negative?
      raise ArgumentError,
            "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: must be non-negative integer or range"
    end
  when Range
    if @restart_policy[:delay].first >= @restart_policy[:delay].last
      raise ArgumentError,
            "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: must be non-negative integer or increasing range"
    end

    if @restart_policy[:delay].first.negative?
      raise ArgumentError,
            "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: range must not be negative"
    end
  else
    raise ArgumentError,
          "Invalid restart_policy delay #{@restart_policy[:delay].inspect}: must be non-negative integer or range"
  end
end
330
+
331
# Validate @shutdown_spec and fill in any keys the caller left out.
#
# Accepted keys are :method (nil, or the name of a zero-arity instance
# method of @klass) and :timeout (non-negative number). Missing keys take
# the defaults method: nil, timeout: 1.
#
# @raise [ArgumentError] when the spec is not a Hash, contains unknown keys,
#   names an unusable method, or has an unacceptable timeout
def validate_shutdown_spec
  raise ArgumentError, "shutdown must be a hash (got #{@shutdown_spec.inspect})" unless @shutdown_spec.is_a?(Hash)

  unknown = @shutdown_spec.keys - %i{method timeout}
  raise ArgumentError, "Invalid key(s) in shutdown specification: #{unknown.inspect}" unless unknown.empty?

  # Caller-supplied values win over the defaults.
  @shutdown_spec = { method: nil, timeout: 1 }.merge(@shutdown_spec)

  stop_method = @shutdown_spec[:method]
  if stop_method
    stop_arity =
      begin
        @klass.instance_method(stop_method).arity
      rescue NameError
        raise ArgumentError, "Shutdown method #{@klass}##{stop_method} is not defined"
      end

    unless stop_arity == 0
      raise ArgumentError, "Shutdown method #{@klass}##{stop_method} must not take any arguments"
    end
  end

  limit = @shutdown_spec[:timeout]
  return if limit.is_a?(Numeric) && !limit.negative?

  raise ArgumentError, "Invalid shutdown timeout #{limit.inspect}: must be non-negative integer"
end
363
+
364
# Ensure the access setting is either absent (nil) or :unsafe.
#
# @raise [ArgumentError] for any other value
def validate_access
  return if @access.nil? || %i{unsafe}.include?(@access)

  raise ArgumentError, "Invalid instance access specification: #{@access.inspect}"
end
372
+
373
# Whether the cast/call machinery was switched on for this child.
#
# @return [Boolean] the truthiness of @enable_castcall as a strict boolean
def castcall_enabled?
  @enable_castcall ? true : false
end
376
+
377
# Build a new instance of the child's class.
#
# A class whose #initialize has arity zero is constructed with an explicit
# empty argument list; any other class receives @args splatted. When
# castcall is enabled the instance is additionally given the
# ProcessCastCall behaviour, a fresh castcall queue, and the read end of a
# new pipe (the write end is kept in @castcall_fd_writer).
#
# @return [Object] the freshly constructed instance
def new_instance
  no_arg_constructor = @klass.instance_method(:initialize).arity == 0
  instance = no_arg_constructor ? @klass.new() : @klass.new(*@args)

  if castcall_enabled?
    instance.singleton_class.prepend(Ultravisor::Child::ProcessCastCall)
    instance.instance_variable_set(:@ultravisor_child_castcall_queue, Queue.new)

    reader, @castcall_fd_writer = IO.pipe
    instance.instance_variable_set(:@ultravisor_child_castcall_fd, reader)
  end

  instance
end
396
+
397
# Fetch the live child instance, blocking until one exists.
#
# @return [Object] the value of @instance once it is non-nil
def current_instance
  @spawn_m.synchronize do
    @spawn_cv.wait(@spawn_m) while @instance.nil?

    @instance
  end
end
406
+
407
# Determine whether the child has restarted too often.
#
# Walks @runtime_history accumulating runtimes; every entry reached while
# the running total is still below the policy's :period counts as a recent
# restart. When the policy is not blown, the history is trimmed.
#
# @return [Boolean] true when the recent restart count exceeds :max
def blown_policy?
  window = @restart_policy[:period]
  elapsed = 0

  # The count starts from one: this check only happens as part of a
  # restart, so at least one recent restart has necessarily occurred.
  restarts = 1 + @runtime_history.count do |runtime|
    elapsed += runtime
    elapsed < window
  end

  logger.debug(logloc) { "@runtime_history: #{@runtime_history.inspect}, cumulative_runtime: #{elapsed}, recent_restart_count: #{restarts}, restart_policy: #{@restart_policy.inspect}" }

  return true if restarts > @restart_policy[:max]

  # Drop history entries beyond index `restarts`.
  @runtime_history = @runtime_history.take(restarts + 1)

  false
end
431
+
432
# Reset per-spawn state after the child thread has ended (or been
# abandoned). Must be called with @spawn_m held.
#
# Records the runtime of the finished run, announces the termination on
# +term_queue+ (unless a shutdown is in progress), tears down the castcall
# queue and pipe when castcall is enabled, and clears the instance, thread
# and spawn id, waking any threads waiting on @spawn_cv.
#
# @param term_queue [#<<, nil] queue that receives +self+ on termination
# @raise [ThreadSafetyError] when the caller does not own @spawn_m
def termination_cleanup(term_queue = nil)
  unless @spawn_m.owned?
    #:nocov:
    raise ThreadSafetyError,
          "termination_cleanup must be called while holding the @spawn_m lock"
    #:nocov:
  end

  if @start_time
    # Newest runtime goes to the front of the history.
    @runtime_history.unshift(Time.now.to_f - @start_time.to_f)
    @start_time = nil
  end

  term_queue << self if term_queue && !@shutting_down

  if castcall_enabled?
    cc_q = @instance.instance_variable_get(:@ultravisor_child_castcall_queue)
    cc_q.close

    # Drain whatever is still queued, telling each pending cast/call that
    # the child has gone away. The non-blocking pop raises ThreadError once
    # the queue is empty, which ends the loop.
    begin
      loop do
        cc_q.pop(true).child_restarted!
      end
    rescue ThreadError => ex
      raise unless ex.message == "queue empty"
    end

    @instance.instance_variable_get(:@ultravisor_child_castcall_fd).close
    @instance.instance_variable_set(:@ultravisor_child_castcall_fd, nil)
    @castcall_fd_writer.close
    @castcall_fd_writer = nil
  end

  @instance = nil

  if @thread
    @thread = nil
    @spawn_cv.broadcast
  end

  @spawn_id = nil
end
474
+ end
475
+ end
476
+
477
+ require_relative "./child/call"
478
+ require_relative "./child/call_receiver"
479
+ require_relative "./child/cast"
480
+ require_relative "./child/cast_receiver"
481
+ require_relative "./child/process_cast_call"