service_skeleton 0.0.0.1.ENOTAG → 0.0.0.2.g46c1e0e
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +0 -2
- data/.rubocop.yml +114 -9
- data/.travis.yml +11 -0
- data/README.md +153 -279
- data/lib/service_skeleton/background_worker.rb +80 -0
- data/lib/service_skeleton/config.rb +18 -78
- data/lib/service_skeleton/config_variable.rb +8 -29
- data/lib/service_skeleton/config_variables.rb +68 -54
- data/lib/service_skeleton/error.rb +3 -5
- data/lib/service_skeleton/filtering_logger.rb +0 -2
- data/lib/service_skeleton/logging_helpers.rb +3 -10
- data/lib/service_skeleton/metrics_methods.rb +13 -28
- data/lib/service_skeleton/signal_handler.rb +183 -0
- data/lib/service_skeleton.rb +145 -22
- data/service_skeleton.gemspec +9 -10
- metadata +19 -102
- data/.editorconfig +0 -7
- data/.git-blame-ignore-revs +0 -2
- data/.github/workflows/ci.yml +0 -50
- data/lib/service_skeleton/config_class.rb +0 -16
- data/lib/service_skeleton/config_variable/boolean.rb +0 -21
- data/lib/service_skeleton/config_variable/enum.rb +0 -27
- data/lib/service_skeleton/config_variable/float.rb +0 -25
- data/lib/service_skeleton/config_variable/integer.rb +0 -25
- data/lib/service_skeleton/config_variable/kv_list.rb +0 -26
- data/lib/service_skeleton/config_variable/path_list.rb +0 -13
- data/lib/service_skeleton/config_variable/string.rb +0 -18
- data/lib/service_skeleton/config_variable/url.rb +0 -36
- data/lib/service_skeleton/config_variable/yaml_file.rb +0 -42
- data/lib/service_skeleton/generator.rb +0 -165
- data/lib/service_skeleton/metric_method_name.rb +0 -9
- data/lib/service_skeleton/runner.rb +0 -46
- data/lib/service_skeleton/service_name.rb +0 -20
- data/lib/service_skeleton/signal_manager.rb +0 -202
- data/lib/service_skeleton/signals_methods.rb +0 -15
- data/lib/service_skeleton/ultravisor_children.rb +0 -20
- data/lib/service_skeleton/ultravisor_loggerstash.rb +0 -11
- data/ultravisor/.yardopts +0 -1
- data/ultravisor/Guardfile +0 -9
- data/ultravisor/README.md +0 -404
- data/ultravisor/lib/ultravisor/child/call.rb +0 -21
- data/ultravisor/lib/ultravisor/child/call_receiver.rb +0 -14
- data/ultravisor/lib/ultravisor/child/cast.rb +0 -16
- data/ultravisor/lib/ultravisor/child/cast_receiver.rb +0 -11
- data/ultravisor/lib/ultravisor/child/process_cast_call.rb +0 -39
- data/ultravisor/lib/ultravisor/child.rb +0 -481
- data/ultravisor/lib/ultravisor/error.rb +0 -25
- data/ultravisor/lib/ultravisor/logging_helpers.rb +0 -32
- data/ultravisor/lib/ultravisor.rb +0 -216
- data/ultravisor/spec/example_group_methods.rb +0 -19
- data/ultravisor/spec/example_methods.rb +0 -8
- data/ultravisor/spec/spec_helper.rb +0 -52
- data/ultravisor/spec/ultravisor/add_child_spec.rb +0 -79
- data/ultravisor/spec/ultravisor/child/call_spec.rb +0 -121
- data/ultravisor/spec/ultravisor/child/cast_spec.rb +0 -111
- data/ultravisor/spec/ultravisor/child/id_spec.rb +0 -21
- data/ultravisor/spec/ultravisor/child/new_spec.rb +0 -152
- data/ultravisor/spec/ultravisor/child/restart_delay_spec.rb +0 -40
- data/ultravisor/spec/ultravisor/child/restart_spec.rb +0 -70
- data/ultravisor/spec/ultravisor/child/run_spec.rb +0 -95
- data/ultravisor/spec/ultravisor/child/shutdown_spec.rb +0 -124
- data/ultravisor/spec/ultravisor/child/spawn_spec.rb +0 -107
- data/ultravisor/spec/ultravisor/child/unsafe_instance_spec.rb +0 -55
- data/ultravisor/spec/ultravisor/child/wait_spec.rb +0 -32
- data/ultravisor/spec/ultravisor/new_spec.rb +0 -71
- data/ultravisor/spec/ultravisor/remove_child_spec.rb +0 -49
- data/ultravisor/spec/ultravisor/run_spec.rb +0 -334
- data/ultravisor/spec/ultravisor/shutdown_spec.rb +0 -106
@@ -1,202 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "./logging_helpers"
|
4
|
-
|
5
|
-
module ServiceSkeleton
|
6
|
-
# Manage signals in a sane and safe manner.
|
7
|
-
#
|
8
|
-
# Signal handling is a shit of a thing. The code that runs when a signal is
|
9
|
-
# triggered can't use mutexes (which are used in all sorts of places you
|
10
|
-
# might not expect, like Logger!) or anything else that might block. This
|
11
|
-
# greatly constrains what you can do inside a signal handler, so the standard
|
12
|
-
# approach is to stuff a character down a pipe, and then have the *real*
|
13
|
-
# signal handling run later.
|
14
|
-
#
|
15
|
-
# Also, there's always the (slim) possibility that something else might have
|
16
|
-
# hooked into a signal we want to receive. Because only a single signal
|
17
|
-
# handler can be active for a given signal at a time, we need to "chain" the
|
18
|
-
# existing handler, by calling the previous signal handler from our signal
|
19
|
-
# handler after we've done what we need to do. This class takes care of
|
20
|
-
# that, too, because it's a legend.
|
21
|
-
#
|
22
|
-
# So that's what this class does: it allows you to specify signals and
|
23
|
-
# associated blocks of code to run, it sets up signal handlers which send
|
24
|
-
# notifications to a background thread and chain correctly, and it manages
|
25
|
-
# the background thread to receive the notifications and execute the
|
26
|
-
# associated blocks of code outside of the context of the signal handler.
|
27
|
-
#
|
28
|
-
class SignalManager
  include ServiceSkeleton::LoggingHelpers

  # Set up a signal manager instance.
  #
  # @param logger [Logger] the logger to use for all the interesting information
  #   about what we're up to.
  #
  # @param counter [#increment] called as `increment(labels: { signal: <name> })`
  #   every time a registered signal is dispatched.
  #
  # @param signals [Enumerable] pairs of `[signal specifier, callable]`.  A
  #   specifier may be an Integer signal number, or a String/Symbol signal
  #   name with or without a leading "SIG" (any case).
  #
  # @raise [ArgumentError] if a signal specifier is of an unsupported class
  #   or does not name a known signal.
  #
  def initialize(logger:, counter:, signals:)
    @logger = logger
    @signal_counter = counter
    @signal_list = signals

    # One SignalHandler per signal number, created on first reference.
    @registry = Hash.new { |h, k| h[k] = SignalHandler.new(k) }

    @signal_list.each do |sig, callback|
      @registry[signum(sig)] << callback
    end
  end

  # Install the signal handlers and process signal notifications until
  # the notification pipe is closed (see #shutdown).  Blocks the calling
  # thread.  On the way out, the traps are restored and both ends of the
  # pipe are closed, so repeated runs do not leak file descriptors.
  #
  # @return [void]
  #
  def run
    logger.info(logloc) { "Starting signal manager for #{@signal_list.length} signals" }

    @r, @w = IO.pipe

    install_signal_handlers

    signals_loop
  ensure
    remove_signal_handlers
    close_pipe
  end

  # Ask a running manager to stop, by closing the read end of the
  # notification pipe.  Safe to call before #run has created the pipe.
  #
  # @return [void]
  #
  def shutdown
    @r&.close
  end

  private

  attr_reader :logger

  # Wait for characters to arrive on the notification pipe and dispatch
  # each one to #handle_signal.  Returns when the pipe hits EOF or an
  # IOError is raised (which is what closing the pipe from another
  # thread produces); any other StandardError is logged and the loop
  # carries on.
  def signals_loop
    #:nocov:
    loop do
      begin
        ios = IO.select([@r])
        next if ios.nil?

        unless ios.first.include?(@r)
          logger.error(logloc) { "Mysterious return from select: #{ios.inspect}" }
          next
        end

        if @r.eof?
          logger.info(logloc) { "Signal pipe closed; shutting down" }
          break
        end

        c = @r.read_nonblock(1)
        logger.debug(logloc) { "Received character #{c.inspect} from signal pipe" }
        handle_signal(c)
      rescue IOError
        # Something has gone terribly wrong here... bail
        break
      rescue StandardError => ex
        log_exception(ex) { "Exception in select loop" }
      end
    end
    #:nocov:
  end

  # Given a character (presumably) received via the signal pipe, execute the
  # associated handler.
  #
  # @param char [String] a single character, corresponding to an entry in the
  #   signal registry.
  #
  # @return [void]
  #
  def handle_signal(char)
    signo = char.ord

    # Check with key? first: a plain @registry[signo] lookup would
    # auto-create a handler for an unknown signal number.
    unless @registry.key?(signo)
      logger.error(logloc) { "Unrecognised signal character: #{char.inspect}" }
      return
    end

    handler = @registry[signo]
    logger.debug(logloc) { "#{handler.signame} received" }
    @signal_counter.increment(labels: { signal: handler.signame.to_s })

    begin
      handler.call
    rescue StandardError => ex
      log_exception(ex) { "Exception while calling signal handler" }
    end
  end

  # Point every registered handler at the current write pipe and trap
  # its signal.
  def install_signal_handlers
    @registry.each_value do |h|
      h.write_pipe = @w
      h.hook
    end
  end

  # Translate a signal specifier into a signal number.
  #
  # @param spec [Integer, String, Symbol] a signal number (returned as-is)
  #   or a signal name, with or without a leading "SIG", in any case.
  #
  # @return [Integer]
  #
  # @raise [ArgumentError] on an unsupported specifier class or an
  #   unknown signal name.
  #
  def signum(spec)
    return spec if spec.is_a?(Integer)

    unless spec.is_a?(Symbol) || spec.is_a?(String)
      raise ArgumentError,
            "Unsupported class (#{spec.class}) of signal specifier #{spec.inspect}"
    end

    name = spec.to_s.sub(/\ASIG/i, '').upcase

    Signal.list.fetch(name) do
      raise ArgumentError,
            "Unrecognised signal specifier #{spec.inspect}"
    end
  end

  # Undo whatever #install_signal_handlers managed to set up.
  def remove_signal_handlers
    @registry.each_value(&:unhook)
  end

  # Close both ends of the notification pipe, if it was ever created.
  def close_pipe
    [@w, @r].each { |io| io&.close }
  end

  # The trap, callbacks and chained previous handler for one signal.
  class SignalHandler
    attr_reader :signame
    attr_writer :write_pipe

    # @param signum [Integer] the signal number this handler looks after.
    def initialize(signum)
      @signum = signum
      @callbacks = []

      @signame = Signal.list.invert[@signum]
    end

    # Register another callback to run when the signal is dispatched.
    #
    # @param callback [#call]
    def <<(callback)
      @callbacks << callback
    end

    # Run every registered callback, in registration order.
    def call
      @callbacks.each(&:call)
    end

    # Trap the signal, remembering whatever handler was previously
    # installed so that it can be chained and, later, restored.
    def hook
      @handler = ->(signo) do
        #:nocov:
        begin
          @write_pipe.write_nonblock(@signum.chr)
        rescue StandardError
          # Deliberately best-effort: nothing useful can be done about
          # a full, closed or missing pipe from inside a trap context.
          nil
        end

        # Ruby hands the signal number to trap handlers, so the chained
        # handler gets it too; a lambda handler would otherwise raise
        # ArgumentError here.
        @chain.call(signo) if @chain.respond_to?(:call)
        #:nocov:
      end

      @chain = Signal.trap(@signum, &@handler)
    end

    # Remove our trap, but only if we are still the active handler.
    def unhook
      #:nocov:
      # Never hooked, so there is nothing of ours to remove.
      return if @handler.nil?

      current = Signal.trap(@signum, "IGNORE")

      # If the current handler is ours, replace it with the handler we
      # chained; otherwise put the current one back, because whoever
      # owns it might get angry.
      Signal.trap(@signum, current == @handler ? @chain : current)
      #:nocov:
    end
  end

  private_constant :SignalHandler
end
|
202
|
-
end
|
@@ -1,15 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module ServiceSkeleton
  # Mixin that lets an object accumulate `[signal specifier, block]` pairs.
  # The pairs are kept in an instance variable of whatever object the
  # module's methods are called on.
  # NOTE(review): presumably the list is later handed to SignalManager as
  # its `signals:` argument -- confirm against the caller.
  module SignalsMethods
    # The hooks registered so far via #hook_signal.
    #
    # @return [Array<Array>] `[sigspec, block]` pairs in registration order;
    #   empty if nothing has been registered.
    #
    def registered_signal_handlers
      @registered_signal_handlers || []
    end

    # Register a block to be associated with a signal.
    #
    # @param sigspec [Object] the signal specifier, stored as given.
    #
    # @yield the code to associate with the signal.
    #
    # @return [Array<Array>] the updated list of registrations.
    #
    # @raise [ArgumentError] if no block is given.  Without this check a
    #   nil callback would be stored and only blow up when called.
    #
    def hook_signal(sigspec, &blk)
      raise ArgumentError, "hook_signal requires a block" unless blk

      (@registered_signal_handlers ||= []) << [sigspec, blk]
    end
  end
end
|
@@ -1,20 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module ServiceSkeleton
  # Mixin for registering the receiver as a child of an ultravisor.
  module UltravisorChildren
    # Add the receiver to +ultravisor+ as a child that is started by
    # calling its `run` method.
    #
    # @param ultravisor [#add_child] the supervisor to register with.
    #
    # @param config [Object] passed through as the `config:` keyword of
    #   the child's constructor arguments.
    #
    # @param metrics_registry [Object] passed through as the `metrics:`
    #   keyword of the child's constructor arguments.
    #
    # @return whatever `ultravisor.add_child` returns.
    #
    # @raise [ServiceSkeleton::Error::InvalidServiceClassError] if the
    #   ultravisor rejects the child with `Ultravisor::InvalidKAMError`.
    #
    def register_ultravisor_children(ultravisor, config:, metrics_registry:)
      ultravisor.add_child(
        id: service_name.to_sym,
        klass: self,
        method: :run,
        args: [config: config, metrics: metrics_registry],
        access: :unsafe
      )
    rescue Ultravisor::InvalidKAMError
      raise ServiceSkeleton::Error::InvalidServiceClassError,
            "Class #{self} does not implement the `run' instance method"
    end
  end
end
|
data/ultravisor/.yardopts
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
--markup markdown
|
data/ultravisor/Guardfile
DELETED
data/ultravisor/README.md
DELETED
@@ -1,404 +0,0 @@
|
|
1
|
-
> # WARNING WARNING WARNING
|
2
|
-
>
|
3
|
-
> This README is, at least in part, speculative fiction. I practice
|
4
|
-
> README-driven development, and as such, not everything described in here
|
5
|
-
> actually exists yet, and what does exist may not work right.
|
6
|
-
|
7
|
-
Ultravisor is like a supervisor, but... *ULTRA*. The idea is that you specify
|
8
|
-
objects to instantiate and run in threads, and then the Ultravisor makes that
|
9
|
-
happen behind the scenes, including logging failures, restarting if necessary,
|
10
|
-
and so on. If you're familiar with Erlang supervision trees, then Ultravisor
|
11
|
-
will feel familiar to you, because I stole pretty much every good idea that
|
12
|
-
is in Ultravisor from Erlang. You will get a lot of very excellent insight
|
13
|
-
from reading [the Erlang/OTP Supervision Principles](http://erlang.org/doc/design_principles/sup_princ.html).
|
14
|
-
|
15
|
-
# Usage
|
16
|
-
|
17
|
-
This section gives you a basic overview of the high points of how Ultravisor
|
18
|
-
can be used. It is not intended to be an exhaustive reference of all possible
|
19
|
-
options; the {Ultravisor} class API documentation provides every possible option
|
20
|
-
and its meaning.
|
21
|
-
|
22
|
-
|
23
|
-
## The Basics
|
24
|
-
|
25
|
-
Start by loading the code:
|
26
|
-
|
27
|
-
require "ultravisor"
|
28
|
-
|
29
|
-
Creating a new Ultravisor is a matter of instantiating a new object:
|
30
|
-
|
31
|
-
u = Ultravisor.new
|
32
|
-
|
33
|
-
In order for it to be useful, though, you'll need to add one or more children
|
34
|
-
to the Ultravisor instance, which can either be done as part of the call to
|
35
|
-
`.new`, or afterwards, as you see fit:
|
36
|
-
|
37
|
-
# Defining a child in the constructor
|
38
|
-
u = Ultravisor.new(children: [{id: :child, klass: Child, method: :run}])
|
39
|
-
|
40
|
-
# OR define it afterwards
|
41
|
-
u = Ultravisor.new
|
42
|
-
u.add_child(id: :my_child, klass: Child, method: :run)
|
43
|
-
|
44
|
-
Once you have an Ultravisor with children configured, you can set it running:
|
45
|
-
|
46
|
-
u.run
|
47
|
-
|
48
|
-
This will block until the Ultravisor terminates, one way or another.
|
49
|
-
|
50
|
-
We'll learn about other available initialization arguments, and all the other
|
51
|
-
features of Ultravisor, in the following sections.
|
52
|
-
|
53
|
-
|
54
|
-
## Defining Children
|
55
|
-
|
56
|
-
As children are the primary reason Ultravisor exists, it is worth getting a handle
|
57
|
-
on them first.
|
58
|
-
|
59
|
-
Defining children, as we saw in the introduction, can be done by calling
|
60
|
-
{Ultravisor#add_child} for each child you want to add, or else you can provide
|
61
|
-
a list of children to start as part of the {Ultravisor.new} call, using the
|
62
|
-
`children` named argument. You can also combine the two approaches, if some
|
63
|
-
children are defined statically, while others only get added conditionally.
|
64
|
-
|
65
|
-
Let's take another look at that {Ultravisor#add_child} method from earlier:
|
66
|
-
|
67
|
-
u.add_child(id: :my_child, klass: Child, method: :run)
|
68
|
-
|
69
|
-
First up, every child has an ID. This is fairly straightforward -- it's a
|
70
|
-
unique ID (within a given Ultravisor) that refers to the child. Attempting to
|
71
|
-
add two children with the same ID will raise an exception.
|
72
|
-
|
73
|
-
The `klass` and `method` arguments require a little more explanation. One
|
74
|
-
of the foundational principles of "fail fast" is "clean restart" -- that is, if you
|
75
|
-
do need to restart something, it's important to start with as clean a state as possible.
|
76
|
-
Thus, if a child needs to be restarted, we don't want to reuse an existing object, which
|
77
|
-
may be in a messy and unusable state. Instead, we want a clean, fresh object to work on.
|
78
|
-
That's why you specify a `class` when you define a child -- it is a new instance of that
|
79
|
-
class that will be used every time the child is started (or restarted).
|
80
|
-
|
81
|
-
The `method` argument might now be obvious. Once the new instance of the
|
82
|
-
specified `class` exists, the Ultravisor will call the specified `method` to start
|
83
|
-
work happening. It is expected that this method will ***not return***, in most cases.
|
84
|
-
So you probably want some sort of infinite loop.
|
85
|
-
|
86
|
-
You might think that this is extremely inflexible, only being able to specify a class
|
87
|
-
and a method to call. What if you want to pass in some parameters? Don't worry, we've
|
88
|
-
got you covered:
|
89
|
-
|
90
|
-
u.add_child(
|
91
|
-
id: :my_child,
|
92
|
-
klass: Child,
|
93
|
-
args: ['foo', 42, x: 1, y: 2],
|
94
|
-
method: :run,
|
95
|
-
)
|
96
|
-
|
97
|
-
The call to `Child.new` can take arbitrary arguments, just by defining an array
|
98
|
-
for the `args` named parameter. Did you know you can define a hash inside an
|
99
|
-
array like `['foo', 'bar', x: 1, y: 2] => ['foo', 'bar', {:x => 1, :y => 2}]`?
|
100
|
-
I didn't, either, until I started working on Ultravisor, but you can, and it
|
101
|
-
works *exactly* like named parameters in method calls.
|
102
|
-
|
103
|
-
You can also add children after the Ultravisor has been set running:
|
104
|
-
|
105
|
-
u = Ultravisor.new
|
106
|
-
|
107
|
-
u.add_child(id: :c1, klass: SomeWorker, method: :run)
|
108
|
-
|
109
|
-
u.run # => starts running an instance of SomeWorker, doesn't return
|
110
|
-
|
111
|
-
# In another thread...
|
112
|
-
u.add_child(id: :c2, klass: OtherWorker, method: :go!)
|
113
|
-
|
114
|
-
# An instance of OtherWorker will be created and set running
|
115
|
-
|
116
|
-
If you add a child to an already-running Ultravisor, that child will immediately be
|
117
|
-
started running, almost like magic.
|
118
|
-
|
119
|
-
|
120
|
-
### Ordering of Children
|
121
|
-
|
122
|
-
The order in which children are defined is important. When children are (re)started,
|
123
|
-
they are always started in the order they were defined. When children are stopped,
|
124
|
-
either because the Ultravisor is shutting down, or because of a [supervision
|
125
|
-
strategy](#supervision-strategies), they are always stopped in the *reverse* order
|
126
|
-
of their definition.
|
127
|
-
|
128
|
-
All child specifications passed to {Ultravisor.new} always come first, in the
|
129
|
-
order they were in the array. Any children defined via calls to
|
130
|
-
{Ultravisor#add_child} will go next, in the order the `add_child` calls were
|
131
|
-
made.
|
132
|
-
|
133
|
-
|
134
|
-
## Restarting Children
|
135
|
-
|
136
|
-
One of the fundamental purposes of a supervisor like Ultravisor is that it restarts
|
137
|
-
children if they crash, on the principle of "fail fast". There's no point failing fast
|
138
|
-
if things don't get automatically fixed. This is the default behaviour of all
|
139
|
-
Ultravisor children.
|
140
|
-
|
141
|
-
Controlling how children are restarted is the purpose of the "restart policy",
|
142
|
-
which is controlled by the `restart` and `restart_policy` named arguments in
|
143
|
-
the child specification. For example, if you want to create a child that will
|
144
|
-
only ever be run once, regardless of what happens to it, then use `restart:
|
145
|
-
:never`:
|
146
|
-
|
147
|
-
u.add_child(
|
148
|
-
id: :my_one_shot_child,
|
149
|
-
klass: Child,
|
150
|
-
method: :run_maybe,
|
151
|
-
restart: :never
|
152
|
-
)
|
153
|
-
|
154
|
-
If you want a child which gets restarted if its `method` raises an exception,
|
155
|
-
but *not* if it runs to completion without error, then use `restart: :on_failure`:
|
156
|
-
|
157
|
-
u.add_child(
|
158
|
-
id: :my_run_once_child,
|
159
|
-
klass: Child,
|
160
|
-
method: :run_once,
|
161
|
-
restart: :on_failure
|
162
|
-
)
|
163
|
-
|
164
|
-
### The Limits of Failure
|
165
|
-
|
166
|
-
While restarting is great in general, you don't particularly want to fill your
|
167
|
-
logs with an endlessly restarting child -- say, because it doesn't have
|
168
|
-
permission to access a database. To solve that problem, an Ultravisor will
|
169
|
-
only attempt to restart a child a certain number of times before giving up and
|
170
|
-
exiting itself. The parameters of how this works are controlled by the
|
171
|
-
`restart_policy`, which is itself a hash:
|
172
|
-
|
173
|
-
u.add_child(
|
174
|
-
id: :my_restartable_child,
|
175
|
-
klass: Child,
|
176
|
-
method: :run,
|
177
|
-
restart_policy: {
|
178
|
-
period: 5,
|
179
|
-
retries: 2,
|
180
|
-
delay: 1,
|
181
|
-
}
|
182
|
-
)
|
183
|
-
|
184
|
-
The meaning of each of the `restart_policy` keys is best explained as part
|
185
|
-
of how Ultravisor restarts children.
|
186
|
-
|
187
|
-
When a child needs to be restarted, Ultravisor first waits a little while
|
188
|
-
before attempting the restart. The amount of time to wait is specified
|
189
|
-
by the `delay` value in the `restart_policy`. Then a new instance of the
|
190
|
-
`class` is instantiated, and the `method` is called on that instance.
|
191
|
-
|
192
|
-
The `period` and `retries` values of the `restart_policy` come into play
|
193
|
-
when the child exits repeatedly. If a single child needs to be restarted
|
194
|
-
more than `retries` times in `period` seconds, then instead of trying to
|
195
|
-
restart again, Ultravisor gives up. It doesn't try to start the child
|
196
|
-
again, it terminates all the *other* children of the Ultravisor, and
|
197
|
-
then it exits. Note that the `delay` between restarts is *not* part
|
198
|
-
of the `period`; only time spent actually running the child is
|
199
|
-
accounted for.
|
200
|
-
|
201
|
-
|
202
|
-
## Managed Child Termination
|
203
|
-
|
204
|
-
If children need to be terminated, by default, child threads are simply
|
205
|
-
forcibly terminated by calling {Thread#kill} on them. However, for workers
|
206
|
-
which hold resources, this can cause problems.
|
207
|
-
|
208
|
-
Thus, it is possible to control both how a child is terminated, and how long
|
209
|
-
to wait for that termination to occur, by using the `shutdown` named argument
|
210
|
-
when you add a child (either via {Ultravisor#add_child}, or as part of the
|
211
|
-
`children` named argument to {Ultravisor.new}), like this:
|
212
|
-
|
213
|
-
u.add_child(
|
214
|
-
id: :fancy_worker,
|
215
|
-
shutdown: {
|
216
|
-
method: :gentle_landing,
|
217
|
-
timeout: 30
|
218
|
-
}
|
219
|
-
)
|
220
|
-
|
221
|
-
When a child with a custom shutdown policy needs to be terminated, the
|
222
|
-
method named in the `method` key is called on the instance of `class` that
|
223
|
-
represents that child. Once the shutdown has been signalled to the
|
224
|
-
worker, up to `timeout` seconds is allowed to elapse. If the child thread has
|
225
|
-
not terminated by this time, the thread is forcibly terminated by calling
|
226
|
-
{Thread#kill}. This timeout prevents shutdown or group restart from hanging
|
227
|
-
indefinitely.
|
228
|
-
|
229
|
-
Note that the `method` specified in the `shutdown` specification should
|
230
|
-
signal the worker to terminate, and then return immediately. It should
|
231
|
-
*not* wait for termination itself.
|
232
|
-
|
233
|
-
|
234
|
-
## Supervision Strategies
|
235
|
-
|
236
|
-
When a child needs to be restarted, by default only the child that exited
|
237
|
-
will be restarted. However, it is possible to cause other
|
238
|
-
children to be restarted as well, if that is necessary. To do that, you
|
239
|
-
use the `strategy` named parameter when creating the Ultravisor:
|
240
|
-
|
241
|
-
u = Ultravisor.new(strategy: :all_for_one)
|
242
|
-
|
243
|
-
The possible values for the strategy are:
|
244
|
-
|
245
|
-
* `:one_for_one` -- the default restart strategy, this simply causes the
|
246
|
-
child which exited to be started again, in line with its restart policy.
|
247
|
-
|
248
|
-
* `:all_for_one` -- if any child needs to be restarted, all children of the
|
249
|
-
Ultravisor get terminated in reverse of their start order, and then all
|
250
|
-
children are started again, except those which are `restart: :never`, or
|
251
|
-
`restart: :on_failure` which had already exited without error.
|
252
|
-
|
253
|
-
* `:rest_for_one` -- if any child needs to be restarted, all children of
|
254
|
-
the Ultravisor which are *after* the restarted child get terminated
|
255
|
-
in reverse of their start order, and then all children are started again,
|
256
|
-
except those which are `restart: :never`, or `restart: :on_failure` which
|
257
|
-
had already exited without error.
|
258
|
-
|
259
|
-
|
260
|
-
## Interacting With Child Objects
|
261
|
-
|
262
|
-
Since the Ultravisor is creating the object instances that run in the worker
|
263
|
-
threads, you don't automatically have access to the object instance itself.
|
264
|
-
This is somewhat by design -- concurrency bugs are hell. However, there *are*
|
265
|
-
ways around this, if you need to.
|
266
|
-
|
267
|
-
|
268
|
-
### The power of cast / call
|
269
|
-
|
270
|
-
A common approach for interacting with an object in an otherwise concurrent
|
271
|
-
environment is the `cast` / `call` pattern. From the outside, the interface
|
272
|
-
is quite straightforward:
|
273
|
-
|
274
|
-
```
|
275
|
-
u = Ultravisor.new(children: [
|
276
|
-
{ id: :castcall, klass: CastCall, method: :run, enable_castcall: true }
|
277
|
-
])
|
278
|
-
|
279
|
-
# This will return `nil` immediately
|
280
|
-
u[:castcall].cast.some_method
|
281
|
-
|
282
|
-
# This will, at some point in the future, return whatever `CastCall#some_method` returns
|
283
|
-
u[:castcall].call.some_method
|
284
|
-
```
|
285
|
-
|
286
|
-
To enable `cast` / `call` support for a child, you must set the `enable_castcall`
|
287
|
-
keyword argument on the child. This is because failing to process `cast`s and
|
288
|
-
`call`s can cause all sorts of unpleasant backlogs, so children who intend to
|
289
|
-
receive (and process) `cast`s and `call`s must explicitly opt-in.
|
290
|
-
|
291
|
-
The interface to the object from outside is straightforward. You get a
|
292
|
-
reference to the instance of {Ultravisor::Child} for the child you want to talk
|
293
|
-
to (which is returned by {Ultravisor#add_child}, or {Ultravisor#[]}), and then
|
294
|
-
call `child.cast.<method>` or `child.call.<method>`, passing in arguments as
|
295
|
-
per normal. Any public method can be the target of the `cast` or `call`, and you
|
296
|
-
can pass in any arguments you like, *including blocks* (although bear in mind that
|
297
|
-
any blocks passed will be run in the child instance's thread, and many
|
298
|
-
concurrency dragons await the unwary).
|
299
|
-
|
300
|
-
The difference between the `cast` and `call` methods is in whether or not a
|
301
|
-
return value is expected, and hence when the method call chained through
|
302
|
-
`cast` or `call` returns.
|
303
|
-
|
304
|
-
When you call `cast`, the real method call gets queued for later execution,
|
305
|
-
and since no return value is expected, the `child.cast.<method>` returns
|
306
|
-
`nil` immediately and your code gets on with its day. This is useful
|
307
|
-
when you want to tell the worker something, or instruct it to do something,
|
308
|
-
but there's no value coming back.
|
309
|
-
|
310
|
-
In comparison, when you call `call`, the real method call still gets queued,
|
311
|
-
but the calling code blocks, waiting for the return value from the queued
|
312
|
-
method call. This may seem pointless -- why have concurrency that blocks? --
|
313
|
-
but the value comes from the synchronisation. The method call only happens
|
314
|
-
when the worker loop calls `process_castcall`, which it can do at a time that
|
315
|
-
suits it, and when it knows that nothing else is going on that could cause
|
316
|
-
problems.
|
317
|
-
|
318
|
-
One thing to be aware of when interacting with a worker instance is that it may
|
319
|
-
crash, and be restarted by the Ultravisor, before it gets around to processing
|
320
|
-
a queued message. If you used `child.cast`, then the method call is just...
|
321
|
-
lost, forever. On the other hand, if you used `child.call`, then an
|
322
|
-
{Ultravisor::ChildRestartedError} exception will be raised, which you can deal
|
323
|
-
with as you see fit.
|
324
|
-
|
325
|
-
The really interesting part is what happens *inside* the child instance. The
|
326
|
-
actual execution of code in response to the method calls passed through `cast`
|
327
|
-
and `call` will only happen when the running instance of the child's class
|
328
|
-
calls `process_castcall`. When that happens, all pending casts and calls will
|
329
|
-
be executed. Since this happens within the same thread as the rest of the
|
330
|
-
child instance's code, it's a lot safer than trying to synchronise everything
|
331
|
-
with locks.
|
332
|
-
|
333
|
-
You can, of course, just call `process_castcall` repeatedly, however that's a
|
334
|
-
somewhat herp-a-derp way of doing it. The `castcall_fd` method in the running
|
335
|
-
instance will return an IO object which will become readable whenever there is
|
336
|
-
a pending `cast` or `call` to process. Thus, if you're using `IO.select` or
|
337
|
-
similar to wait for work to do, you can add `castcall_fd` to the readable set
|
338
|
-
and only call `process_castcall` when the relevant IO object comes back. Don't
|
339
|
-
actually try *reading* from it yourself; `process_castcall` takes care of all that.
|
340
|
-
|
341
|
-
If you happen to have a child class whose *only* purpose is to process `cast`s
|
342
|
-
and `call`s, you should configure the Ultravisor to use `process_castcall_loop`
|
343
|
-
as its entry method. This is a wrapper method which blocks on `castcall_fd`
|
344
|
-
becoming readable, and loops infinitely.
|
345
|
-
|
346
|
-
It is important to remember that not all concurrency bugs can be prevented by
|
347
|
-
using `cast` / `call`. For example, read-modify-write operations will still
|
348
|
-
cause all the same problems they always do, so if you find yourself calling
|
349
|
-
`child.call`, modifying the value returned, and then calling `child.cast`
|
350
|
-
with that modified value, you're in for a bad time.
|
351
|
-
|
352
|
-
|
353
|
-
### Direct (Unsafe) Instance Access
|
354
|
-
|
355
|
-
If you have a worker class which you're *really* sure is safe against concurrent
|
356
|
-
access, you can eschew the convenience and safety of `cast` / `call`, and instead
|
357
|
-
allow direct access to the worker instance object.
|
358
|
-
|
359
|
-
To do this, specify `access: :unsafe` in the child specification, and then
|
360
|
-
call `child.unsafe_instance` to get the instance object currently in play.
|
361
|
-
|
362
|
-
Yes, the multiple mentions of `unsafe` are there deliberately, and no, I won't
|
363
|
-
be removing them. They're there to remind you, always, that what you're doing
|
364
|
-
is unsafe.
|
365
|
-
|
366
|
-
If the child is restarting at the time `child.unsafe_instance` is called,
|
367
|
-
the call will block until the child worker is started again, after which
|
368
|
-
you'll get the newly created worker instance object. The worker could crash
|
369
|
-
again at any time, of course, leaving you with a now out-of-date object
|
370
|
-
that is no longer being actively run. It's up to you to figure out how to
|
371
|
-
deal with that. If the Ultravisor associated with the child
|
372
|
-
has terminated, your call to `child.unsafe_instance` will raise an
|
373
|
-
{Ultravisor::ChildRestartedError}.
|
374
|
-
|
375
|
-
Why yes, Gracie, there *are* a lot of things that can go wrong when using
|
376
|
-
direct instance object access. Still wondering why those `unsafe`s are in
|
377
|
-
the name?
|
378
|
-
|
379
|
-
|
380
|
-
## Supervision Trees
|
381
|
-
|
382
|
-
Whilst a collection of workers is a neat thing to have, more powerful systems
|
383
|
-
can be constructed if supervisors can, themselves, be supervised. Primarily
|
384
|
-
this is useful when recovering from persistent errors, because you can use
|
385
|
-
a higher-level supervisor to restart an entire tree of workers which has one
|
386
|
-
which is having problems.
|
387
|
-
|
388
|
-
Creating a supervision tree is straightforward. Because Ultravisor works by
|
389
|
-
instantiating plain old ruby objects, and Ultravisor is, itself, a plain old
|
390
|
-
ruby class, you use it more-or-less like you would any other object:
|
391
|
-
|
392
|
-
u = Ultravisor.new
|
393
|
-
u.add_child(id: :sub_sup, klass: Ultravisor, method: :run, args: [children: [...]])
|
394
|
-
|
395
|
-
That's all there is to it. Whenever the parent Ultravisor wants to work on the
|
396
|
-
child Ultravisor, it treats it like any other child, asking it to terminate,
|
397
|
-
start, etc, and the child Ultravisor's work consists of terminating, starting,
|
398
|
-
etc all of its children.
|
399
|
-
|
400
|
-
The only difference in default behaviour between a regular worker child and an
|
401
|
-
Ultravisor child is that an Ultravisor's `shutdown` policy is automatically set
|
402
|
-
to `method: :stop!, timeout: :infinity`. This is because it is *very* bad news
|
403
|
-
to forcibly terminate an Ultravisor before its children have stopped -- all
|
404
|
-
those children just get cast into the VM, never to be heard from again.
|
@@ -1,21 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
# A queued method call whose outcome is reported back through a queue.
class Ultravisor::Child::Call
  attr_reader :method_name

  # @param method_name [Symbol, String] the method to invoke on the receiver.
  #
  # @param args [Array] positional arguments for the method call.
  #
  # @param blk [Proc, nil] block to pass to the method call.
  #
  # @param rv_q [#<<, #close] where the outcome is delivered
  #   (presumably a Queue -- confirm against the caller).
  #
  # @param rv_fail [Object] marker pushed onto +rv_q+ in place of a return
  #   value when the call fails or the child is restarted.
  #
  def initialize(method_name, args, blk, rv_q, rv_fail)
    @method_name = method_name
    @args = args
    @blk = blk
    @rv_q = rv_q
    @rv_fail = rv_fail
  end

  # Invoke the method on +receiver+ and push its return value onto the
  # result queue.  The queue is closed afterwards, whatever happens.
  #
  # @param receiver [Object] the object to call the method on.
  #
  # @raise whatever the invoked method raises, after the failure marker
  #   has been queued.
  #
  def go!(receiver)
    @rv_q << receiver.__send__(@method_name, *@args, &@blk)
  rescue Exception
    # Exception (not just StandardError) is rescued on purpose: the
    # failure marker is queued for every kind of failure, and the
    # exception is re-raised immediately rather than swallowed.
    @rv_q << @rv_fail
    raise
  ensure
    @rv_q.close
  end

  # Report failure for a call that was never run.
  def child_restarted!
    @rv_q << @rv_fail
  end
end
|