service_skeleton 1.0.2 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/.git-blame-ignore-revs +2 -0
  3. data/.github/workflows/ci.yml +32 -17
  4. data/.gitignore +0 -7
  5. data/.rubocop.yml +6 -0
  6. data/README.md +1 -53
  7. data/lib/service_skeleton/config.rb +20 -13
  8. data/lib/service_skeleton/generator.rb +4 -4
  9. data/lib/service_skeleton/runner.rb +3 -3
  10. data/lib/service_skeleton/signal_manager.rb +1 -1
  11. data/lib/service_skeleton/ultravisor_loggerstash.rb +9 -1
  12. data/service_skeleton.gemspec +4 -14
  13. data/ultravisor/.yardopts +1 -0
  14. data/ultravisor/Guardfile +9 -0
  15. data/ultravisor/README.md +404 -0
  16. data/ultravisor/lib/ultravisor.rb +216 -0
  17. data/ultravisor/lib/ultravisor/child.rb +485 -0
  18. data/ultravisor/lib/ultravisor/child/call.rb +21 -0
  19. data/ultravisor/lib/ultravisor/child/call_receiver.rb +14 -0
  20. data/ultravisor/lib/ultravisor/child/cast.rb +16 -0
  21. data/ultravisor/lib/ultravisor/child/cast_receiver.rb +11 -0
  22. data/ultravisor/lib/ultravisor/child/process_cast_call.rb +39 -0
  23. data/ultravisor/lib/ultravisor/error.rb +25 -0
  24. data/ultravisor/lib/ultravisor/logging_helpers.rb +32 -0
  25. data/ultravisor/spec/example_group_methods.rb +19 -0
  26. data/ultravisor/spec/example_methods.rb +8 -0
  27. data/ultravisor/spec/spec_helper.rb +56 -0
  28. data/ultravisor/spec/ultravisor/add_child_spec.rb +79 -0
  29. data/ultravisor/spec/ultravisor/child/call_spec.rb +121 -0
  30. data/ultravisor/spec/ultravisor/child/cast_spec.rb +111 -0
  31. data/ultravisor/spec/ultravisor/child/id_spec.rb +21 -0
  32. data/ultravisor/spec/ultravisor/child/new_spec.rb +152 -0
  33. data/ultravisor/spec/ultravisor/child/restart_delay_spec.rb +40 -0
  34. data/ultravisor/spec/ultravisor/child/restart_spec.rb +70 -0
  35. data/ultravisor/spec/ultravisor/child/run_spec.rb +95 -0
  36. data/ultravisor/spec/ultravisor/child/shutdown_spec.rb +124 -0
  37. data/ultravisor/spec/ultravisor/child/spawn_spec.rb +216 -0
  38. data/ultravisor/spec/ultravisor/child/unsafe_instance_spec.rb +55 -0
  39. data/ultravisor/spec/ultravisor/child/wait_spec.rb +32 -0
  40. data/ultravisor/spec/ultravisor/new_spec.rb +71 -0
  41. data/ultravisor/spec/ultravisor/remove_child_spec.rb +49 -0
  42. data/ultravisor/spec/ultravisor/run_spec.rb +334 -0
  43. data/ultravisor/spec/ultravisor/shutdown_spec.rb +106 -0
  44. metadata +43 -59
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0784b080a70e7097113e64f9f217d463ebc35920cb5938c8543423321c1ea0ed'
4
- data.tar.gz: 451b025d9d3283946d8bc791ab085903b516bc8aeadbdac7cd38076ca59afc16
3
+ metadata.gz: 7d49a02ddaf68cb61da78352ed7f114f1ba12034dcd02852ce20aab2c87ea390
4
+ data.tar.gz: 9090f37cac4cc61fa538fc56a34c859fd44adc252e0fd212ea3a63a877a46666
5
5
  SHA512:
6
- metadata.gz: b51c443ac3ea961f24d79c9cb6a0039d5e00e50a8e7349509adab7091b2c789dc51f01c1bbbd2c41b83a24c1ac2d18da7215ae25b6c4878aad461e18ada50193
7
- data.tar.gz: a1f2193a51c33ace2672b0dfa8534bb34402c2e9091c7fdbc58cbbf9ff87466ffc228b60513bd0cce75f7b148de0c1cd93b90dbec9d25d253f0554e40a814f9d
6
+ metadata.gz: 85685190eb12f5072744e7098f529cf12fc4755483fee50a47f8f46db7b83acf587eed549e973a02d903dd4a5d9bf9929c24a515996002e6b072629ff780af9d
7
+ data.tar.gz: e80d673512846c362094c962b21ffdababeae718194d9ba48b60dc73f733b8ccc820fbff8c1766f1a48eb4ccc2deb912fb6194bd788d526d7241bfd10b9d9295
@@ -0,0 +1,2 @@
1
+ # Reformatting ultravisor
2
+ 5a89b13dce618cd2544c5518833c4f1587a38ee7
@@ -1,35 +1,50 @@
1
- name: Service Skeleton Tests
1
+ name: CI
2
2
 
3
3
  on:
4
4
  pull_request:
5
5
  push:
6
6
  branches:
7
7
  - master
8
+ - main
8
9
 
9
10
  jobs:
10
11
  build:
11
12
  runs-on: ubuntu-latest
12
- name: Ruby ${{ matrix.ruby }}
13
+
13
14
  strategy:
14
15
  matrix:
15
- ruby: ["2.5", "2.6", "2.7"]
16
+ ruby:
17
+ - 2.5
18
+ - 2.6
19
+ - 2.7
20
+ - 3.0
21
+
16
22
  steps:
17
23
  - uses: actions/checkout@v2
18
- - uses: actions/setup-ruby@v1
24
+
25
+ - name: Setup ruby
26
+ uses: ruby/setup-ruby@v1
19
27
  with:
20
28
  ruby-version: ${{ matrix.ruby }}
21
- - name: Bundler cache
22
- uses: actions/cache@v2
23
- with:
24
- path: vendor/bundle
25
- key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
26
- restore-keys: |
27
- ${{ runner.os }}-${{ matrix.ruby }}-gems-
28
- - name: Setup gems
29
- run: |
30
- bundle config path vendor/bundle
31
- bundle install --jobs 4
32
- - name: Rubocop
29
+ bundler-cache: true
30
+
31
+ - name: Lint
33
32
  run: bundle exec rubocop
33
+
34
34
  - name: Tests
35
- run: bundle exec rspec
35
+ run: bundle exec rake test
36
+
37
+ publish:
38
+ if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master')
39
+ needs: build
40
+ runs-on: ubuntu-latest
41
+
42
+ steps:
43
+ - uses: actions/checkout@v2
44
+
45
+ - name: Release Gem
46
+ uses: discourse/publish-rubygems-action@v2-beta
47
+ env:
48
+ RUBYGEMS_API_KEY: ${{ secrets.RUBYGEMS_API_KEY }}
49
+ GIT_EMAIL: team@discourse.org
50
+ GIT_NAME: discoursebot
data/.gitignore CHANGED
@@ -1,9 +1,2 @@
1
1
  Gemfile.lock
2
- /pkg
3
- /doc
4
- /.yardoc
5
2
  /coverage
6
- /.bundle
7
- /tmp
8
-
9
- .rubocop-https---raw-githubusercontent-com-discourse-discourse-master--rubocop-yml
data/.rubocop.yml CHANGED
@@ -1,5 +1,11 @@
1
1
  inherit_gem:
2
2
  rubocop-discourse: default.yml
3
+
3
4
  inherit_mode:
4
5
  merge:
5
6
  - Exclude
7
+
8
+ RSpec/MessageSpies:
9
+ EnforcedStyle: receive
10
+ Exclude:
11
+ - "ultravisor/**/*"
data/README.md CHANGED
@@ -448,8 +448,7 @@ portion is the all-uppercase [service name](#the-service-name).
448
448
 
449
449
  INFO,buggy=DEBUG,/noisy/i=ERROR
450
450
 
451
- Logging levels can be changed at runtime, via [signals](#default-signals) or
452
- [the HTTP admin interface](#http-admin-interface).
451
+ Logging levels can be changed at runtime via [signals](#default-signals).
453
452
 
454
453
  * **`<SERVICENAME>_LOGSTASH_SERVER`** (string; default `""`) -- if set to a
455
454
  non-empty string, we will engage the services of the [loggerstash
@@ -686,57 +685,6 @@ When the service is shutdown, all signal handlers will be automatically
686
685
  unhooked, which saves you having to do it yourself.
687
686
 
688
687
 
689
- ## HTTP Admin Interface
690
-
691
- In these modern times we live in, it seems everything from nuclear reactors to
692
- toasters can be controlled from a browser. Why should your services be any
693
- different?
694
-
695
-
696
- ### HTTP Admin Configuration
697
-
698
- In the spirit of "secure by default", you must explicitly enable the HTTP admin
699
- interface, and configure an authentication method. To do that, use the
700
- following environment variables, where `<SERVICENAME>_` is the all-uppercase
701
- version of [the service name](#the-service-name).
702
-
703
- * **`<SERVICENAME>_HTTP_ADMIN_PORT`** (integer; range 1..65535; default: `""`)
704
- -- if set to a valid port number (`1` to `65535` inclusive), the HTTP admin
705
- interface will listen on that port, if also enabled by configuring
706
- authentication.
707
-
708
- * **`<SERVICENAME>_HTTP_ADMIN_BASIC_AUTH`** (string; default: `""`) -- if set
709
- to a string containing a username and password separated by a colon, then
710
- authentication via [HTTP Basic auth](https://tools.ietf.org/html/rfc7617)
711
- will be supported. Note that in addition to this setting, an admin port must
712
- also be specified in order for the admin interface to be enabled.
713
-
714
- * **`<SERVICENAME>_HTTP_ADMIN_PROXY_USERNAME_HEADER`** (string; default: `""`)
715
- -- if set to a non-empty string, then incoming requests will be examined for
716
- a HTTP header with the specified name. If such a header exists and has a
717
- non-empty value, then the request will be deemed to have been authenticated
718
- by an upstream authenticating proxy (such as
719
- [`discourse-auth-proxy`](https://github.com/discourse/discourse-auth-proxy))
720
- as the user given in the header value. Note that in addition to this
721
- setting, an admin port must also be specified in order for the admin
722
- interface to be enabled.
723
-
724
-
725
- ### HTTP Admin Usage
726
-
727
- The HTTP admin interface provides both an interactive, browser-based mode,
728
- as well as a RESTful interface, which should, in general, provide equivalent
729
- functionality.
730
-
731
- * Visiting the service's `IP address:port` in a web browser will bring up an HTML
732
- interface showing all the features that are available. Usage should
733
- (hopefully) be self-explanatory.
734
-
735
- * Visiting the service's `IP address:port` whilst accepting `application/json`
736
- responses will provide a directory of links to available endpoints which you
737
- can use to interact with the HTTP admin interface programmatically.
738
-
739
-
740
688
  # Contributing
741
689
 
742
690
  Patches can be sent as [a Github pull
@@ -8,7 +8,7 @@ require "loggerstash"
8
8
 
9
9
  module ServiceSkeleton
10
10
  class Config
11
- attr_reader :logger, :env, :service_name
11
+ attr_reader :logger, :pre_run_logger, :env, :service_name
12
12
 
13
13
  def initialize(env, service_name, variables)
14
14
  @service_name = service_name
@@ -66,6 +66,11 @@ module ServiceSkeleton
66
66
 
67
67
  @logger = Logger.new(log_file || $stderr, shift_age, shift_size)
68
68
 
69
+ # Can be used prior to a call to ultravisor#run. This prevents a race condition
70
+ # when a logstash server is configured but the logstash writer is not yet
71
+ # initialised. This should never be updated after it is configured.
72
+ @pre_run_logger = Logger.new(log_file || $stderr, shift_age, shift_size)
73
+
69
74
  if Thread.main
70
75
  Thread.main[:thread_map_number] = 0
71
76
  else
@@ -76,21 +81,23 @@ module ServiceSkeleton
76
81
 
77
82
  thread_map_mutex = Mutex.new
78
83
 
79
- @logger.formatter = ->(s, t, p, m) do
80
- th_n = if Thread.current.name
81
- #:nocov:
82
- Thread.current.name
83
- #:nocov:
84
- else
85
- thread_map_mutex.synchronize do
86
- Thread.current[:thread_map_number] ||= begin
87
- Thread.list.select { |th| th[:thread_map_number] }.length
84
+ [@logger, @pre_run_logger].each do |logger|
85
+ logger.formatter = ->(s, t, p, m) do
86
+ th_n = if Thread.current.name
87
+ #:nocov:
88
+ Thread.current.name
89
+ #:nocov:
90
+ else
91
+ thread_map_mutex.synchronize do
92
+ Thread.current[:thread_map_number] ||= begin
93
+ Thread.list.select { |th| th[:thread_map_number] }.length
94
+ end
88
95
  end
89
96
  end
90
- end
91
97
 
92
- ts = log_enable_timestamps ? "#{t.utc.strftime("%FT%T.%NZ")} " : ""
93
- "#{ts}#{$$}##{th_n} #{s[0]} [#{p}] #{m}\n"
98
+ ts = log_enable_timestamps ? "#{t.utc.strftime("%FT%T.%NZ")} " : ""
99
+ "#{ts}#{$$}##{th_n} #{s[0]} [#{p}] #{m}\n"
100
+ end
94
101
  end
95
102
 
96
103
  @logger.filters = []
@@ -10,14 +10,14 @@ require "frankenstein/process_metrics"
10
10
  require "frankenstein/server"
11
11
  require "prometheus/client/registry"
12
12
  require "sigdump"
13
- require "ultravisor"
13
+ require_relative "../../ultravisor/lib/ultravisor"
14
14
 
15
15
  module ServiceSkeleton
16
16
  module Generator
17
17
  def generate(config:, metrics_registry:, service_metrics:, service_signal_handlers:)
18
18
  Ultravisor.new(logger: config.logger).tap do |ultravisor|
19
- initialize_metrics(ultravisor, config, metrics_registry, service_metrics)
20
19
  initialize_loggerstash(ultravisor, config, metrics_registry)
20
+ initialize_metrics(ultravisor, config, metrics_registry, service_metrics)
21
21
  initialize_signals(ultravisor, config, service_signal_handlers, metrics_registry)
22
22
  end
23
23
  end
@@ -45,7 +45,7 @@ module ServiceSkeleton
45
45
  end
46
46
 
47
47
  if config.metrics_port
48
- config.logger.info(config.service_name) { "Starting metrics server on port #{config.metrics_port}" }
48
+ config.pre_run_logger.info(config.service_name) { "Starting metrics server on port #{config.metrics_port}" }
49
49
  ultravisor.add_child(
50
50
  id: :metrics_server,
51
51
  klass: Frankenstein::Server,
@@ -62,7 +62,7 @@ module ServiceSkeleton
62
62
 
63
63
  def initialize_loggerstash(ultravisor, config, registry)
64
64
  if config.logstash_server && !config.logstash_server.empty?
65
- config.logger.info(config.service_name) { "Configuring loggerstash to send to #{config.logstash_server}" }
65
+ config.pre_run_logger.info(config.service_name) { "Configuring loggerstash to send to #{config.logstash_server}" }
66
66
 
67
67
  ultravisor.add_child(
68
68
  id: :logstash_writer,
@@ -10,7 +10,7 @@ require "frankenstein/process_metrics"
10
10
  require "frankenstein/server"
11
11
  require "prometheus/client/registry"
12
12
  require "sigdump"
13
- require "ultravisor"
13
+ require_relative "../../ultravisor/lib/ultravisor"
14
14
 
15
15
  module ServiceSkeleton
16
16
  class Runner
@@ -33,8 +33,8 @@ module ServiceSkeleton
33
33
  end
34
34
 
35
35
  def run
36
- logger.info(logloc) { "Starting service #{@config.service_name}" }
37
- logger.info(logloc) { (["Environment:"] + @config.env.map { |k, v| "#{k}=#{v.inspect}" }).join("\n ") }
36
+ @config.pre_run_logger.info(logloc) { "Starting service #{@config.service_name}" }
37
+ @config.pre_run_logger.info(logloc) { (["Environment:"] + @config.env.map { |k, v| "#{k}=#{v.inspect}" }).join("\n ") }
38
38
 
39
39
  @ultravisor.run
40
40
  end
@@ -81,7 +81,7 @@ module ServiceSkeleton
81
81
  logger.error(logloc) { "Mysterious return from select: #{ios.inspect}" }
82
82
  end
83
83
  end
84
- rescue IOError
84
+ rescue IOError, Errno::EBADF
85
85
  # Something has gone terribly wrong here... bail
86
86
  break
87
87
  rescue StandardError => ex
@@ -4,8 +4,16 @@ module ServiceSkeleton
4
4
  module UltravisorLoggerstash
5
5
  def logstash_writer
6
6
  #:nocov:
7
- @ultravisor[:logstash_writer].unsafe_instance
7
+ @ultravisor[:logstash_writer].unsafe_instance(wait: false)
8
8
  #:nocov:
9
9
  end
10
+
11
+ # logstash_writer will be nil if the logstash_writer worker is not running
12
+ # Ultravisor's restart policy ensures this will never happen at runtime. But
13
+ # it does happen during startup and shutdown. In this case, we want to skip
14
+ # writing to logstash, not block forever. STDOUT logging will continue.
15
+ def loggerstash_log_message(*args)
16
+ super if !logstash_writer.nil?
17
+ end
10
18
  end
11
19
  end
@@ -1,16 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- begin
4
- require 'git-version-bump'
5
- rescue LoadError
6
- nil
7
- end
8
-
9
3
  Gem::Specification.new do |s|
10
4
  s.name = "service_skeleton"
11
5
 
12
- s.version = GVB.version rescue "0.0.0.1.NOGVB"
13
- s.date = GVB.date rescue Time.now.strftime("%Y-%m-%d")
6
+ s.version = '2.0.2'
14
7
 
15
8
  s.platform = Gem::Platform::RUBY
16
9
 
@@ -33,23 +26,20 @@ Gem::Specification.new do |s|
33
26
  s.required_ruby_version = ">= 2.5.0"
34
27
 
35
28
  s.add_runtime_dependency "frankenstein", "~> 2.0"
36
- s.add_runtime_dependency "loggerstash", ">= 0.0.9", "< 1"
29
+ s.add_runtime_dependency "loggerstash", "~> 1"
37
30
  s.add_runtime_dependency "prometheus-client", "~> 2.0"
38
31
  s.add_runtime_dependency "sigdump", "~> 0.2"
39
32
  s.add_runtime_dependency "to_regexp", "~> 0.2"
40
- s.add_runtime_dependency "ultravisor", "~> 0.a"
33
+ s.add_runtime_dependency "webrick"
41
34
 
42
35
  s.add_development_dependency 'bundler'
43
- s.add_development_dependency 'github-release'
44
- s.add_development_dependency 'git-version-bump'
45
36
  s.add_development_dependency 'guard-rspec'
46
37
  s.add_development_dependency 'guard-rubocop'
47
38
  s.add_development_dependency 'rack-test'
48
39
  s.add_development_dependency 'rake'
49
40
  s.add_development_dependency 'redcarpet'
50
41
  s.add_development_dependency 'rspec'
51
- s.add_development_dependency 'rubocop'
52
- s.add_development_dependency 'rubocop-discourse'
42
+ s.add_development_dependency 'rubocop-discourse', '~> 2.4.1'
53
43
  s.add_development_dependency 'simplecov'
54
44
  s.add_development_dependency 'yard'
55
45
  end
@@ -0,0 +1 @@
1
+ --markup markdown
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+ guard 'rspec',
3
+ cmd: "bundle exec rspec",
4
+ all_on_start: true,
5
+ all_after_pass: true do
6
+ watch(%r{^spec/.+_spec\.rb$})
7
+ watch(%r{^spec/.+_methods\.rb$})
8
+ watch(%r{^lib/}) { "spec" }
9
+ end
@@ -0,0 +1,404 @@
1
+ > # WARNING WARNING WARNING
2
+ >
3
+ > This README is, at least in part, speculative fiction. I practice
4
+ > README-driven development, and as such, not everything described in here
5
+ > actually exists yet, and what does exist may not work right.
6
+
7
+ Ultravisor is like a supervisor, but... *ULTRA*. The idea is that you specify
8
+ objects to instantiate and run in threads, and then the Ultravisor makes that
9
+ happen behind the scenes, including logging failures, restarting if necessary,
10
+ and so on. If you're familiar with Erlang supervision trees, then Ultravisor
11
+ will feel familiar to you, because I stole pretty much every good idea that
12
+ is in Ultravisor from Erlang. You will get a lot of very excellent insight
13
+ from reading [the Erlang/OTP Supervision Principles](http://erlang.org/doc/design_principles/sup_princ.html).
14
+
15
+ # Usage
16
+
17
+ This section gives you a basic overview of the high points of how Ultravisor
18
+ can be used. It is not intended to be an exhaustive reference of all possible
19
+ options; the {Ultravisor} class API documentation provides every possible option
20
+ and its meaning.
21
+
22
+
23
+ ## The Basics
24
+
25
+ Start by loading the code:
26
+
27
+ require "ultravisor"
28
+
29
+ Creating a new Ultravisor is a matter of instantiating a new object:
30
+
31
+ u = Ultravisor.new
32
+
33
+ In order for it to be useful, though, you'll need to add one or more children
34
+ to the Ultravisor instance, which can either be done as part of the call to
35
+ `.new`, or afterwards, as you see fit:
36
+
37
+ # Defining a child in the constructor
38
+ u = Ultravisor.new(children: [{id: :child, klass: Child, method: :run}])
39
+
40
+ # OR define it afterwards
41
+ u = Ultravisor.new
42
+ u.add_child(id: :my_child, klass: Child, method: :run)
43
+
44
+ Once you have an Ultravisor with children configured, you can set it running:
45
+
46
+ u.run
47
+
48
+ This will block until the Ultravisor terminates, one way or another.
49
+
50
+ We'll learn about other available initialization arguments, and all the other
51
+ features of Ultravisor, in the following sections.
52
+
53
+
54
+ ## Defining Children
55
+
56
+ As children are the primary reason Ultravisor exists, it is worth getting a handle
57
+ on them first.
58
+
59
+ Defining children, as we saw in the introduction, can be done by calling
60
+ {Ultravisor#add_child} for each child you want to add, or else you can provide
61
+ a list of children to start as part of the {Ultravisor.new} call, using the
62
+ `children` named argument. You can also combine the two approaches, if some
63
+ children are defined statically, while others only get added conditionally.
64
+
65
+ Let's take another look at that {Ultravisor#add_child} method from earlier:
66
+
67
+ u.add_child(id: :my_child, klass: Child, method: :run)
68
+
69
+ First up, every child has an ID. This is fairly straightforward -- it's a
70
+ unique ID (within a given Ultravisor) that refers to the child. Attempting to
71
+ add two children with the same ID will raise an exception.
72
+
73
+ The `class` and `method` arguments require a little more explanation. One
74
+ of the foundational principles of "fail fast" is "clean restart" -- that is, if you
75
+ do need to restart something, it's important to start with as clean a state as possible.
76
+ Thus, if a child needs to be restarted, we don't want to reuse an existing object, which
77
+ may be in a messy and unusable state. Instead, we want a clean, fresh object to work on.
78
+ That's why you specify a `class` when you define a child -- it is a new instance of that
79
+ class that will be used every time the child is started (or restarted).
80
+
81
+ The `method` argument might now be obvious. Once the new instance of the
82
+ specified `class` exists, the Ultravisor will call the specified `method` to start
83
+ work happening. It is expected that this method will ***not return***, in most cases.
84
+ So you probably want some sort of infinite loop.
85
+
86
+ You might think that this is extremely inflexible, only being able to specify a class
87
+ and a method to call. What if you want to pass in some parameters? Don't worry, we've
88
+ got you covered:
89
+
90
+ u.add_child(
91
+ id: :my_child,
92
+ klass: Child,
93
+ args: ['foo', 42, x: 1, y: 2],
94
+ method: :run,
95
+ )
96
+
97
+ The call to `Child.new` can take arbitrary arguments, just by defining an array
98
+ for the `args` named parameter. Did you know you can define a hash inside an
99
+ array like `['foo', 'bar', x: 1, y: 2] => ['foo', 'bar', {:x => 1, :y => 2}]`?
100
+ I didn't, either, until I started working on Ultravisor, but you can, and it
101
+ works *exactly* like named parameters in method calls.
102
+
103
+ You can also add children after the Ultravisor has been set running:
104
+
105
+ u = Ultravisor.new
106
+
107
+ u.add_child(id: :c1, klass: SomeWorker, method: :run)
108
+
109
+ u.run # => starts running an instance of SomeWorker, doesn't return
110
+
111
+ # In another thread...
112
+ u.add_child(id: :c2, klass: OtherWorker, method: :go!)
113
+
114
+ # An instance of OtherWorker will be created and set running
115
+
116
+ If you add a child to an already-running Ultravisor, that child will immediately be
117
+ started running, almost like magic.
118
+
119
+
120
+ ### Ordering of Children
121
+
122
+ The order in which children are defined is important. When children are (re)started,
123
+ they are always started in the order they were defined. When children are stopped,
124
+ either because the Ultravisor is shutting down, or because of a [supervision
125
+ strategy](#supervision-strategies), they are always stopped in the *reverse* order
126
+ of their definition.
127
+
128
+ All child specifications passed to {Ultravisor.new} always come first, in the
129
+ order they were in the array. Any children defined via calls to
130
+ {Ultravisor#add_child} will go next, in the order the `add_child` calls were
131
+ made.
132
+
133
+
134
+ ## Restarting Children
135
+
136
+ One of the fundamental purposes of a supervisor like Ultravisor is that it restarts
137
+ children if they crash, on the principle of "fail fast". There's no point failing fast
138
+ if things don't get automatically fixed. This is the default behaviour of all
139
+ Ultravisor children.
140
+
141
+ Controlling how children are restarted is the purpose of the "restart policy",
142
+ which is controlled by the `restart` and `restart_policy` named arguments in
143
+ the child specification. For example, if you want to create a child that will
144
+ only ever be run once, regardless of what happens to it, then use `restart:
145
+ :never`:
146
+
147
+ u.add_child(
148
+ id: :my_one_shot_child,
149
+ klass: Child,
150
+ method: :run_maybe,
151
+ restart: :never
152
+ )
153
+
154
+ If you want a child which gets restarted if its `method` raises an exception,
155
+ but *not* if it runs to completion without error, then use `restart: :on_failure`:
156
+
157
+ u.add_child(
158
+ id: :my_run_once_child,
159
+ klass: Child,
160
+ method: :run_once,
161
+ restart: :on_failure
162
+ )
163
+
164
+ ### The Limits of Failure
165
+
166
+ While restarting is great in general, you don't particularly want to fill your
167
+ logs with an endlessly restarting child -- say, because it doesn't have
168
+ permission to access a database. To solve that problem, an Ultravisor will
169
+ only attempt to restart a child a certain number of times before giving up and
170
+ exiting itself. The parameters of how this works are controlled by the
171
+ `restart_policy`, which is itself a hash:
172
+
173
+ u.add_child(
174
+ id: :my_restartable_child,
175
+ klass: Child,
176
+ method: :run,
177
+ restart_policy: {
178
+ period: 5,
179
+ retries: 2,
180
+ delay: 1,
181
+ }
182
+ )
183
+
184
+ The meaning of each of the `restart_policy` keys is best explained as part
185
+ of how Ultravisor restarts children.
186
+
187
+ When a child needs to be restarted, Ultravisor first waits a little while
188
+ before attempting the restart. The amount of time to wait is specified
189
+ by the `delay` value in the `restart_policy`. Then a new instance of the
190
+ `class` is instantiated, and the `method` is called on that instance.
191
+
192
+ The `period` and `retries` values of the `restart_policy` come into play
193
+ when the child exits repeatedly. If a single child needs to be restarted
194
+ more than `retries` times in `period` seconds, then instead of trying to
195
+ restart again, Ultravisor gives up. It doesn't try to start the child
196
+ again, it terminates all the *other* children of the Ultravisor, and
197
+ then it exits. Note that the `delay` between restarts is *not* part
198
+ of the `period`; only time spent actually running the child is
199
+ accounted for.
200
+
201
+
202
+ ## Managed Child Termination
203
+
204
+ If children need to be terminated, by default, child threads are simply
205
+ forcibly terminated by calling {Thread#kill} on them. However, for workers
206
+ which hold resources, this can cause problems.
207
+
208
+ Thus, it is possible to control both how a child is terminated, and how long
209
+ to wait for that termination to occur, by using the `shutdown` named argument
210
+ when you add a child (either via {Ultravisor#add_child}, or as part of the
211
+ `children` named argument to {Ultravisor.new}), like this:
212
+
213
+ u.add_child(
214
+ id: :fancy_worker,
215
+ shutdown: {
216
+ method: :gentle_landing,
217
+ timeout: 30
218
+ }
219
+ )
220
+
221
+ When a child with a custom shutdown policy needs to be terminated, the
222
+ method named in the `method` key is called on the instance of `class` that
223
+ represents that child. Once the shutdown has been signalled to the
224
+ worker, up to `timeout` seconds is allowed to elapse. If the child thread has
225
+ not terminated by this time, the thread is forcibly terminated by calling
226
+ {Thread#kill}. This timeout prevents shutdown or group restart from hanging
227
+ indefinitely.
228
+
229
+ Note that the `method` specified in the `shutdown` specification should
230
+ signal the worker to terminate, and then return immediately. It should
231
+ *not* wait for termination itself.
232
+
233
+
234
+ ## Supervision Strategies
235
+
236
+ When a child needs to be restarted, by default only the child that exited
237
+ will be restarted. However, it is possible to cause other
238
+ children to be restarted as well, if that is necessary. To do that, you
239
+ use the `strategy` named parameter when creating the Ultravisor:
240
+
241
+ u = Ultravisor.new(strategy: :all_for_one)
242
+
243
+ The possible values for the strategy are:
244
+
245
+ * `:one_for_one` -- the default restart strategy, this simply causes the
246
+ child which exited to be started again, in line with its restart policy.
247
+
248
+ * `:all_for_one` -- if any child needs to be restarted, all children of the
249
+ Ultravisor get terminated in reverse of their start order, and then all
250
+ children are started again, except those which are `restart: :never`, or
251
+ `restart: :on_failure` which had not already exited without error.
252
+
253
+ * `:rest_for_one` -- if any child needs to be restarted, all children of
254
+ the Ultravisor which are *after* the restarted child get terminated
255
+ in reverse of their start order, and then all children are started again,
256
+ except those which are `restart: :never`, or `restart: :on_failure` which
257
+ had not already exited without error.
258
+
259
+
260
+ ## Interacting With Child Objects
261
+
262
+ Since the Ultravisor is creating the object instances that run in the worker
263
+ threads, you don't automatically have access to the object instance itself.
264
+ This is somewhat by design -- concurrency bugs are hell. However, there *are*
265
+ ways around this, if you need to.
266
+
267
+
268
+ ### The power of cast / call
269
+
270
+ A common approach for interacting with an object in an otherwise concurrent
271
+ environment is the `cast` / `call` pattern. From the outside, the interface
272
+ is quite straightforward:
273
+
274
+ ```
275
+ u = Ultravisor.new(children: [
276
+ { id: :castcall, klass: CastCall, method: :run, enable_castcall: true }
277
+ ])
278
+
279
+ # This will return `nil` immediately
280
+ u[:castcall].cast.some_method
281
+
282
+ # This will, at some point in the future, return whatever `CastCall#some_method` returns
283
+ u[:castcall].call.some_method
284
+ ```
285
+
286
+ To enable `cast` / `call` support for a child, you must set the `enable_castcall`
287
+ keyword argument on the child. This is because failing to process `cast`s and
288
+ `call`s can cause all sorts of unpleasant backlogs, so children who intend to
289
+ receive (and process) `cast`s and `call`s must explicitly opt-in.
290
+
291
+ The interface to the object from outside is straightforward. You get a
292
+ reference to the instance of {Ultravisor::Child} for the child you want to talk
293
+ to (which is returned by {Ultravisor#add_child}, or {Ultravisor#[]}), and then
294
+ call `child.cast.<method>` or `child.call.<method>`, passing in arguments as
295
+ per normal. Any public method can be the target of the `cast` or `call`, and you
296
+ can pass in any arguments you like, *including blocks* (although bear in mind that
297
+ any blocks passed will be run in the child instance's thread, and many
298
+ concurrency dragons await the unwary).
299
+
300
+ The difference between the `cast` and `call` methods is in whether or not a
301
+ return value is expected, and hence when the method call chained through
302
+ `cast` or `call` returns.
303
+
304
+ When you call `cast`, the real method call gets queued for later execution,
305
+ and since no return value is expected, the `child.cast.<method>` returns
306
+ `nil` immediately and your code gets on with its day. This is useful
307
+ when you want to tell the worker something, or instruct it to do something,
308
+ but there's no value coming back.
309
+
310
+ In comparison, when you call `call`, the real method call still gets queued,
311
+ but the calling code blocks, waiting for the return value from the queued
312
+ method call. This may seem pointless -- why have concurrency that blocks? --
313
+ but the value comes from the synchronisation. The method call only happens
314
+ when the worker loop calls `process_castcall`, which it can do at a time that
315
+ suits it, and when it knows that nothing else is going on that could cause
316
+ problems.
317
+
318
+ One thing to be aware of when interacting with a worker instance is that it may
319
+ crash, and be restarted by the Ultravisor, before it gets around to processing
320
+ a queued message. If you used `child.cast`, then the method call is just...
321
+ lost, forever. On the other hand, if you used `child.call`, then an
322
+ {Ultravisor::ChildRestartedError} exception will be raised, which you can deal
323
+ with as you see fit.
324
+
325
+ The really interesting part is what happens *inside* the child instance. The
326
+ actual execution of code in response to the method calls passed through `cast`
327
+ and `call` will only happen when the running instance of the child's class
328
+ calls `process_castcall`. When that happens, all pending casts and calls will
329
+ be executed. Since this happens within the same thread as the rest of the
330
+ child instance's code, it's a lot safer than trying to synchronise everything
331
+ with locks.
332
+
333
+ You can, of course, just call `process_castcall` repeatedly; however, that's a
334
+ somewhat herp-a-derp way of doing it. The `castcall_fd` method in the running
335
+ instance will return an IO object which will become readable whenever there is
336
+ a pending `cast` or `call` to process. Thus, if you're using `IO.select` or
337
+ similar to wait for work to do, you can add `castcall_fd` to the readable set
338
+ and only call `process_castcall` when the relevant IO object comes back as readable. Don't
339
+ actually try *reading* from it yourself; `process_castcall` takes care of all that.
340
+
341
+ If you happen to have a child class whose *only* purpose is to process `cast`s
342
+ and `call`s, you should configure the Ultravisor to use `process_castcall_loop`
343
+ as its entry method. This is a wrapper method which blocks on `castcall_fd`
344
+ becoming readable, and loops infinitely.
345
+
346
+ It is important to remember that not all concurrency bugs can be prevented by
347
+ using `cast` / `call`. For example, read-modify-write operations will still
348
+ cause all the same problems they always do, so if you find yourself calling
349
+ `child.call`, modifying the value returned, and then calling `child.cast`
350
+ with that modified value, you're in for a bad time.
351
+
352
+
353
+ ### Direct (Unsafe) Instance Access
354
+
355
+ If you have a worker class which you're *really* sure is safe against concurrent
356
+ access, you can eschew the convenience and safety of `cast` / `call`, and instead
357
+ allow direct access to the worker instance object.
358
+
359
+ To do this, specify `access: :unsafe` in the child specification, and then
360
+ call `child.unsafe_instance` to get the instance object currently in play.
361
+
362
+ Yes, the multiple mentions of `unsafe` are there deliberately, and no, I won't
363
+ be removing them. They're there to remind you, always, that what you're doing
364
+ is unsafe.
365
+
366
+ If the child is restarting at the time `child.unsafe_instance` is called,
367
+ the call will block until the child worker is started again, after which
368
+ you'll get the newly created worker instance object. The worker could crash
369
+ again at any time, of course, leaving you with a now out-of-date object
370
+ that is no longer being actively run. It's up to you to figure out how to
371
+ deal with that. If the Ultravisor associated with the child
372
+ has terminated, your call to `child.unsafe_instance` will raise an
373
+ {Ultravisor::ChildRestartedError}.
374
+
375
+ Why yes, Gracie, there *are* a lot of things that can go wrong when using
376
+ direct instance object access. Still wondering why those `unsafe`s are in
377
+ the name?
378
+
379
+
380
+ ## Supervision Trees
381
+
382
+ Whilst a collection of workers is a neat thing to have, more powerful systems
383
+ can be constructed if supervisors can, themselves, be supervised. Primarily
384
+ this is useful when recovering from persistent errors, because you can use
385
+ a higher-level supervisor to restart an entire tree of workers when one of
386
+ them is having problems.
387
+
388
+ Creating a supervision tree is straightforward. Because Ultravisor works by
389
+ instantiating plain old Ruby objects, and Ultravisor is, itself, a plain old
390
+ Ruby class, you use it more-or-less like you would any other object:
391
+
392
+ u = Ultravisor.new
393
+ u.add_child(id: :sub_sup, klass: Ultravisor, method: :run, args: [children: [...]])
394
+
395
+ That's all there is to it. Whenever the parent Ultravisor wants to work on the
396
+ child Ultravisor, it treats it like any other child, asking it to terminate,
397
+ start, etc., and the child Ultravisor's work consists of terminating, starting,
398
+ etc. all of its children.
399
+
400
+ The only difference in default behaviour between a regular worker child and an
401
+ Ultravisor child is that an Ultravisor's `shutdown` policy is automatically set
402
+ to `method: :stop!, timeout: :infinity`. This is because it is *very* bad news
403
+ to forcibly terminate an Ultravisor before its children have stopped -- all
404
+ those children just get cast into the VM, never to be heard from again.