trident 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +7 -1
- data/CHANGELOG +5 -0
- data/Gemfile +1 -0
- data/README.md +10 -1
- data/lib/trident.rb +1 -0
- data/lib/trident/cli.rb +2 -2
- data/lib/trident/pool.rb +120 -27
- data/lib/trident/pool_handler.rb +1 -2
- data/lib/trident/pool_manager.rb +2 -2
- data/lib/trident/version.rb +1 -1
- data/lib/trident/worker.rb +33 -0
- data/test/fixtures/integration_project/config/trident.yml +5 -0
- data/test/integration/trident_test.rb +0 -6
- data/test/test_helper.rb +15 -46
- data/test/unit/trident/cli_test.rb +1 -1
- data/test/unit/trident/pool_manager_test.rb +2 -12
- data/test/unit/trident/pool_test.rb +289 -48
- data/test/unit/trident/worker_test.rb +52 -0
- data/trident.example.yml +4 -0
- data/trident.gemspec +1 -1
- metadata +27 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b1f99b2b31837fe6cd9fee43af02ebaaae7afe1f
+  data.tar.gz: 87df8e84d60b0162c5575eb6b2235373e5a03e2a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3f7053abe1c7276399377d9cbf8641dceb246202f216cb21d92f93e3f04d1950927b73c712126c690fbb17e0553b8db5737fa900d201fe8246e5a8441163344b
+  data.tar.gz: b0a2226f1931c27c736f14394cdf673f678fc4dd29566a6cad91e56e6ee15bf949e26a4049b8cb53a84fd93f889fd6d2bd90fd4c0bc7db7580e295ac15913077
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -31,9 +31,18 @@ See other command line options with
 
     trident --help
 
+## Orphaned workers
+The ability to track pool processes across a restart - allows a restart to spin up new processes as old ones die off gracefully.
+
+Limitations - It will treat any process that has the same pid from a previous pool as part of
+the orphaned processes if the process can be signalled from the pool process. To get around this
+you run the pool as a different user, which will prevent the pool from being able to signal the
+process.
+
 
 ## TODO
 
 * Add support for reloading the trident config with a HUP signal
 * Add support in yml for specifying [process limits](http://www.ruby-doc.org/core-1.9.3/Process.html#method-c-setrlimit) (memory especially)
-* Add
+* Add support for killing off orphans/processes that have been running for an excessively (configurable) long time.
+
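The "can be signalled" check that the new README section leans on is Ruby's signal-0 probe. A minimal sketch, not part of the diff and with a hypothetical helper name, of how a pool can decide whether a recorded pid still looks like one of its own:

```ruby
# Illustrative only; `signalable?` is a hypothetical helper, not trident code.
# Signal 0 delivers nothing but raises ESRCH when the process is gone and
# EPERM when it belongs to another user - the distinction the README's
# limitation note relies on.
def signalable?(pid)
  Process.kill(0, pid)
  true
rescue Errno::ESRCH, Errno::EPERM
  false
end

signalable?(Process.pid)  # => true for the current process
signalable?(1)            # => usually false (init is owned by root, so EPERM)
```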
data/lib/trident.rb
CHANGED
data/lib/trident/cli.rb
CHANGED
@@ -123,10 +123,10 @@ module Trident
 
         next if pool_filter.size > 0 && ! pool_filter.include?(name)
 
-        pool = Trident::Pool.new(name, handler, pool_config
+        pool = Trident::Pool.new(name, handler, pool_config)
         pools[name] = pool
       end
       pools
     end
   end
-end
+end
data/lib/trident/pool.rb
CHANGED
@@ -3,14 +3,35 @@ module Trident
     include GemLogger::LoggerSupport
     include Trident::Utils
 
-    attr_reader :name, :handler, :size, :options, :workers
+    attr_reader :name, :handler, :size, :options, :workers, :orphans, :orphans_dir
 
-    def initialize(name, handler,
+    def initialize(name, handler, options={})
       @name = name
       @handler = handler
-      @size = size
+      @size = options.delete('size') || 2
       @options = options || {}
       @workers = Set.new
+      @orphans_dir = options.delete('pids_dir') || File.join(Dir.pwd, 'trident-pools', name, 'pids')
+      @orphans = load_orphans(orphans_dir)
+    end
+
+    def load_orphans(path_to_orphans_dir)
+      unless File.exists?(path_to_orphans_dir)
+        FileUtils.mkdir_p(path_to_orphans_dir)
+      end
+
+      orphans = Set.new
+
+      Dir.foreach(path_to_orphans_dir) do |file|
+        path = File.join(path_to_orphans_dir, file)
+        next if File.directory?(path)
+
+        pid = Integer(IO.read(path))
+        orphan_worker = Worker.new(pid, self)
+        orphans << orphan_worker
+      end
+
+      orphans
     end
 
     def start
@@ -38,35 +59,97 @@ module Trident
       logger.info "<pool-#{name}> Pool up to date"
     end
 
+    # @return [Boolean] true iff total_workers_count > size.
+    # false otherwise
+    def above_threshold?
+      size < total_workers_count
+    end
+
+    # @return [Boolean] true iff total_workers_count == size.
+    # false otherwise
+    def at_threshold?
+      size == total_workers_count
+    end
+
+    # @return [Boolean] true iff workers.size > 0.
+    # false otherwise
+    def has_workers?
+      workers.size > 0
+    end
+
+    # @return [Integer] total number of workers including orphaned
+    # workers.
+    def total_workers_count
+      workers.size + orphans.size
+    end
+
     private
 
     def maintain_worker_count(kill_action)
+      cleanup_orphaned_workers
      cleanup_dead_workers(false)
 
-      if
-
-
-
+      if at_threshold?
+        logger.debug "<pool-#{name}> Worker count is correct."
+      # If we are above the threshold and we have workers
+      # then reduce the number of workers.
+      elsif above_threshold? && has_workers?
+        overthreshold = total_workers_count - size
+        workers_to_kill = [overthreshold, workers.size].min
+
+        logger.info("<pool-#{name}> Total workers #{workers.size} above threshold #{size} killing #{workers_to_kill}.")
+        kill_workers(workers_to_kill, kill_action)
+      # If we are above the threshold, and no workers
+      # then we can't do anything, but lets log out a
+      # message indicating this state.
+      elsif above_threshold?
+        logger.info("<pool-#{name}> Waiting on orphans before spawning workers.")
+      # If the sum of both the workers and orphan workers is under our
+      # size requirement let's spawn the number of workers required to
+      # reach that size.
       else
-        logger.
+        logger.info("<pool-#{name}> Orphans #{orphans.size}, Workers #{workers.size}")
+        spawn_workers(size - total_workers_count)
+      end
+    end
+
+    # Remove orphan workers which are either not running
+    # or which we don't have permission to signal (thereby telling us they
+    # where never a part of the pool)
+    def cleanup_orphaned_workers
+      orphans.clone.each do |worker|
+        begin
+          # Check if the process is running
+          Process.kill(0, worker.pid)
+        rescue Errno::EPERM, Errno::ESRCH => e
+          # If we get EPERM (Permission error) or ESRCH (No process with that pid)
+          # stop tracking that worker
+          logger.info("<pool-#{name}> Cleaning up orphaned worker #{worker.pid} because #{e.class.name}:#{e.message})")
+          orphans.delete(worker)
+          worker.destroy
+        rescue => e
+          # Make sure we catch any unexpected errors when signaling the process.
+          logger.error("<pool-#{name}> failed cleaning up worker #{worker.pid} because #{e.class.name}:#{e.message})")
+        end
       end
     end
 
     def cleanup_dead_workers(blocking=true)
       wait_flags = blocking ? 0 : Process::WNOHANG
-      workers.clone.each do |
+      workers.clone.each do |worker|
         begin
-
+          if Process.wait(worker.pid, wait_flags)
+            workers.delete(worker)
+          end
         rescue Errno::EINTR
           logger.warn("<pool-#{name}> Interrupted cleaning up workers, retrying")
           retry
         rescue Errno::ECHILD
           logger.warn("<pool-#{name}> Error cleaning up workers, ignoring")
-          # Calling
-          # a
-
+          # Calling Process.wait on a pid that was already waited on throws
+          # a ECHILD, so may as well remove it from our list of workers
+          workers.delete(worker)
         end
-        workers.delete(wpid) if wpid
       end
     end
 
@@ -79,30 +162,40 @@ module Trident
 
     def kill_workers(count, action)
       logger.info "<pool-#{name}> Killing #{count} workers with #{action}"
-      workers.to_a[-count, count].each do |
-        kill_worker(
+      workers.to_a[-count, count].each do |worker|
+        kill_worker(worker, action)
       end
     end
 
     def spawn_worker
       pid = fork do
-
-
-
-
+        begin
+          procline "pool-#{name}-worker", "starting handler #{handler.name}"
+          Trident::SignalHandler.reset_for_fork
+          handler.load
+          handler.start(options)
+        ensure
+          worker = Worker.new(Process.pid, self)
+          worker.destroy
+        end
       end
-
+
+      worker = Worker.new(pid, self)
+      worker.save
+
+      workers << worker
       logger.info "<pool-#{name}> Spawned worker #{pid}, worker count now at #{workers.size}"
     end
 
-    def kill_worker(
+    def kill_worker(worker, action)
       sig = handler.signal_for(action)
       raise "<pool-#{name}> No signal for action: #{action}" unless sig
-      logger.info "<pool-#{name}> Sending signal to worker: #{pid}/#{sig}/#{action}"
-      Process.kill(sig, pid)
-
-
-    end
+      logger.info "<pool-#{name}> Sending signal to worker: #{worker.pid}/#{sig}/#{action}"
+      Process.kill(sig, worker.pid)
+
+      workers.delete(worker)
 
+      logger.info "<pool-#{name}> Killed worker #{worker.pid}, worker count now at #{workers.size}"
+    end
   end
 end
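To make the reworked constructor concrete, here is a hedged usage sketch. The option keys mirror the YAML config ('size' and 'pids_dir' are consumed by the pool; whatever remains becomes the handler options), and the handler argument is a nil stand-in rather than a real Trident::PoolHandler:

```ruby
require 'trident'

handler = nil # stand-in; a real pool would receive a Trident::PoolHandler
pool = Trident::Pool.new('mypool1', handler,
                         'size'     => 3,
                         'pids_dir' => '/tmp/mypool1-pids')

pool.size                 # => 3
pool.orphans_dir          # => "/tmp/mypool1-pids"
pool.orphans              # => workers rebuilt from any *.pid files found there
pool.total_workers_count  # => orphans.size, since nothing has been spawned yet
pool.at_threshold?        # => true only once workers + orphans add up to 3
```

With, say, two leftover pidfiles from a previous run, maintain_worker_count would spawn only `size - total_workers_count` (one) new worker and let the orphans drain off on their own.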
data/lib/trident/pool_handler.rb
CHANGED
data/lib/trident/pool_manager.rb
CHANGED
@@ -50,7 +50,7 @@ module Trident
     private
 
     def procline_display
-      pools.collect {|pool| "#{pool.name}#{pool.workers.to_a.
+      pools.collect {|pool| "#{pool.name}#{pool.workers.to_a.collect(&:pid)}" }.join(" ")
     end
 
     def load_handlers
@@ -71,4 +71,4 @@ module Trident
     end
 
   end
-end
+end
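The completed procline_display line just concatenates each pool's name with the array of its worker pids. Roughly, with plain hashes standing in for real Pool and Worker objects:

```ruby
# Illustrative only: the shape of the string the manager now puts in its
# process title, and which the integration test later parses back out.
pools = [
  { name: 'mypool1', pids: [1001, 1002] },
  { name: 'mypool2', pids: [2001] }
]
display = pools.collect { |p| "#{p[:name]}#{p[:pids]}" }.join(" ")
# => "mypool1[1001, 1002] mypool2[2001]"
```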
data/lib/trident/version.rb
CHANGED
data/lib/trident/worker.rb
ADDED
@@ -0,0 +1,33 @@
+module Trident
+  # @param [Integer] pid - pid of the worker process
+  # @param [Trident::Pool] pool - pool managing the worker process.
+  class Worker < Struct.new(:pid, :pool)
+    # Crate a pidfile for this worker so that
+    # we may track it
+    def save
+      File.open(path, 'w') do |f|
+        f << "#{pid}"
+      end
+    end
+
+    # Remove the pidfile associated with this
+    # worker
+    def destroy
+      FileUtils.rm path if File.exists?(path)
+    end
+
+    # We determine the time that this worker was
+    # created from the creation timestamp on its
+    # pidfile
+    def created_at
+      @created_at ||= File.stat(path).ctime
+    end
+
+    protected
+
+    # Path to this worker's pid file
+    def path
+      File.join(pool.orphans_dir, "#{pid}.pid")
+    end
+  end
+end
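A hedged sketch of the pidfile lifecycle this new class provides; `FakePool` is a hypothetical stand-in, since Worker only needs something that responds to #orphans_dir:

```ruby
require 'fileutils'
require 'trident'

# Hypothetical usage only; FakePool is not part of trident.
FakePool = Struct.new(:orphans_dir)
pool = FakePool.new('/tmp/demo-pids')
FileUtils.mkdir_p(pool.orphans_dir)

worker = Trident::Worker.new(Process.pid, pool)
worker.save        # writes /tmp/demo-pids/<pid>.pid
worker.created_at  # => ctime of that pidfile
worker.destroy     # removes the pidfile again
```

In the pool itself, spawn_worker saves the pidfile in the parent right after forking, and the child's ensure block destroys it when the handler exits cleanly, so only pids that died uncleanly or survived a pool restart are left behind for load_orphans to pick up.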
data/test/fixtures/integration_project/config/trident.yml
CHANGED
@@ -40,6 +40,9 @@ pools:
     # options passed to each handler's initializer (merged into handler options above)
     options:
       name: one
+    # directory for storing child pids
+    pids_dir: '/tmp/mypool1'
+
   mypool2:
     # number of worker processes
     size: 2
@@ -48,4 +51,6 @@ pools:
     # options passed to each handler's initializer (merged into handler options above)
     options:
       name: two
+    # directory for storing child pids
+    pids_dir: '/tmp/mypool2'
 
data/test/integration/trident_test.rb
CHANGED
@@ -1,13 +1,11 @@
 require_relative '../test_helper'
 
 class Trident::TridentTest < MiniTest::Should::TestCase
-
   setup do
     @project_root = File.expand_path('../../fixtures/integration_project', __FILE__)
     @cli = "#{File.expand_path('../../..', __FILE__)}/bin/trident"
   end
 
-
   def parse_manager(manager_str)
     pools = {}
     manager_str.scan(/(\w+)\[([0-9, ]+)\]/) do |pool, pids|
@@ -18,7 +16,6 @@ class Trident::TridentTest < MiniTest::Should::TestCase
   end
 
   context "basic usage" do
-
     should "start and stop pools" do
       cmd = "#{@cli} --verbose --config #{@project_root}/config/trident.yml"
       io = IO.popen(cmd, :err=>[:child, :out])
@@ -46,11 +43,9 @@ class Trident::TridentTest < MiniTest::Should::TestCase
       Process.wait(io.pid)
       assert_empty child_processes
     end
-
   end
 
   context "worker maintenance" do
-
     should "restart failed workers" do
       cmd = "#{@cli} --verbose --config #{@project_root}/config/trident.yml"
       io = IO.popen(cmd, :err=>[:child, :out])
@@ -78,6 +73,5 @@ class Trident::TridentTest < MiniTest::Should::TestCase
       Process.wait(io.pid)
       assert_empty child_processes
     end
-
 end
 end
data/test/test_helper.rb
CHANGED
@@ -15,7 +15,6 @@ rescue Bundler::BundlerError => e
 end
 
 require 'minitest/autorun'
-require 'minitest/should'
 require "minitest/reporters"
 require "mocha/setup"
 require 'timeout'
@@ -135,53 +134,23 @@ def kill_all_child_processes
   Process.waitall
 end
 
-
-
+module Minitest::Should
+  class TestCase < MiniTest::Spec
 
-
-
-
-
-
-
-# Allow triggering single tests when running from rubymine
-# reopen the installed runner so we don't step on runner customizations
-class << MiniTest::Unit.runner
-  # Rubymine sends --name=/\Atest\: <context> should <should>\./
-  # Minitest runs each context as a suite
-  # Minitest filters methods by matching against: <suite>#test_0001_<should>
-  # Nested contexts are separted by spaces in rubymine, but ::s in minitest
-
-  def _run_suites(suites, type)
-    if options[:filter]
-      if options[:filter] =~ /\/\\Atest\\: (.*) should (.*)\\\.\//
-        context_filter = $1
-        should_filter = $2
-        should_filter.strip!
-        should_filter.gsub!(" ", "_")
-        should_filter.gsub!(/\W/, "")
-        context_filter = context_filter.gsub(" ", "((::)| )")
-        options[:filter] = "/\\A#{context_filter}(Test)?#test(_\\d+)?_should_#{should_filter}\\Z/"
-      end
+    # make minitest spec dsl similar to shoulda
+    class << self
+      alias :setup :before
+      alias :teardown :after
+      alias :context :describe
+      alias :should :it
     end
-
-
-
-
-
-
-
-        filter = options[:filter] || '/./'
-        filter = Regexp.new $1 if filter =~ /\/(.*)\//
-        all_test_methods = suite.send "#{type}_methods"
-        filtered_test_methods = all_test_methods.find_all { |m|
-          filter === m || filter === "#{suite}##{m}"
-        }
-
-        if filtered_test_methods.size > 0
-          super
-        else
-          [0, 0]
+
+    ORIGINAL_PROCLINE = $0
+
+    setup do
+      $0 = ORIGINAL_PROCLINE
+      kill_all_child_processes
     end
   end
 end
+
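The replacement block above is all that remains of the old minitest/should dependency: setup/teardown/context/should become thin aliases over MiniTest::Spec, and every test resets the procline and reaps stray children before running. Existing tests keep their shoulda-style wording; a purely illustrative, hypothetical example:

```ruby
require_relative 'test_helper'

# Hypothetical test, for illustration only; not part of the package.
class Trident::WorkerPidTest < MiniTest::Should::TestCase
  context "a worker" do
    setup do
      @pid = Process.pid
    end

    should "remember its pid" do
      assert_equal @pid, Trident::Worker.new(@pid, nil).pid
    end
  end
end
```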