trident 0.4.2 → 0.5.0
- checksums.yaml +4 -4
- data/.travis.yml +7 -1
- data/CHANGELOG +5 -0
- data/Gemfile +1 -0
- data/README.md +10 -1
- data/lib/trident.rb +1 -0
- data/lib/trident/cli.rb +2 -2
- data/lib/trident/pool.rb +120 -27
- data/lib/trident/pool_handler.rb +1 -2
- data/lib/trident/pool_manager.rb +2 -2
- data/lib/trident/version.rb +1 -1
- data/lib/trident/worker.rb +33 -0
- data/test/fixtures/integration_project/config/trident.yml +5 -0
- data/test/integration/trident_test.rb +0 -6
- data/test/test_helper.rb +15 -46
- data/test/unit/trident/cli_test.rb +1 -1
- data/test/unit/trident/pool_manager_test.rb +2 -12
- data/test/unit/trident/pool_test.rb +289 -48
- data/test/unit/trident/worker_test.rb +52 -0
- data/trident.example.yml +4 -0
- data/trident.gemspec +1 -1
- metadata +27 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b1f99b2b31837fe6cd9fee43af02ebaaae7afe1f
+  data.tar.gz: 87df8e84d60b0162c5575eb6b2235373e5a03e2a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3f7053abe1c7276399377d9cbf8641dceb246202f216cb21d92f93e3f04d1950927b73c712126c690fbb17e0553b8db5737fa900d201fe8246e5a8441163344b
+  data.tar.gz: b0a2226f1931c27c736f14394cdf673f678fc4dd29566a6cad91e56e6ee15bf949e26a4049b8cb53a84fd93f889fd6d2bd90fd4c0bc7db7580e295ac15913077
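The values above are plain SHA1 and SHA512 hex digests of the two archives packed inside the .gem file (a .gem is a tar archive containing metadata.gz, data.tar.gz, and checksums.yaml.gz). A quick Ruby sketch for verifying a downloaded gem against them, assuming the archives have been extracted to the current directory:

    require 'digest'

    # Compare these against the checksums.yaml entries above.
    puts Digest::SHA1.file('metadata.gz').hexdigest
    puts Digest::SHA512.file('data.tar.gz').hexdigest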
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -31,9 +31,18 @@ See other command line options with
 
     trident --help
 
+## Orphaned workers
+Trident can track pool processes across a restart, which allows a restart to spin up new processes as the old ones die off gracefully.
+
+Limitation: it will treat any process that has the same pid as one recorded by a previous pool
+as an orphaned process, provided that process can be signalled from the pool process. To get
+around this, run the pool as a different user, which will prevent the pool from being able to
+signal the process.
+
 
 ## TODO
 
 * Add support for reloading the trident config with a HUP signal
 * Add support in yml for specifying [process limits](http://www.ruby-doc.org/core-1.9.3/Process.html#method-c-setrlimit) (memory especially)
-* Add
+* Add support for killing off orphans/processes that have been running for an excessively (configurable) long time.
+
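The check behind this limitation is the classic "signal 0" probe, which `Pool#cleanup_orphaned_workers` uses below. As a standalone sketch (the `signallable?` helper is illustrative, not part of the gem):

    # Signal 0 probes a pid without actually delivering a signal.
    def signallable?(pid)
      Process.kill(0, pid)
      true                  # process exists and we are allowed to signal it
    rescue Errno::ESRCH
      false                 # no process with that pid
    rescue Errno::EPERM
      false                 # process exists but belongs to another user
    end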
data/lib/trident.rb
CHANGED
data/lib/trident/cli.rb
CHANGED
@@ -123,10 +123,10 @@ module Trident
 
         next if pool_filter.size > 0 && ! pool_filter.include?(name)
 
-        pool = Trident::Pool.new(name, handler, pool_config
+        pool = Trident::Pool.new(name, handler, pool_config)
         pools[name] = pool
       end
       pools
     end
   end
-end
+end
data/lib/trident/pool.rb
CHANGED
@@ -3,14 +3,35 @@ module Trident
     include GemLogger::LoggerSupport
     include Trident::Utils
 
-    attr_reader :name, :handler, :size, :options, :workers
+    attr_reader :name, :handler, :size, :options, :workers, :orphans, :orphans_dir
 
-    def initialize(name, handler,
+    def initialize(name, handler, options={})
       @name = name
       @handler = handler
-      @size = size
+      @size = options.delete('size') || 2
       @options = options || {}
       @workers = Set.new
+      @orphans_dir = options.delete('pids_dir') || File.join(Dir.pwd, 'trident-pools', name, 'pids')
+      @orphans = load_orphans(orphans_dir)
+    end
+
+    def load_orphans(path_to_orphans_dir)
+      unless File.exists?(path_to_orphans_dir)
+        FileUtils.mkdir_p(path_to_orphans_dir)
+      end
+
+      orphans = Set.new
+
+      Dir.foreach(path_to_orphans_dir) do |file|
+        path = File.join(path_to_orphans_dir, file)
+        next if File.directory?(path)
+
+        pid = Integer(IO.read(path))
+        orphan_worker = Worker.new(pid, self)
+        orphans << orphan_worker
+      end
+
+      orphans
     end
 
     def start
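With the new constructor, 'size' and 'pids_dir' are plucked out of the (string-keyed, YAML-sourced) options hash and the rest is kept for the handler. A hypothetical construction, assuming `handler` is a `Trident::PoolHandler`:

    pool = Trident::Pool.new('mypool1', handler,
                             'size'     => 4,
                             'pids_dir' => '/tmp/mypool1')
    pool.size        # => 4
    pool.orphans_dir # => "/tmp/mypool1"
    pool.orphans     # workers adopted from any pidfiles already in /tmp/mypool1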
@@ -38,35 +59,97 @@ module Trident
       logger.info "<pool-#{name}> Pool up to date"
     end
 
+    # @return [Boolean] true iff total_workers_count > size,
+    #   false otherwise
+    def above_threshold?
+      size < total_workers_count
+    end
+
+    # @return [Boolean] true iff total_workers_count == size,
+    #   false otherwise
+    def at_threshold?
+      size == total_workers_count
+    end
+
+    # @return [Boolean] true iff workers.size > 0,
+    #   false otherwise
+    def has_workers?
+      workers.size > 0
+    end
+
+    # @return [Integer] total number of workers, including orphaned
+    #   workers
+    def total_workers_count
+      workers.size + orphans.size
+    end
+
     private
 
     def maintain_worker_count(kill_action)
+      cleanup_orphaned_workers
       cleanup_dead_workers(false)
 
-      if
-
-
-
+      if at_threshold?
+        logger.debug "<pool-#{name}> Worker count is correct."
+      # If we are above the threshold and we have workers,
+      # then reduce the number of workers.
+      elsif above_threshold? && has_workers?
+        overthreshold = total_workers_count - size
+        workers_to_kill = [overthreshold, workers.size].min
+
+        logger.info("<pool-#{name}> Total workers #{workers.size} above threshold #{size}, killing #{workers_to_kill}.")
+        kill_workers(workers_to_kill, kill_action)
+      # If we are above the threshold and have no workers,
+      # then we can't do anything, but let's log a
+      # message indicating this state.
+      elsif above_threshold?
+        logger.info("<pool-#{name}> Waiting on orphans before spawning workers.")
+      # If the sum of the workers and orphan workers is under our
+      # size requirement, spawn the number of workers required to
+      # reach that size.
       else
-        logger.
+        logger.info("<pool-#{name}> Orphans #{orphans.size}, Workers #{workers.size}")
+        spawn_workers(size - total_workers_count)
+      end
+    end
+
+    # Remove orphan workers which are either not running
+    # or which we don't have permission to signal (thereby telling us they
+    # were never a part of the pool)
+    def cleanup_orphaned_workers
+      orphans.clone.each do |worker|
+        begin
+          # Check if the process is running
+          Process.kill(0, worker.pid)
+        rescue Errno::EPERM, Errno::ESRCH => e
+          # If we get EPERM (permission error) or ESRCH (no process with that pid),
+          # stop tracking that worker
+          logger.info("<pool-#{name}> Cleaning up orphaned worker #{worker.pid} because #{e.class.name}: #{e.message}")
+          orphans.delete(worker)
+          worker.destroy
+        rescue => e
+          # Make sure we catch any unexpected errors when signaling the process.
+          logger.error("<pool-#{name}> Failed cleaning up worker #{worker.pid} because #{e.class.name}: #{e.message}")
+        end
       end
     end
 
     def cleanup_dead_workers(blocking=true)
       wait_flags = blocking ? 0 : Process::WNOHANG
-      workers.clone.each do |
+      workers.clone.each do |worker|
         begin
-
+          if Process.wait(worker.pid, wait_flags)
+            workers.delete(worker)
+          end
         rescue Errno::EINTR
           logger.warn("<pool-#{name}> Interrupted cleaning up workers, retrying")
           retry
         rescue Errno::ECHILD
           logger.warn("<pool-#{name}> Error cleaning up workers, ignoring")
-          # Calling
-          # a
-
+          # Calling Process.wait on a pid that was already waited on throws
+          # an ECHILD, so we may as well remove it from our list of workers
+          workers.delete(worker)
         end
-        workers.delete(wpid) if wpid
       end
     end
 
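To make the branch arithmetic in `maintain_worker_count` concrete, a worked example with hypothetical numbers:

    # Just after a restart: the pool wants 4 workers, has spawned 1 of its
    # own, and has adopted 5 orphans that are still winding down.
    size                = 4
    workers_count       = 1
    orphans_count       = 5
    total_workers_count = workers_count + orphans_count   # => 6, so above_threshold?

    overthreshold   = total_workers_count - size          # => 2
    workers_to_kill = [overthreshold, workers_count].min  # => 1

Only the pool's own workers are ever signalled; surplus orphans are left to exit on their own and are reaped by `cleanup_orphaned_workers` once they do.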
@@ -79,30 +162,40 @@ module Trident
 
     def kill_workers(count, action)
       logger.info "<pool-#{name}> Killing #{count} workers with #{action}"
-      workers.to_a[-count, count].each do |
-        kill_worker(
+      workers.to_a[-count, count].each do |worker|
+        kill_worker(worker, action)
       end
     end
 
     def spawn_worker
       pid = fork do
-
-
-
-
+        begin
+          procline "pool-#{name}-worker", "starting handler #{handler.name}"
+          Trident::SignalHandler.reset_for_fork
+          handler.load
+          handler.start(options)
+        ensure
+          worker = Worker.new(Process.pid, self)
+          worker.destroy
+        end
       end
-
+
+      worker = Worker.new(pid, self)
+      worker.save
+
+      workers << worker
       logger.info "<pool-#{name}> Spawned worker #{pid}, worker count now at #{workers.size}"
     end
 
-    def kill_worker(
+    def kill_worker(worker, action)
       sig = handler.signal_for(action)
       raise "<pool-#{name}> No signal for action: #{action}" unless sig
-      logger.info "<pool-#{name}> Sending signal to worker: #{pid}/#{sig}/#{action}"
-      Process.kill(sig, pid)
-
-
-    end
+      logger.info "<pool-#{name}> Sending signal to worker: #{worker.pid}/#{sig}/#{action}"
+      Process.kill(sig, worker.pid)
+
+      workers.delete(worker)
 
+      logger.info "<pool-#{name}> Killed worker #{worker.pid}, worker count now at #{workers.size}"
+    end
   end
 end
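The pidfile lifecycle in `spawn_worker` is worth spelling out: the parent records the child's pid after forking, and the child removes its own pidfile in an `ensure` block when the handler exits. Condensed (a restatement of the code above, not new API):

    pid = fork do
      begin
        handler.start(options)                 # child: run until told to stop
      ensure
        Worker.new(Process.pid, self).destroy  # child: remove own pidfile on exit
      end
    end
    Worker.new(pid, self).save                 # parent: record child in pids_dir

A pidfile therefore exists for roughly as long as its worker runs; if the pool process dies first, the files remain, and the next pool's `load_orphans` adopts them as orphans instead of spawning over them.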
data/lib/trident/pool_handler.rb
CHANGED
data/lib/trident/pool_manager.rb
CHANGED
@@ -50,7 +50,7 @@ module Trident
     private
 
     def procline_display
-      pools.collect {|pool| "#{pool.name}#{pool.workers.to_a.
+      pools.collect {|pool| "#{pool.name}#{pool.workers.to_a.collect(&:pid)}" }.join(" ")
     end
 
     def load_handlers
@@ -71,4 +71,4 @@ module Trident
     end
 
   end
-end
+end
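The procline now shows each pool's worker pids, which is what the integration test parses with /(\w+)\[([0-9, ]+)\]/. A self-contained sketch of the string format (the Pool/Worker stand-ins are hypothetical):

    # Minimal stand-ins, just to show the display format.
    Worker = Struct.new(:pid)
    Pool   = Struct.new(:name, :workers)

    pools = [Pool.new("mypool1", [Worker.new(1234), Worker.new(1235)]),
             Pool.new("mypool2", [Worker.new(1240)])]

    display = pools.collect { |pool| "#{pool.name}#{pool.workers.to_a.collect(&:pid)}" }.join(" ")
    puts display  # => mypool1[1234, 1235] mypool2[1240]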
data/lib/trident/version.rb
CHANGED
data/lib/trident/worker.rb
ADDED
@@ -0,0 +1,33 @@
+module Trident
+  # @param [Integer] pid - pid of the worker process
+  # @param [Trident::Pool] pool - pool managing the worker process.
+  class Worker < Struct.new(:pid, :pool)
+    # Create a pidfile for this worker so that
+    # we may track it
+    def save
+      File.open(path, 'w') do |f|
+        f << "#{pid}"
+      end
+    end
+
+    # Remove the pidfile associated with this
+    # worker
+    def destroy
+      FileUtils.rm path if File.exists?(path)
+    end
+
+    # We determine the time that this worker was
+    # created from the creation timestamp on its
+    # pidfile
+    def created_at
+      @created_at ||= File.stat(path).ctime
+    end
+
+    protected
+
+    # Path to this worker's pid file
+    def path
+      File.join(pool.orphans_dir, "#{pid}.pid")
+    end
+  end
+end
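Hypothetical usage of the new class, assuming `pool` responds to #orphans_dir as Trident::Pool now does:

    worker = Trident::Worker.new(12345, pool)
    worker.save        # writes <orphans_dir>/12345.pid
    worker.created_at  # ctime of the pidfile
    worker.destroy     # removes the pidfile, if present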
data/trident.example.yml
CHANGED
@@ -40,6 +40,9 @@ pools:
     # options passed to each handler's initializer (merged into handler options above)
     options:
       name: one
+    # directory for storing child pids
+    pids_dir: '/tmp/mypool1'
+
   mypool2:
     # number of worker processes
     size: 2
@@ -48,4 +51,6 @@ pools:
     # options passed to each handler's initializer (merged into handler options above)
     options:
       name: two
+    # directory for storing child pids
+    pids_dir: '/tmp/mypool2'
 
data/test/integration/trident_test.rb
CHANGED
@@ -1,13 +1,11 @@
 require_relative '../test_helper'
 
 class Trident::TridentTest < MiniTest::Should::TestCase
-
   setup do
     @project_root = File.expand_path('../../fixtures/integration_project', __FILE__)
     @cli = "#{File.expand_path('../../..', __FILE__)}/bin/trident"
   end
 
-
   def parse_manager(manager_str)
     pools = {}
     manager_str.scan(/(\w+)\[([0-9, ]+)\]/) do |pool, pids|
@@ -18,7 +16,6 @@ class Trident::TridentTest < MiniTest::Should::TestCase
   end
 
   context "basic usage" do
-
     should "start and stop pools" do
       cmd = "#{@cli} --verbose --config #{@project_root}/config/trident.yml"
       io = IO.popen(cmd, :err=>[:child, :out])
@@ -46,11 +43,9 @@ class Trident::TridentTest < MiniTest::Should::TestCase
       Process.wait(io.pid)
       assert_empty child_processes
     end
-
   end
 
   context "worker maintenance" do
-
     should "restart failed workers" do
       cmd = "#{@cli} --verbose --config #{@project_root}/config/trident.yml"
       io = IO.popen(cmd, :err=>[:child, :out])
@@ -78,6 +73,5 @@ class Trident::TridentTest < MiniTest::Should::TestCase
       Process.wait(io.pid)
       assert_empty child_processes
     end
-
   end
 end
data/test/test_helper.rb
CHANGED
@@ -15,7 +15,6 @@ rescue Bundler::BundlerError => e
 end
 
 require 'minitest/autorun'
-require 'minitest/should'
 require "minitest/reporters"
 require "mocha/setup"
 require 'timeout'
@@ -135,53 +134,23 @@ def kill_all_child_processes
   Process.waitall
 end
 
-
-
+module Minitest::Should
+  class TestCase < MiniTest::Spec
 
-
-
-
-
-
-
-  # Allow triggering single tests when running from rubymine
-  # reopen the installed runner so we don't step on runner customizations
-  class << MiniTest::Unit.runner
-    # Rubymine sends --name=/\Atest\: <context> should <should>\./
-    # Minitest runs each context as a suite
-    # Minitest filters methods by matching against: <suite>#test_0001_<should>
-    # Nested contexts are separated by spaces in rubymine, but ::s in minitest
-
-    def _run_suites(suites, type)
-      if options[:filter]
-        if options[:filter] =~ /\/\\Atest\\: (.*) should (.*)\\\.\//
-          context_filter = $1
-          should_filter = $2
-          should_filter.strip!
-          should_filter.gsub!(" ", "_")
-          should_filter.gsub!(/\W/, "")
-          context_filter = context_filter.gsub(" ", "((::)| )")
-          options[:filter] = "/\\A#{context_filter}(Test)?#test(_\\d+)?_should_#{should_filter}\\Z/"
-        end
+    # make minitest spec dsl similar to shoulda
+    class << self
+      alias :setup :before
+      alias :teardown :after
+      alias :context :describe
+      alias :should :it
     end
-
-
-
-
-
-
-
-      filter = options[:filter] || '/./'
-      filter = Regexp.new $1 if filter =~ /\/(.*)\//
-      all_test_methods = suite.send "#{type}_methods"
-      filtered_test_methods = all_test_methods.find_all { |m|
-        filter === m || filter === "#{suite}##{m}"
-      }
-
-      if filtered_test_methods.size > 0
-        super
-      else
-        [0, 0]
+
+    ORIGINAL_PROCLINE = $0
+
+    setup do
+      $0 = ORIGINAL_PROCLINE
+      kill_all_child_processes
     end
   end
 end
+
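The shim above maps the shoulda-style DSL onto MiniTest::Spec, replacing the old minitest/should dependency and its RubyMine runner patch. An illustrative (hypothetical) test using the aliased DSL:

    class Trident::ExampleTest < MiniTest::Should::TestCase
      setup do
        @pool_name = "mypool1"
      end

      context "aliased dsl" do
        should "behave like minitest spec" do
          assert_equal "mypool1", @pool_name
        end
      end
    end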