trident 0.4.2 → 0.5.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 368b964b09f272043b062c0220aa86140d69fd90
-  data.tar.gz: 83985e089b57fed096b1880f7a854383e4d2233d
+  metadata.gz: b1f99b2b31837fe6cd9fee43af02ebaaae7afe1f
+  data.tar.gz: 87df8e84d60b0162c5575eb6b2235373e5a03e2a
 SHA512:
-  metadata.gz: 9d87f08599ae67424158561c90292075bd2db709dcff5e49abd5de61b66e66e982c7cb3ff9051dea975bba527d028f674693c7096331ba0774b9b9dcf95a6d6f
-  data.tar.gz: 5e4b5003ae49958e13f3a0527f44b7214bb4c0301a261ea51149070da089e9161314dff36965f43f1a4dbf58f3c72a63abe26600e713310c41de14b4abd8c50d
+  metadata.gz: 3f7053abe1c7276399377d9cbf8641dceb246202f216cb21d92f93e3f04d1950927b73c712126c690fbb17e0553b8db5737fa900d201fe8246e5a8441163344b
+  data.tar.gz: b0a2226f1931c27c736f14394cdf673f678fc4dd29566a6cad91e56e6ee15bf949e26a4049b8cb53a84fd93f889fd6d2bd90fd4c0bc7db7580e295ac15913077
data/.travis.yml CHANGED
@@ -3,6 +3,12 @@ cache: bundler
 rvm:
   - 1.9.3
   - 2.0.0
-  # - rbx-19mode
+  - 2.1.1
+  - rbx
 
 script: bundle exec rake
+
+matrix:
+  allow_failures:
+    - rvm: rbx
+
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
+0.5.0 (07/07/2014)
+------------------
+
+Merge pull request #1 from backupify/orphan_workers <92a5c49> [Gregory Salmon] [james-lawrence]
+
 0.4.2 (11/20/2013)
 ------------------
 
data/Gemfile CHANGED
@@ -7,3 +7,4 @@ gemspec
 gem 'coveralls', :require => false
 
 gem "mocha", :require => false
+gem 'rubymine_minitest_spec', :git => 'git@github.com:backupify/rubymine_minitest_spec.git'
data/README.md CHANGED
@@ -31,9 +31,18 @@ See other command line options with
 
     trident --help
 
+## Orphaned workers
+The ability to track pool processes across a restart allows a restart to spin
+up new processes as the old ones die off gracefully.
+
+Limitation: any process whose pid matches one recorded by a previous pool is
+treated as orphaned if it can be signalled from the pool process. To get
+around this, run the pool as a different user, which prevents the pool from
+signalling the process.
 
 
 ## TODO
 
 * Add support for reloading the trident config with a HUP signal
 * Add support in yml for specifying [process limits](http://www.ruby-doc.org/core-1.9.3/Process.html#method-c-setrlimit) (memory especially)
-* Add ability to track pool processes across a restart (or maybe only across a HUP) - allows a restart to spin up new processes as old ones die off gracefully.
+* Add support for killing off orphans/processes that have been running for an excessively long (configurable) time.
+
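
The "can be signalled" check in the limitation above is the standard kill-0 probe that the pool's orphan cleanup uses. A minimal standalone sketch of that probe (the helper name signallable? is ours, not part of trident):

    # Signal 0 sends nothing; it only performs the existence/permission check.
    def signallable?(pid)
      Process.kill(0, pid)
      true
    rescue Errno::ESRCH  # no process with that pid
      false
    rescue Errno::EPERM  # pid exists but belongs to another user
      false
    end

Running the pool as a different user turns the probe into Errno::EPERM, which is why a recycled pid from an old pool is then no longer adopted as an orphan.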
data/lib/trident.rb CHANGED
@@ -6,3 +6,4 @@ require "trident/pool"
 require "trident/pool_handler"
 require "trident/pool_manager"
 require "trident/signal_handler"
+require "trident/worker"
data/lib/trident/cli.rb CHANGED
@@ -123,10 +123,10 @@ module Trident
 
         next if pool_filter.size > 0 && ! pool_filter.include?(name)
 
-        pool = Trident::Pool.new(name, handler, pool_config['size'], pool_config['options'])
+        pool = Trident::Pool.new(name, handler, pool_config)
         pools[name] = pool
       end
       pools
     end
   end
-end
+end
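
With this change the caller hands over the whole pool_config hash, and Pool#initialize extracts 'size' (defaulting to 2) and 'pids_dir' itself. A hedged sketch of the two call styles, where handler stands in for a Trident::PoolHandler and the values are illustrative:

    # 0.4.2: size and options were separate positional arguments
    Trident::Pool.new("mypool1", handler, 4, "name" => "one")

    # 0.5.0: a single config hash; 'size' and 'pids_dir' are pulled out
    # by Pool#initialize, the rest becomes the pool's options
    Trident::Pool.new("mypool1", handler, "size" => 4, "pids_dir" => "/tmp/mypool1")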
data/lib/trident/pool.rb CHANGED
@@ -3,14 +3,35 @@ module Trident
     include GemLogger::LoggerSupport
     include Trident::Utils
 
-    attr_reader :name, :handler, :size, :options, :workers
+    attr_reader :name, :handler, :size, :options, :workers, :orphans, :orphans_dir
 
-    def initialize(name, handler, size, options={})
+    def initialize(name, handler, options={})
       @name = name
       @handler = handler
-      @size = size
+      @size = options.delete('size') || 2
       @options = options || {}
       @workers = Set.new
+      @orphans_dir = options.delete('pids_dir') || File.join(Dir.pwd, 'trident-pools', name, 'pids')
+      @orphans = load_orphans(orphans_dir)
+    end
+
+    def load_orphans(path_to_orphans_dir)
+      unless File.exists?(path_to_orphans_dir)
+        FileUtils.mkdir_p(path_to_orphans_dir)
+      end
+
+      orphans = Set.new
+
+      Dir.foreach(path_to_orphans_dir) do |file|
+        path = File.join(path_to_orphans_dir, file)
+        next if File.directory?(path)
+
+        pid = Integer(IO.read(path))
+        orphan_worker = Worker.new(pid, self)
+        orphans << orphan_worker
+      end
+
+      orphans
     end
 
     def start
@@ -38,35 +59,97 @@ module Trident
       logger.info "<pool-#{name}> Pool up to date"
     end
 
+    # @return [Boolean] true iff total_workers_count > size,
+    #   false otherwise
+    def above_threshold?
+      size < total_workers_count
+    end
+
+    # @return [Boolean] true iff total_workers_count == size,
+    #   false otherwise
+    def at_threshold?
+      size == total_workers_count
+    end
+
+    # @return [Boolean] true iff workers.size > 0,
+    #   false otherwise
+    def has_workers?
+      workers.size > 0
+    end
+
+    # @return [Integer] total number of workers, including orphaned
+    #   workers
+    def total_workers_count
+      workers.size + orphans.size
+    end
+
     private
 
     def maintain_worker_count(kill_action)
+      cleanup_orphaned_workers
       cleanup_dead_workers(false)
 
-      if size > workers.size
-        spawn_workers(size - workers.size)
-      elsif size < workers.size
-        kill_workers(workers.size - size, kill_action)
+      if at_threshold?
+        logger.debug "<pool-#{name}> Worker count is correct."
+      # If we are above the threshold and we have workers,
+      # reduce the number of workers.
+      elsif above_threshold? && has_workers?
+        overthreshold = total_workers_count - size
+        workers_to_kill = [overthreshold, workers.size].min
+
+        logger.info("<pool-#{name}> Total workers #{total_workers_count} above threshold #{size}, killing #{workers_to_kill}.")
+        kill_workers(workers_to_kill, kill_action)
+      # If we are above the threshold but have no workers of our own,
+      # we can't do anything, but let's log a message indicating
+      # this state.
+      elsif above_threshold?
+        logger.info("<pool-#{name}> Waiting on orphans before spawning workers.")
+      # If the sum of the workers and orphaned workers is under our
+      # size requirement, spawn the number of workers required to
+      # reach that size.
       else
-        logger.debug "<pool-#{name}> Worker count is correct"
+        logger.info("<pool-#{name}> Orphans #{orphans.size}, Workers #{workers.size}")
+        spawn_workers(size - total_workers_count)
+      end
+    end
+
+    # Remove orphaned workers which are either not running
+    # or which we don't have permission to signal (thereby telling us they
+    # were never a part of the pool)
+    def cleanup_orphaned_workers
+      orphans.clone.each do |worker|
+        begin
+          # Check if the process is running
+          Process.kill(0, worker.pid)
+        rescue Errno::EPERM, Errno::ESRCH => e
+          # If we get EPERM (permission error) or ESRCH (no process with
+          # that pid), stop tracking that worker
+          logger.info("<pool-#{name}> Cleaning up orphaned worker #{worker.pid} because #{e.class.name}: #{e.message}")
+          orphans.delete(worker)
+          worker.destroy
+        rescue => e
+          # Make sure we catch any unexpected errors when signalling the process.
+          logger.error("<pool-#{name}> Failed cleaning up worker #{worker.pid} because #{e.class.name}: #{e.message}")
+        end
       end
     end
 
     def cleanup_dead_workers(blocking=true)
       wait_flags = blocking ? 0 : Process::WNOHANG
-      workers.clone.each do |pid|
+      workers.clone.each do |worker|
         begin
-          wpid = Process.wait(pid, wait_flags)
+          if Process.wait(worker.pid, wait_flags)
+            workers.delete(worker)
+          end
         rescue Errno::EINTR
           logger.warn("<pool-#{name}> Interrupted cleaning up workers, retrying")
           retry
         rescue Errno::ECHILD
           logger.warn("<pool-#{name}> Error cleaning up workers, ignoring")
-          # Calling process.wait on a pid that was already waited on throws
-          # a ECHLD, so may as well remove it from our list of workers
-          wpid = pid
+          # Calling Process.wait on a pid that was already waited on throws
+          # an ECHILD, so may as well remove it from our list of workers
+          workers.delete(worker)
         end
-        workers.delete(wpid) if wpid
       end
     end
 
@@ -79,30 +162,40 @@ module Trident
 
     def kill_workers(count, action)
       logger.info "<pool-#{name}> Killing #{count} workers with #{action}"
-      workers.to_a[-count, count].each do |pid|
-        kill_worker(pid, action)
+      workers.to_a[-count, count].each do |worker|
+        kill_worker(worker, action)
       end
     end
 
     def spawn_worker
       pid = fork do
-        procline "pool-#{name}-worker", "starting handler #{handler.name}"
-        Trident::SignalHandler.reset_for_fork
-        handler.load
-        handler.start(options)
+        begin
+          procline "pool-#{name}-worker", "starting handler #{handler.name}"
+          Trident::SignalHandler.reset_for_fork
+          handler.load
+          handler.start(options)
+        ensure
+          worker = Worker.new(Process.pid, self)
+          worker.destroy
+        end
       end
-      workers << pid
+
+      worker = Worker.new(pid, self)
+      worker.save
+
+      workers << worker
       logger.info "<pool-#{name}> Spawned worker #{pid}, worker count now at #{workers.size}"
     end
 
-    def kill_worker(pid, action)
+    def kill_worker(worker, action)
       sig = handler.signal_for(action)
       raise "<pool-#{name}> No signal for action: #{action}" unless sig
-      logger.info "<pool-#{name}> Sending signal to worker: #{pid}/#{sig}/#{action}"
-      Process.kill(sig, pid)
-      workers.delete(pid)
-      logger.info "<pool-#{name}> Killed worker #{pid}, worker count now at #{workers.size}"
-    end
+      logger.info "<pool-#{name}> Sending signal to worker: #{worker.pid}/#{sig}/#{action}"
+      Process.kill(sig, worker.pid)
+
+      workers.delete(worker)
 
+      logger.info "<pool-#{name}> Killed worker #{worker.pid}, worker count now at #{workers.size}"
+    end
   end
 end
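
The rewritten maintain_worker_count counts orphans toward the pool size but only ever signals processes it forked itself. A worked example of the arithmetic with illustrative numbers:

    size    = 4     # configured pool size
    workers = 3     # processes this pool forked
    orphans = 2     # pids adopted from a previous pool's pidfiles

    total = workers + orphans                      # => 5, so above_threshold? is true
    workers_to_kill = [total - size, workers].min  # => [1, 3].min => 1

Only one of the pool's own workers is killed; a pool with no workers of its own falls into the "Waiting on orphans" branch and spawns nothing until the orphans exit.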
data/lib/trident/pool_handler.rb CHANGED
@@ -26,6 +26,5 @@ module Trident
     def signal_for(action)
       signal_mappings[action] || signal_mappings['default'] || "SIGTERM"
     end
-
   end
-end
+end
data/lib/trident/pool_manager.rb CHANGED
@@ -50,7 +50,7 @@ module Trident
     private
 
     def procline_display
-      pools.collect {|pool| "#{pool.name}#{pool.workers.to_a.inspect}" }.join(" ")
+      pools.collect {|pool| "#{pool.name}#{pool.workers.to_a.collect(&:pid)}" }.join(" ")
     end
 
     def load_handlers
@@ -71,4 +71,4 @@ module Trident
     end
 
   end
-end
+end
data/lib/trident/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Trident
-  VERSION = "0.4.2"
+  VERSION = "0.5.0"
 end
data/lib/trident/worker.rb ADDED
@@ -0,0 +1,33 @@
+module Trident
+  # @param [Integer] pid - pid of the worker process
+  # @param [Trident::Pool] pool - pool managing the worker process.
+  class Worker < Struct.new(:pid, :pool)
+    # Create a pidfile for this worker so that
+    # we may track it
+    def save
+      File.open(path, 'w') do |f|
+        f << "#{pid}"
+      end
+    end
+
+    # Remove the pidfile associated with this
+    # worker
+    def destroy
+      FileUtils.rm path if File.exists?(path)
+    end
+
+    # We determine the time that this worker was
+    # created from the creation timestamp on its
+    # pidfile
+    def created_at
+      @created_at ||= File.stat(path).ctime
+    end
+
+    protected
+
+    # Path to this worker's pid file
+    def path
+      File.join(pool.orphans_dir, "#{pid}.pid")
+    end
+  end
+end
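
A short usage sketch of the new Worker struct. FakePool is a stand-in here; a real Trident::Pool satisfies the same orphans_dir interface:

    require 'fileutils'
    require 'trident'

    FakePool = Struct.new(:orphans_dir)
    pool = FakePool.new('/tmp/mypool1/pids')
    FileUtils.mkdir_p(pool.orphans_dir)

    worker = Trident::Worker.new(12345, pool)
    worker.save          # writes /tmp/mypool1/pids/12345.pid
    worker.created_at    # ctime of the pidfile, used to age the worker
    worker.destroy       # removes the pidfile once the process is gone

Because Worker is a Struct of (pid, pool), two instances built from the same pid and pool compare equal, which is what lets the pool's Set operations match a worker reconstructed from a pidfile.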
data/test/fixtures/integration_project/config/trident.yml CHANGED
@@ -40,6 +40,9 @@ pools:
     # options passed to each handler's initializer (merged into handler options above)
     options:
      name: one
+    # directory for storing child pids
+    pids_dir: '/tmp/mypool1'
+
   mypool2:
     # number of worker processes
     size: 2
@@ -48,4 +51,6 @@ pools:
     # options passed to each handler's initializer (merged into handler options above)
     options:
       name: two
+    # directory for storing child pids
+    pids_dir: '/tmp/mypool2'
 
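
pids_dir is optional in this config; when it is omitted, Pool#initialize falls back to a path under the current working directory:

    # fallback used when a pool has no pids_dir key (name is the pool name)
    pids_dir = File.join(Dir.pwd, 'trident-pools', name, 'pids')
    # e.g. ./trident-pools/mypool1/pids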
@@ -1,13 +1,11 @@
 require_relative '../test_helper'
 
 class Trident::TridentTest < MiniTest::Should::TestCase
-
   setup do
     @project_root = File.expand_path('../../fixtures/integration_project', __FILE__)
     @cli = "#{File.expand_path('../../..', __FILE__)}/bin/trident"
   end
 
-
   def parse_manager(manager_str)
     pools = {}
     manager_str.scan(/(\w+)\[([0-9, ]+)\]/) do |pool, pids|
@@ -18,7 +16,6 @@ class Trident::TridentTest < MiniTest::Should::TestCase
   end
 
   context "basic usage" do
-
     should "start and stop pools" do
       cmd = "#{@cli} --verbose --config #{@project_root}/config/trident.yml"
       io = IO.popen(cmd, :err=>[:child, :out])
@@ -46,11 +43,9 @@ class Trident::TridentTest < MiniTest::Should::TestCase
       Process.wait(io.pid)
       assert_empty child_processes
     end
-
   end
 
   context "worker maintenance" do
-
     should "restart failed workers" do
       cmd = "#{@cli} --verbose --config #{@project_root}/config/trident.yml"
       io = IO.popen(cmd, :err=>[:child, :out])
@@ -78,6 +73,5 @@ class Trident::TridentTest < MiniTest::Should::TestCase
       Process.wait(io.pid)
       assert_empty child_processes
     end
-
   end
 end
data/test/test_helper.rb CHANGED
@@ -15,7 +15,6 @@ rescue Bundler::BundlerError => e
 end
 
 require 'minitest/autorun'
-require 'minitest/should'
 require "minitest/reporters"
 require "mocha/setup"
 require 'timeout'
@@ -135,53 +134,23 @@ def kill_all_child_processes
   Process.waitall
 end
 
-class MiniTest::Should::TestCase
-  ORIGINAL_PROCLINE = $0
+module Minitest::Should
+  class TestCase < MiniTest::Spec
 
-  setup do
-    $0 = ORIGINAL_PROCLINE
-    kill_all_child_processes
-  end
-end
-
-# Allow triggering single tests when running from rubymine
-# reopen the installed runner so we don't step on runner customizations
-class << MiniTest::Unit.runner
-  # Rubymine sends --name=/\Atest\: <context> should <should>\./
-  # Minitest runs each context as a suite
-  # Minitest filters methods by matching against: <suite>#test_0001_<should>
-  # Nested contexts are separted by spaces in rubymine, but ::s in minitest
-
-  def _run_suites(suites, type)
-    if options[:filter]
-      if options[:filter] =~ /\/\\Atest\\: (.*) should (.*)\\\.\//
-        context_filter = $1
-        should_filter = $2
-        should_filter.strip!
-        should_filter.gsub!(" ", "_")
-        should_filter.gsub!(/\W/, "")
-        context_filter = context_filter.gsub(" ", "((::)| )")
-        options[:filter] = "/\\A#{context_filter}(Test)?#test(_\\d+)?_should_#{should_filter}\\Z/"
-      end
+    # make minitest spec dsl similar to shoulda
+    class << self
+      alias :setup :before
+      alias :teardown :after
+      alias :context :describe
+      alias :should :it
     end
-
-    super
-  end
-
-  # Prevent "Empty test suite" verbosity when running in rubymine
-  def _run_suite(suite, type)
-
-    filter = options[:filter] || '/./'
-    filter = Regexp.new $1 if filter =~ /\/(.*)\//
-    all_test_methods = suite.send "#{type}_methods"
-    filtered_test_methods = all_test_methods.find_all { |m|
-      filter === m || filter === "#{suite}##{m}"
-    }
-
-    if filtered_test_methods.size > 0
-      super
-    else
-      [0, 0]
+
+    ORIGINAL_PROCLINE = $0
+
+    setup do
+      $0 = ORIGINAL_PROCLINE
+      kill_all_child_processes
     end
   end
 end
+
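
The replacement shim keeps the shoulda-style DSL available by aliasing it onto MiniTest::Spec. A minimal sketch of a test written against it (the class name is hypothetical):

    class Trident::ExampleTest < MiniTest::Should::TestCase
      setup do                     # alias of before
        @pool_name = "mypool1"
      end

      context "a pool" do          # alias of describe
        should "have a name" do    # alias of it
          assert_equal "mypool1", @pool_name
        end
      end
    end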