trident 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 368b964b09f272043b062c0220aa86140d69fd90
-  data.tar.gz: 83985e089b57fed096b1880f7a854383e4d2233d
+  metadata.gz: b1f99b2b31837fe6cd9fee43af02ebaaae7afe1f
+  data.tar.gz: 87df8e84d60b0162c5575eb6b2235373e5a03e2a
 SHA512:
-  metadata.gz: 9d87f08599ae67424158561c90292075bd2db709dcff5e49abd5de61b66e66e982c7cb3ff9051dea975bba527d028f674693c7096331ba0774b9b9dcf95a6d6f
-  data.tar.gz: 5e4b5003ae49958e13f3a0527f44b7214bb4c0301a261ea51149070da089e9161314dff36965f43f1a4dbf58f3c72a63abe26600e713310c41de14b4abd8c50d
+  metadata.gz: 3f7053abe1c7276399377d9cbf8641dceb246202f216cb21d92f93e3f04d1950927b73c712126c690fbb17e0553b8db5737fa900d201fe8246e5a8441163344b
+  data.tar.gz: b0a2226f1931c27c736f14394cdf673f678fc4dd29566a6cad91e56e6ee15bf949e26a4049b8cb53a84fd93f889fd6d2bd90fd4c0bc7db7580e295ac15913077
@@ -3,6 +3,12 @@ cache: bundler
 rvm:
 - 1.9.3
 - 2.0.0
-# - rbx-19mode
+- 2.1.1
+- rbx
 
 script: bundle exec rake
+
+matrix:
+  allow_failures:
+    - rvm: rbx
+
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
+0.5.0 (07/07/2014)
+------------------
+
+Merge pull request #1 from backupify/orphan_workers <92a5c49> [Gregory Salmon] [james-lawrence]
+
 0.4.2 (11/20/2013)
 ------------------
 
data/Gemfile CHANGED
@@ -7,3 +7,4 @@ gemspec
 gem 'coveralls', :require => false
 
 gem "mocha", :require => false
+gem 'rubymine_minitest_spec', :git => 'git@github.com:backupify/rubymine_minitest_spec.git'
data/README.md CHANGED
@@ -31,9 +31,18 @@ See other command line options with
 
     trident --help
 
+## Orphaned workers
+The ability to track pool processes across a restart - allows a restart to spin up new processes as old ones die off gracefully.
+
+Limitations - It will treat any process that has the same pid from a previous pool as part of
+the orphaned processes if the process can be signalled from the pool process. To get around this
+you run the pool as a different user, which will prevent the pool from being able to signal the
+process.
+
 
 ## TODO
 
 * Add support for reloading the trident config with a HUP signal
 * Add support in yml for specifying [process limits](http://www.ruby-doc.org/core-1.9.3/Process.html#method-c-setrlimit) (memory especially)
-* Add ability to track pool processes across a restart (or maybe only across a HUP) - allows a restart to spin up new processes as old ones die off gracefully.
+* Add support for killing off orphans/processes that have been running for an excessively (configurable) long time.
+
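The ownership test the README describes boils down to POSIX signal 0: sending signal 0 delivers nothing, but still performs the existence and permission checks. A minimal sketch of that probe (the helper name is hypothetical; the released implementation is `Pool#cleanup_orphaned_workers`, shown later in this diff):

```ruby
# Hypothetical helper illustrating the orphan liveness/ownership probe.
def trackable_orphan?(pid)
  Process.kill(0, pid) # signal 0: check only, no signal is delivered
  true                 # pid exists and we may signal it, so keep tracking it
rescue Errno::ESRCH
  false                # no such process: the orphan already exited
rescue Errno::EPERM
  false                # pid exists but belongs to another user, so it is
                       # not ours (the different-user workaround above)
end
```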
@@ -6,3 +6,4 @@ require "trident/pool"
 require "trident/pool_handler"
 require "trident/pool_manager"
 require "trident/signal_handler"
+require "trident/worker"
@@ -123,10 +123,10 @@ module Trident
 
         next if pool_filter.size > 0 && ! pool_filter.include?(name)
 
-        pool = Trident::Pool.new(name, handler, pool_config['size'], pool_config['options'])
+        pool = Trident::Pool.new(name, handler, pool_config)
         pools[name] = pool
       end
       pools
     end
   end
-end
+end
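The constructor now receives the whole per-pool config hash instead of individual arguments; `size` and `pids_dir` are extracted inside `Pool#initialize` (see the next hunk). A sketch of the new contract, with assumed config values and a `handler` assumed to be in scope:

```ruby
# Sketch of the new Pool.new call; values are illustrative, not from a
# real trident.yml. 'size' and 'pids_dir' are removed from the hash by
# Pool#initialize (via options.delete); whatever remains becomes the
# pool's options.
pool_config = {
  'size'     => 3,                   # worker count; defaults to 2 if absent
  'pids_dir' => '/tmp/mypool1',      # where worker pidfiles are written
  'options'  => { 'name' => 'one' }, # left in the hash, passed through
}
pool = Trident::Pool.new('mypool1', handler, pool_config)
```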
@@ -3,14 +3,35 @@ module Trident
     include GemLogger::LoggerSupport
     include Trident::Utils
 
-    attr_reader :name, :handler, :size, :options, :workers
+    attr_reader :name, :handler, :size, :options, :workers, :orphans, :orphans_dir
 
-    def initialize(name, handler, size, options={})
+    def initialize(name, handler, options={})
       @name = name
       @handler = handler
-      @size = size
+      @size = options.delete('size') || 2
       @options = options || {}
       @workers = Set.new
+      @orphans_dir = options.delete('pids_dir') || File.join(Dir.pwd, 'trident-pools', name, 'pids')
+      @orphans = load_orphans(orphans_dir)
+    end
+
+    def load_orphans(path_to_orphans_dir)
+      unless File.exists?(path_to_orphans_dir)
+        FileUtils.mkdir_p(path_to_orphans_dir)
+      end
+
+      orphans = Set.new
+
+      Dir.foreach(path_to_orphans_dir) do |file|
+        path = File.join(path_to_orphans_dir, file)
+        next if File.directory?(path)
+
+        pid = Integer(IO.read(path))
+        orphan_worker = Worker.new(pid, self)
+        orphans << orphan_worker
+      end
+
+      orphans
     end
 
     def start
@@ -38,35 +59,97 @@ module Trident
       logger.info "<pool-#{name}> Pool up to date"
     end
 
+    # @return [Boolean] true iff total_workers_count > size.
+    #   false otherwise
+    def above_threshold?
+      size < total_workers_count
+    end
+
+    # @return [Boolean] true iff total_workers_count == size.
+    #   false otherwise
+    def at_threshold?
+      size == total_workers_count
+    end
+
+    # @return [Boolean] true iff workers.size > 0.
+    #   false otherwise
+    def has_workers?
+      workers.size > 0
+    end
+
+    # @return [Integer] total number of workers including orphaned
+    #   workers.
+    def total_workers_count
+      workers.size + orphans.size
+    end
+
     private
 
     def maintain_worker_count(kill_action)
+      cleanup_orphaned_workers
       cleanup_dead_workers(false)
 
-      if size > workers.size
-        spawn_workers(size - workers.size)
-      elsif size < workers.size
-        kill_workers(workers.size - size, kill_action)
+      if at_threshold?
+        logger.debug "<pool-#{name}> Worker count is correct."
+      # If we are above the threshold and we have workers
+      # then reduce the number of workers.
+      elsif above_threshold? && has_workers?
+        overthreshold = total_workers_count - size
+        workers_to_kill = [overthreshold, workers.size].min
+
+        logger.info("<pool-#{name}> Total workers #{workers.size} above threshold #{size} killing #{workers_to_kill}.")
+        kill_workers(workers_to_kill, kill_action)
+      # If we are above the threshold, and no workers
+      # then we can't do anything, but lets log out a
+      # message indicating this state.
+      elsif above_threshold?
+        logger.info("<pool-#{name}> Waiting on orphans before spawning workers.")
+      # If the sum of both the workers and orphan workers is under our
+      # size requirement let's spawn the number of workers required to
+      # reach that size.
       else
-        logger.debug "<pool-#{name}> Worker count is correct"
+        logger.info("<pool-#{name}> Orphans #{orphans.size}, Workers #{workers.size}")
+        spawn_workers(size - total_workers_count)
+      end
+    end
+
+    # Remove orphan workers which are either not running
+    # or which we don't have permission to signal (thereby telling us they
+    # where never a part of the pool)
+    def cleanup_orphaned_workers
+      orphans.clone.each do |worker|
+        begin
+          # Check if the process is running
+          Process.kill(0, worker.pid)
+        rescue Errno::EPERM, Errno::ESRCH => e
+          # If we get EPERM (Permission error) or ESRCH (No process with that pid)
+          # stop tracking that worker
+          logger.info("<pool-#{name}> Cleaning up orphaned worker #{worker.pid} because #{e.class.name}:#{e.message})")
+          orphans.delete(worker)
+          worker.destroy
+        rescue => e
+          # Make sure we catch any unexpected errors when signaling the process.
+          logger.error("<pool-#{name}> failed cleaning up worker #{worker.pid} because #{e.class.name}:#{e.message})")
+        end
       end
     end
 
     def cleanup_dead_workers(blocking=true)
       wait_flags = blocking ? 0 : Process::WNOHANG
-      workers.clone.each do |pid|
+      workers.clone.each do |worker|
         begin
-          wpid = Process.wait(pid, wait_flags)
+          if Process.wait(worker.pid, wait_flags)
+            workers.delete(worker)
+          end
         rescue Errno::EINTR
           logger.warn("<pool-#{name}> Interrupted cleaning up workers, retrying")
           retry
         rescue Errno::ECHILD
           logger.warn("<pool-#{name}> Error cleaning up workers, ignoring")
-          # Calling process.wait on a pid that was already waited on throws
-          # a ECHLD, so may as well remove it from our list of workers
-          wpid = pid
+          # Calling Process.wait on a pid that was already waited on throws
+          # a ECHILD, so may as well remove it from our list of workers
+          workers.delete(worker)
         end
-        workers.delete(wpid) if wpid
       end
     end
 
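The reworked `cleanup_dead_workers` leans on the `WNOHANG` contract of `Process.wait`: it returns the pid once the child has exited and been reaped, and `nil` while the child is still running, so only genuinely dead workers are dropped from the set. A standalone illustration (the toy child is an assumption):

```ruby
# Process.wait(pid, Process::WNOHANG) never blocks:
pid = fork { sleep 1 }               # assumed toy child process
Process.wait(pid, Process::WNOHANG)  # => nil: child still running, keep it
sleep 2
Process.wait(pid, Process::WNOHANG)  # => pid: child exited and was reaped
```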
@@ -79,30 +162,40 @@ module Trident
 
     def kill_workers(count, action)
       logger.info "<pool-#{name}> Killing #{count} workers with #{action}"
-      workers.to_a[-count, count].each do |pid|
-        kill_worker(pid, action)
+      workers.to_a[-count, count].each do |worker|
+        kill_worker(worker, action)
       end
     end
 
     def spawn_worker
       pid = fork do
-        procline "pool-#{name}-worker", "starting handler #{handler.name}"
-        Trident::SignalHandler.reset_for_fork
-        handler.load
-        handler.start(options)
+        begin
+          procline "pool-#{name}-worker", "starting handler #{handler.name}"
+          Trident::SignalHandler.reset_for_fork
+          handler.load
+          handler.start(options)
+        ensure
+          worker = Worker.new(Process.pid, self)
+          worker.destroy
+        end
       end
-      workers << pid
+
+      worker = Worker.new(pid, self)
+      worker.save
+
+      workers << worker
       logger.info "<pool-#{name}> Spawned worker #{pid}, worker count now at #{workers.size}"
     end
 
-    def kill_worker(pid, action)
+    def kill_worker(worker, action)
       sig = handler.signal_for(action)
       raise "<pool-#{name}> No signal for action: #{action}" unless sig
-      logger.info "<pool-#{name}> Sending signal to worker: #{pid}/#{sig}/#{action}"
-      Process.kill(sig, pid)
-      workers.delete(pid)
-      logger.info "<pool-#{name}> Killed worker #{pid}, worker count now at #{workers.size}"
-    end
+      logger.info "<pool-#{name}> Sending signal to worker: #{worker.pid}/#{sig}/#{action}"
+      Process.kill(sig, worker.pid)
+
+      workers.delete(worker)
 
+      logger.info "<pool-#{name}> Killed worker #{worker.pid}, worker count now at #{workers.size}"
+    end
   end
 end
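The threshold helpers drive `maintain_worker_count`; a worked example of the kill branch with assumed counts makes the arithmetic concrete:

```ruby
# Worked example of the kill branch (numbers are assumed):
size    = 4   # configured pool size
workers = 3   # workers this pool forked itself
orphans = 3   # pids inherited from the previous pool process

total_workers_count = workers + orphans          # => 6
size < total_workers_count                       # => true, above_threshold?
overthreshold   = total_workers_count - size     # => 2 too many
workers_to_kill = [overthreshold, workers].min   # => 2
# Only the pool's own workers are signalled; the remaining excess drains
# as orphans exit and cleanup_orphaned_workers reaps their pidfiles.
```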
@@ -26,6 +26,5 @@ module Trident
     def signal_for(action)
       signal_mappings[action] || signal_mappings['default'] || "SIGTERM"
     end
-
   end
-end
+end
@@ -50,7 +50,7 @@ module Trident
     private
 
     def procline_display
-      pools.collect {|pool| "#{pool.name}#{pool.workers.to_a.inspect}" }.join(" ")
+      pools.collect {|pool| "#{pool.name}#{pool.workers.to_a.collect(&:pid)}" }.join(" ")
     end
 
     def load_handlers
@@ -71,4 +71,4 @@ module Trident
     end
 
   end
-end
+end
@@ -1,3 +1,3 @@
 module Trident
-  VERSION = "0.4.2"
+  VERSION = "0.5.0"
 end
@@ -0,0 +1,33 @@
+module Trident
+  # @param [Integer] pid - pid of the worker process
+  # @param [Trident::Pool] pool - pool managing the worker process.
+  class Worker < Struct.new(:pid, :pool)
+    # Crate a pidfile for this worker so that
+    # we may track it
+    def save
+      File.open(path, 'w') do |f|
+        f << "#{pid}"
+      end
+    end
+
+    # Remove the pidfile associated with this
+    # worker
+    def destroy
+      FileUtils.rm path if File.exists?(path)
+    end
+
+    # We determine the time that this worker was
+    # created from the creation timestamp on its
+    # pidfile
+    def created_at
+      @created_at ||= File.stat(path).ctime
+    end
+
+    protected
+
+    # Path to this worker's pid file
+    def path
+      File.join(pool.orphans_dir, "#{pid}.pid")
+    end
+  end
+end
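`Worker` only calls `#orphans_dir` on its pool, so it can be exercised in isolation. A small usage sketch (the stand-in pool and the directory are assumptions, not part of the gem):

```ruby
require 'fileutils'
require 'trident'

# Stand-in for Trident::Pool: Worker only needs #orphans_dir.
StubPool = Struct.new(:orphans_dir)
pool = StubPool.new('/tmp/mypool1/pids')
FileUtils.mkdir_p(pool.orphans_dir)

worker = Trident::Worker.new(Process.pid, pool)
worker.save                 # writes /tmp/mypool1/pids/<pid>.pid
puts worker.created_at      # ctime of the pidfile
worker.destroy              # removes the pidfile again
```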
@@ -40,6 +40,9 @@ pools:
     # options passed to each handler's initializer (merged into handler options above)
     options:
       name: one
+    # directory for storing child pids
+    pids_dir: '/tmp/mypool1'
+
   mypool2:
     # number of worker processes
     size: 2
@@ -48,4 +51,6 @@ pools:
     # options passed to each handler's initializer (merged into handler options above)
     options:
       name: two
+    # directory for storing child pids
+    pids_dir: '/tmp/mypool2'
 
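When `pids_dir` is omitted, `Pool#initialize` (earlier in this diff) falls back to a directory under the current working directory:

```ruby
# Default pidfile location when pids_dir is not configured,
# per Pool#initialize above ('mypool1' is an assumed pool name):
File.join(Dir.pwd, 'trident-pools', 'mypool1', 'pids')
# => "<cwd>/trident-pools/mypool1/pids"
```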
@@ -1,13 +1,11 @@
 require_relative '../test_helper'
 
 class Trident::TridentTest < MiniTest::Should::TestCase
-
   setup do
     @project_root = File.expand_path('../../fixtures/integration_project', __FILE__)
     @cli = "#{File.expand_path('../../..', __FILE__)}/bin/trident"
   end
 
-
   def parse_manager(manager_str)
     pools = {}
     manager_str.scan(/(\w+)\[([0-9, ]+)\]/) do |pool, pids|
@@ -18,7 +16,6 @@ class Trident::TridentTest < MiniTest::Should::TestCase
   end
 
   context "basic usage" do
-
     should "start and stop pools" do
       cmd = "#{@cli} --verbose --config #{@project_root}/config/trident.yml"
       io = IO.popen(cmd, :err=>[:child, :out])
@@ -46,11 +43,9 @@ class Trident::TridentTest < MiniTest::Should::TestCase
       Process.wait(io.pid)
       assert_empty child_processes
     end
-
   end
 
   context "worker maintenance" do
-
     should "restart failed workers" do
       cmd = "#{@cli} --verbose --config #{@project_root}/config/trident.yml"
       io = IO.popen(cmd, :err=>[:child, :out])
@@ -78,6 +73,5 @@ class Trident::TridentTest < MiniTest::Should::TestCase
       Process.wait(io.pid)
       assert_empty child_processes
     end
-
   end
 end
@@ -15,7 +15,6 @@ rescue Bundler::BundlerError => e
 end
 
 require 'minitest/autorun'
-require 'minitest/should'
 require "minitest/reporters"
 require "mocha/setup"
 require 'timeout'
@@ -135,53 +134,23 @@ def kill_all_child_processes
   Process.waitall
 end
 
-class MiniTest::Should::TestCase
-  ORIGINAL_PROCLINE = $0
+module Minitest::Should
+  class TestCase < MiniTest::Spec
 
-  setup do
-    $0 = ORIGINAL_PROCLINE
-    kill_all_child_processes
-  end
-end
-
-# Allow triggering single tests when running from rubymine
-# reopen the installed runner so we don't step on runner customizations
-class << MiniTest::Unit.runner
-  # Rubymine sends --name=/\Atest\: <context> should <should>\./
-  # Minitest runs each context as a suite
-  # Minitest filters methods by matching against: <suite>#test_0001_<should>
-  # Nested contexts are separted by spaces in rubymine, but ::s in minitest
-
-  def _run_suites(suites, type)
-    if options[:filter]
-      if options[:filter] =~ /\/\\Atest\\: (.*) should (.*)\\\.\//
-        context_filter = $1
-        should_filter = $2
-        should_filter.strip!
-        should_filter.gsub!(" ", "_")
-        should_filter.gsub!(/\W/, "")
-        context_filter = context_filter.gsub(" ", "((::)| )")
-        options[:filter] = "/\\A#{context_filter}(Test)?#test(_\\d+)?_should_#{should_filter}\\Z/"
-      end
+    # make minitest spec dsl similar to shoulda
+    class << self
+      alias :setup :before
+      alias :teardown :after
+      alias :context :describe
+      alias :should :it
     end
-
-    super
-  end
-
-  # Prevent "Empty test suite" verbosity when running in rubymine
-  def _run_suite(suite, type)
-
-    filter = options[:filter] || '/./'
-    filter = Regexp.new $1 if filter =~ /\/(.*)\//
-    all_test_methods = suite.send "#{type}_methods"
-    filtered_test_methods = all_test_methods.find_all { |m|
-      filter === m || filter === "#{suite}##{m}"
-    }
-
-    if filtered_test_methods.size > 0
-      super
-    else
-      [0, 0]
+
+    ORIGINAL_PROCLINE = $0
+
+    setup do
+      $0 = ORIGINAL_PROCLINE
+      kill_all_child_processes
     end
   end
 end
+
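With the `minitest/should` dependency dropped, the helper now builds the shoulda-style DSL directly on `MiniTest::Spec` via the aliases above. A toy example of the resulting style, mirroring the integration tests earlier in this diff (the test class and assertion are assumptions):

```ruby
# Toy example of the aliased DSL on the new TestCase base class.
class Trident::ExampleTest < MiniTest::Should::TestCase
  setup do                          # alias for Minitest::Spec's before
    @answer = 42
  end

  context "the aliases" do          # alias for describe
    should "read like shoulda" do   # alias for it
      assert_equal 42, @answer
    end
  end
end
```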