ecs_deploy 0.3.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +117 -0
- data/README.md +269 -23
- data/Rakefile +4 -0
- data/ecs_deploy.gemspec +9 -3
- data/lib/ecs_deploy.rb +1 -1
- data/lib/ecs_deploy/auto_scaler.rb +105 -339
- data/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb +209 -0
- data/lib/ecs_deploy/auto_scaler/cluster_resource_manager.rb +149 -0
- data/lib/ecs_deploy/auto_scaler/config_base.rb +16 -0
- data/lib/ecs_deploy/auto_scaler/instance_drainer.rb +134 -0
- data/lib/ecs_deploy/auto_scaler/service_config.rb +222 -0
- data/lib/ecs_deploy/auto_scaler/spot_fleet_request_config.rb +102 -0
- data/lib/ecs_deploy/auto_scaler/trigger_config.rb +36 -0
- data/lib/ecs_deploy/capistrano.rb +70 -1
- data/lib/ecs_deploy/configuration.rb +3 -2
- data/lib/ecs_deploy/instance_fluctuation_manager.rb +173 -0
- data/lib/ecs_deploy/scheduled_task.rb +15 -3
- data/lib/ecs_deploy/service.rb +89 -7
- data/lib/ecs_deploy/task_definition.rb +22 -8
- data/lib/ecs_deploy/version.rb +1 -1
- metadata +110 -11
data/Rakefile
CHANGED
data/ecs_deploy.gemspec
CHANGED
@@ -18,10 +18,16 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_runtime_dependency "aws-sdk", "~>
|
21
|
+
spec.add_runtime_dependency "aws-sdk-autoscaling", "~> 1"
|
22
|
+
spec.add_runtime_dependency "aws-sdk-cloudwatch", "~> 1"
|
23
|
+
spec.add_runtime_dependency "aws-sdk-cloudwatchevents", "~> 1"
|
24
|
+
spec.add_runtime_dependency "aws-sdk-ec2", "~> 1"
|
25
|
+
spec.add_runtime_dependency "aws-sdk-ecs", "~> 1"
|
26
|
+
spec.add_runtime_dependency "aws-sdk-sqs", "~> 1"
|
22
27
|
spec.add_runtime_dependency "terminal-table"
|
23
28
|
spec.add_runtime_dependency "paint"
|
24
29
|
|
25
|
-
spec.add_development_dependency "bundler", "
|
26
|
-
spec.add_development_dependency "rake", "
|
30
|
+
spec.add_development_dependency "bundler", ">= 1.11", "< 3"
|
31
|
+
spec.add_development_dependency "rake", ">= 10.0"
|
32
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
27
33
|
end
|
data/lib/ecs_deploy.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "logger"
|
2
|
+
require "time"
|
3
|
+
require "yaml"
|
4
|
+
|
5
|
+
require "ecs_deploy/auto_scaler/auto_scaling_group_config"
|
6
|
+
require "ecs_deploy/auto_scaler/instance_drainer"
|
7
|
+
require "ecs_deploy/auto_scaler/service_config"
|
8
|
+
require "ecs_deploy/auto_scaler/spot_fleet_request_config"
|
4
9
|
|
5
10
|
module EcsDeploy
|
6
11
|
module AutoScaler
|
@@ -8,8 +13,8 @@ module EcsDeploy
|
|
8
13
|
attr_reader :logger, :error_logger
|
9
14
|
|
10
15
|
def run(yaml_path, log_file = nil, error_log_file = nil)
|
11
|
-
|
12
|
-
|
16
|
+
@enable_auto_scaling = true
|
17
|
+
setup_signal_handlers
|
13
18
|
@logger = Logger.new(log_file || STDOUT)
|
14
19
|
@logger.level = Logger.const_get(ENV["ECS_AUTO_SCALER_LOG_LEVEL"].upcase) if ENV["ECS_AUTO_SCALER_LOG_LEVEL"]
|
15
20
|
STDOUT.sync = true unless log_file
|
@@ -17,90 +22,129 @@ module EcsDeploy
|
|
17
22
|
@error_logger.level = Logger.const_get(ENV["ECS_AUTO_SCALER_LOG_LEVEL"].upcase) if ENV["ECS_AUTO_SCALER_LOG_LEVEL"]
|
18
23
|
STDERR.sync = true unless error_log_file
|
19
24
|
load_config(yaml_path)
|
20
|
-
service_configs
|
21
|
-
auto_scaling_group_configs
|
22
25
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
26
|
+
ths = (auto_scaling_group_configs + spot_fleet_request_configs).map do |cluster_scaling_config|
|
27
|
+
Thread.new(cluster_scaling_config, &method(:main_loop)).tap { |th| th.abort_on_exception = true }
|
28
|
+
end
|
29
|
+
|
30
|
+
if @config["spot_instance_intrp_warns_queue_urls"]
|
31
|
+
drainer = EcsDeploy::AutoScaler::InstanceDrainer.new(
|
32
|
+
auto_scaling_group_configs: auto_scaling_group_configs,
|
33
|
+
spot_fleet_request_configs: spot_fleet_request_configs,
|
34
|
+
logger: logger,
|
35
|
+
)
|
36
|
+
polling_ths = @config["spot_instance_intrp_warns_queue_urls"].map do |queue_url|
|
37
|
+
Thread.new(queue_url) do |url|
|
38
|
+
drainer.poll_spot_instance_interruption_warnings(url)
|
39
|
+
end.tap { |th| th.abort_on_exception = true }
|
40
|
+
end
|
27
41
|
end
|
28
42
|
|
29
43
|
ths.each(&:join)
|
44
|
+
|
45
|
+
drainer&.stop
|
46
|
+
polling_ths&.each(&:join)
|
30
47
|
end
|
31
48
|
|
32
|
-
def main_loop(
|
33
|
-
loop_with_polling_interval("loop of #{
|
34
|
-
ths =
|
49
|
+
def main_loop(cluster_scaling_config)
|
50
|
+
loop_with_polling_interval("loop of #{cluster_scaling_config.name}") do
|
51
|
+
ths = cluster_scaling_config.service_configs.map do |service_config|
|
35
52
|
Thread.new(service_config) do |s|
|
36
53
|
@logger.debug "Start service scaling of #{s.name}"
|
37
|
-
|
38
|
-
if s.idle?
|
39
|
-
@logger.debug "#{s.name} is idling"
|
40
|
-
next
|
41
|
-
end
|
42
|
-
|
43
|
-
difference = 0
|
44
|
-
s.upscale_triggers.each do |trigger|
|
45
|
-
step = trigger.step || s.step
|
46
|
-
next if difference >= step
|
47
|
-
|
48
|
-
if trigger.match?
|
49
|
-
logger.info "Fire upscale trigger of #{s.name} by #{trigger.alarm_name} #{trigger.state}"
|
50
|
-
difference = step
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
if difference == 0 && s.desired_count > s.current_min_task_count
|
55
|
-
s.downscale_triggers.each do |trigger|
|
56
|
-
next unless trigger.match?
|
57
|
-
|
58
|
-
logger.info "Fire downscale trigger of #{s.name} by #{trigger.alarm_name} #{trigger.state}"
|
59
|
-
step = trigger.step || s.step
|
60
|
-
difference = [difference, -step].min
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
if s.current_min_task_count > s.desired_count + difference
|
65
|
-
difference = s.current_min_task_count - s.desired_count
|
66
|
-
end
|
67
|
-
|
68
|
-
if difference >= 0 && s.desired_count > s.max_task_count.max
|
69
|
-
difference = s.max_task_count.max - s.desired_count
|
70
|
-
end
|
71
|
-
|
72
|
-
if difference != 0
|
73
|
-
s.update_service(difference)
|
74
|
-
end
|
54
|
+
s.adjust_desired_count(cluster_scaling_config.cluster_resource_manager)
|
75
55
|
end
|
76
56
|
end
|
77
57
|
ths.each { |th| th.abort_on_exception = true }
|
78
58
|
|
79
59
|
ths.each(&:join)
|
80
60
|
|
81
|
-
@logger.debug "Start
|
61
|
+
@logger.debug "Start cluster scaling of #{cluster_scaling_config.name}"
|
62
|
+
|
63
|
+
required_capacity = cluster_scaling_config.service_configs.sum { |s| s.desired_count * s.required_capacity }
|
64
|
+
cluster_scaling_config.update_desired_capacity(required_capacity)
|
82
65
|
|
83
|
-
|
84
|
-
asg_config.update_auto_scaling_group(total_service_count, configs[0])
|
85
|
-
asg_config.detach_and_terminate_orphan_instances(configs[0])
|
66
|
+
cluster_scaling_config.service_configs.each(&:wait_until_desired_count_updated)
|
86
67
|
end
|
87
68
|
end
|
88
69
|
|
89
70
|
def load_config(yaml_path)
|
90
71
|
@config = YAML.load_file(yaml_path)
|
91
72
|
@polling_interval = @config["polling_interval"] || 30
|
92
|
-
|
73
|
+
if @config["services"]
|
74
|
+
@error_logger&.warn('"services" property in root-level is deprecated. Please define it in "auto_scaling_groups" property or "spot_fleet_requests" property.')
|
75
|
+
@config.delete("services").each do |svc|
|
76
|
+
if svc["auto_scaling_group_name"] && svc["spot_fleet_request_id"]
|
77
|
+
raise "You can specify only one of 'auto_scaling_group_name' or 'spot_fleet_request_name'"
|
78
|
+
end
|
79
|
+
|
80
|
+
svc_region = svc.delete("region")
|
81
|
+
if svc["auto_scaling_group_name"]
|
82
|
+
asg_name = svc.delete("auto_scaling_group_name")
|
83
|
+
asg = @config["auto_scaling_groups"].find { |g| g["region"] == svc_region && g["name"] == asg_name }
|
84
|
+
asg["services"] ||= []
|
85
|
+
asg["services"] << svc
|
86
|
+
asg["cluster"] = svc.delete("cluster")
|
87
|
+
end
|
93
88
|
|
94
|
-
|
95
|
-
|
89
|
+
if svc["spot_fleet_request_id"]
|
90
|
+
sfr_id = svc.delete("spot_fleet_request_id")
|
91
|
+
sfr = @config["spot_fleet_requests"].find { |r| r["region"] == svc_region && r["id"] == sfr_id }
|
92
|
+
sfr["services"] ||= []
|
93
|
+
sfr["services"] << svc
|
94
|
+
sfr["cluster"] = svc.delete("cluster")
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
96
98
|
end
|
97
99
|
|
98
100
|
def auto_scaling_group_configs
|
99
|
-
@auto_scaling_group_configs ||= @config["auto_scaling_groups"].
|
101
|
+
@auto_scaling_group_configs ||= (@config["auto_scaling_groups"] || []).each.with_object({}) do |c, configs|
|
102
|
+
configs[c["name"]] ||= {}
|
103
|
+
if configs[c["name"]][c["region"]]
|
104
|
+
raise "Duplicate entry in auto_scaling_groups (name: #{c["name"]}, region: #{c["region"]})"
|
105
|
+
end
|
106
|
+
configs[c["name"]][c["region"]] = AutoScalingGroupConfig.new(c, @logger)
|
107
|
+
end.values.flat_map(&:values)
|
108
|
+
end
|
109
|
+
|
110
|
+
def spot_fleet_request_configs
|
111
|
+
@spot_fleet_request_configs ||= (@config["spot_fleet_requests"] || []).each.with_object({}) do |c, configs|
|
112
|
+
configs[c["id"]] ||= {}
|
113
|
+
if configs[c["id"]][c["region"]]
|
114
|
+
raise "Duplicate entry in spot_fleet_requests (id: #{c["id"]}, region: #{c["region"]})"
|
115
|
+
end
|
116
|
+
configs[c["id"]][c["region"]] = SpotFleetRequestConfig.new(c, @logger)
|
117
|
+
end.values.flat_map(&:values)
|
100
118
|
end
|
101
119
|
|
102
120
|
private
|
103
121
|
|
122
|
+
def setup_signal_handlers
|
123
|
+
# Use a thread and a queue to avoid "log writing failed. can't be called from trap context"
|
124
|
+
# cf. https://bugs.ruby-lang.org/issues/14222#note-3
|
125
|
+
signals = Queue.new
|
126
|
+
%i(TERM INT CONT TSTP).each do |sig|
|
127
|
+
trap(sig) { signals << sig }
|
128
|
+
end
|
129
|
+
|
130
|
+
Thread.new do
|
131
|
+
loop do
|
132
|
+
sig = signals.pop
|
133
|
+
case sig
|
134
|
+
when :INT, :TERM
|
135
|
+
@logger.info "Received SIG#{sig}, shutting down gracefully"
|
136
|
+
@stop = true
|
137
|
+
when :CONT
|
138
|
+
@logger.info "Received SIGCONT, resume auto scaling"
|
139
|
+
@enable_auto_scaling = true
|
140
|
+
when :TSTP
|
141
|
+
@logger.info "Received SIGTSTP, pause auto scaling. Send SIGCONT to resume it."
|
142
|
+
@enable_auto_scaling = false
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
104
148
|
def wait_polling_interval?(last_executed_at)
|
105
149
|
current = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
106
150
|
diff = current - last_executed_at
|
@@ -114,6 +158,7 @@ module EcsDeploy
|
|
114
158
|
loop do
|
115
159
|
break if @stop
|
116
160
|
sleep 1
|
161
|
+
next unless @enable_auto_scaling
|
117
162
|
next if wait_polling_interval?(last_executed_at)
|
118
163
|
yield
|
119
164
|
last_executed_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
@@ -123,284 +168,5 @@ module EcsDeploy
|
|
123
168
|
@logger.debug "Stop #{name}"
|
124
169
|
end
|
125
170
|
end
|
126
|
-
|
127
|
-
module ConfigBase
|
128
|
-
def initialize(attributes = {})
|
129
|
-
attributes.each do |key, val|
|
130
|
-
send("#{key}=", val)
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
SERVICE_CONFIG_ATTRIBUTES = %i(name cluster region auto_scaling_group_name step max_task_count min_task_count idle_time scheduled_min_task_count cooldown_time_for_reach_max upscale_triggers downscale_triggers desired_count)
|
136
|
-
ServiceConfig = Struct.new(*SERVICE_CONFIG_ATTRIBUTES) do
|
137
|
-
include ConfigBase
|
138
|
-
|
139
|
-
def initialize(attributes = {})
|
140
|
-
super(attributes)
|
141
|
-
self.idle_time ||= 60
|
142
|
-
self.max_task_count = Array(max_task_count)
|
143
|
-
self.upscale_triggers = upscale_triggers.to_a.map do |t|
|
144
|
-
TriggerConfig.new(t.merge(region: region))
|
145
|
-
end
|
146
|
-
self.downscale_triggers = downscale_triggers.to_a.map do |t|
|
147
|
-
TriggerConfig.new(t.merge(region: region))
|
148
|
-
end
|
149
|
-
self.max_task_count.sort!
|
150
|
-
self.desired_count = fetch_service.desired_count
|
151
|
-
@reach_max_at = nil
|
152
|
-
@last_updated_at = nil
|
153
|
-
end
|
154
|
-
|
155
|
-
def client
|
156
|
-
Aws::ECS::Client.new(
|
157
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
158
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
159
|
-
region: region
|
160
|
-
)
|
161
|
-
end
|
162
|
-
|
163
|
-
def idle?
|
164
|
-
return false unless @last_updated_at
|
165
|
-
|
166
|
-
diff = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second) - @last_updated_at
|
167
|
-
diff < idle_time
|
168
|
-
end
|
169
|
-
|
170
|
-
def current_min_task_count
|
171
|
-
return min_task_count if scheduled_min_task_count.nil? || scheduled_min_task_count.empty?
|
172
|
-
|
173
|
-
scheduled_min_task_count.find(-> { {"count" => min_task_count} }) { |s|
|
174
|
-
from = Time.parse(s["from"])
|
175
|
-
to = Time.parse(s["to"])
|
176
|
-
(from..to).cover?(Time.now)
|
177
|
-
}["count"]
|
178
|
-
end
|
179
|
-
|
180
|
-
def overheat?
|
181
|
-
return false unless @reach_max_at
|
182
|
-
(Process.clock_gettime(Process::CLOCK_MONOTONIC, :second) - @reach_max_at) > cooldown_time_for_reach_max
|
183
|
-
end
|
184
|
-
|
185
|
-
def fetch_service
|
186
|
-
res = client.describe_services(cluster: cluster, services: [name])
|
187
|
-
raise "Service \"#{name}\" is not found" if res.services.empty?
|
188
|
-
res.services[0]
|
189
|
-
rescue => e
|
190
|
-
AutoScaler.error_logger.error(e)
|
191
|
-
end
|
192
|
-
|
193
|
-
def update_service(difference)
|
194
|
-
next_desired_count = desired_count + difference
|
195
|
-
current_level = max_task_level(desired_count)
|
196
|
-
next_level = max_task_level(next_desired_count)
|
197
|
-
if current_level < next_level && overheat? # next max
|
198
|
-
level = next_level
|
199
|
-
@reach_max_at = nil
|
200
|
-
AutoScaler.logger.info "Service \"#{name}\" is overheat, uses next max count"
|
201
|
-
elsif current_level < next_level && !overheat? # wait cooldown
|
202
|
-
level = current_level
|
203
|
-
now = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
204
|
-
@reach_max_at ||= now
|
205
|
-
AutoScaler.logger.info "Service \"#{name}\" waits cooldown elapsed #{(now - @reach_max_at).to_i}sec"
|
206
|
-
elsif current_level == next_level && next_desired_count >= max_task_count[current_level] # reach current max
|
207
|
-
level = current_level
|
208
|
-
now = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
209
|
-
@reach_max_at ||= now
|
210
|
-
AutoScaler.logger.info "Service \"#{name}\" waits cooldown elapsed #{(now - @reach_max_at).to_i}sec"
|
211
|
-
elsif current_level == next_level && next_desired_count < max_task_count[current_level]
|
212
|
-
level = current_level
|
213
|
-
@reach_max_at = nil
|
214
|
-
AutoScaler.logger.info "Service \"#{name}\" clears cooldown state"
|
215
|
-
elsif current_level > next_level
|
216
|
-
level = next_level
|
217
|
-
@reach_max_at = nil
|
218
|
-
AutoScaler.logger.info "Service \"#{name}\" clears cooldown state"
|
219
|
-
end
|
220
|
-
|
221
|
-
cl = client
|
222
|
-
next_desired_count = [next_desired_count, max_task_count[level]].min
|
223
|
-
cl.update_service(
|
224
|
-
cluster: cluster,
|
225
|
-
service: name,
|
226
|
-
desired_count: next_desired_count,
|
227
|
-
)
|
228
|
-
cl.wait_until(:services_stable, cluster: cluster, services: [name]) do |w|
|
229
|
-
w.before_wait do
|
230
|
-
AutoScaler.logger.debug "wait service stable [#{name}]"
|
231
|
-
end
|
232
|
-
end if difference < 0
|
233
|
-
@last_updated_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
234
|
-
self.desired_count = next_desired_count
|
235
|
-
AutoScaler.logger.info "Update service \"#{name}\": desired_count -> #{next_desired_count}"
|
236
|
-
rescue => e
|
237
|
-
AutoScaler.error_logger.error(e)
|
238
|
-
end
|
239
|
-
|
240
|
-
def fetch_container_instances
|
241
|
-
arns = []
|
242
|
-
resp = nil
|
243
|
-
cl = client
|
244
|
-
loop do
|
245
|
-
options = {cluster: cluster}
|
246
|
-
options.merge(next_token: resp.next_token) if resp && resp.next_token
|
247
|
-
resp = cl.list_container_instances(options)
|
248
|
-
arns.concat(resp.container_instance_arns)
|
249
|
-
break unless resp.next_token
|
250
|
-
end
|
251
|
-
|
252
|
-
chunk_size = 50
|
253
|
-
container_instances = []
|
254
|
-
arns.each_slice(chunk_size) do |arn_chunk|
|
255
|
-
is = cl.describe_container_instances(cluster: cluster, container_instances: arn_chunk).container_instances
|
256
|
-
container_instances.concat(is)
|
257
|
-
end
|
258
|
-
|
259
|
-
container_instances
|
260
|
-
end
|
261
|
-
|
262
|
-
private
|
263
|
-
|
264
|
-
def max_task_level(count)
|
265
|
-
max_task_count.index { |i| count <= i } || max_task_count.size - 1
|
266
|
-
end
|
267
|
-
end
|
268
|
-
|
269
|
-
TriggerConfig = Struct.new(:alarm_name, :region, :state, :step) do
|
270
|
-
include ConfigBase
|
271
|
-
|
272
|
-
def client
|
273
|
-
Aws::CloudWatch::Client.new(
|
274
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
275
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
276
|
-
region: region
|
277
|
-
)
|
278
|
-
end
|
279
|
-
|
280
|
-
def match?
|
281
|
-
fetch_alarm.state_value == state
|
282
|
-
end
|
283
|
-
|
284
|
-
def fetch_alarm
|
285
|
-
res = client.describe_alarms(alarm_names: [alarm_name])
|
286
|
-
|
287
|
-
raise "Alarm \"#{alarm_name}\" is not found" if res.metric_alarms.empty?
|
288
|
-
res.metric_alarms[0].tap do |alarm|
|
289
|
-
AutoScaler.logger.debug("#{alarm.alarm_name} state is #{alarm.state_value}")
|
290
|
-
end
|
291
|
-
rescue => e
|
292
|
-
AutoScaler.error_logger.error(e)
|
293
|
-
end
|
294
|
-
end
|
295
|
-
|
296
|
-
AutoScalingConfig = Struct.new(:name, :region, :buffer) do
|
297
|
-
include ConfigBase
|
298
|
-
|
299
|
-
def client
|
300
|
-
Aws::AutoScaling::Client.new(
|
301
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
302
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
303
|
-
region: region
|
304
|
-
)
|
305
|
-
end
|
306
|
-
|
307
|
-
def ec2_client
|
308
|
-
Aws::EC2::Client.new(
|
309
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
310
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
311
|
-
region: region
|
312
|
-
)
|
313
|
-
end
|
314
|
-
|
315
|
-
def instances(reload: false)
|
316
|
-
if reload || @instances.nil?
|
317
|
-
resp = client.describe_auto_scaling_groups({
|
318
|
-
auto_scaling_group_names: [name],
|
319
|
-
})
|
320
|
-
@instances = resp.auto_scaling_groups[0].instances
|
321
|
-
else
|
322
|
-
@instances
|
323
|
-
end
|
324
|
-
end
|
325
|
-
|
326
|
-
def update_auto_scaling_group(total_service_count, service_config)
|
327
|
-
desired_capacity = total_service_count + buffer.to_i
|
328
|
-
|
329
|
-
current_asg = client.describe_auto_scaling_groups({
|
330
|
-
auto_scaling_group_names: [name],
|
331
|
-
}).auto_scaling_groups[0]
|
332
|
-
|
333
|
-
if current_asg.desired_capacity > desired_capacity
|
334
|
-
diff = current_asg.desired_capacity - desired_capacity
|
335
|
-
container_instances = service_config.fetch_container_instances
|
336
|
-
deregisterable_instances = container_instances.select do |i|
|
337
|
-
i.pending_tasks_count == 0 && i.running_tasks_count == 0
|
338
|
-
end
|
339
|
-
|
340
|
-
AutoScaler.logger.info "Fetch deregisterable instances: #{deregisterable_instances.map(&:ec2_instance_id).inspect}"
|
341
|
-
|
342
|
-
deregistered_instance_ids = []
|
343
|
-
deregisterable_instances.each do |i|
|
344
|
-
break if deregistered_instance_ids.size >= diff
|
345
|
-
|
346
|
-
begin
|
347
|
-
service_config.client.deregister_container_instance(cluster: service_config.cluster, container_instance: i.container_instance_arn, force: false)
|
348
|
-
deregistered_instance_ids << i.ec2_instance_id
|
349
|
-
rescue Aws::ECS::Errors::InvalidParameterException
|
350
|
-
end
|
351
|
-
end
|
352
|
-
|
353
|
-
AutoScaler.logger.info "Deregistered instances: #{deregistered_instance_ids.inspect}"
|
354
|
-
|
355
|
-
detach_and_terminate_instances(deregistered_instance_ids)
|
356
|
-
|
357
|
-
AutoScaler.logger.info "Update auto scaling group \"#{name}\": desired_capacity -> #{desired_capacity}"
|
358
|
-
elsif current_asg.desired_capacity < desired_capacity
|
359
|
-
client.update_auto_scaling_group(
|
360
|
-
auto_scaling_group_name: name,
|
361
|
-
min_size: 0,
|
362
|
-
max_size: [current_asg.max_size, desired_capacity].max,
|
363
|
-
desired_capacity: desired_capacity,
|
364
|
-
)
|
365
|
-
AutoScaler.logger.info "Update auto scaling group \"#{name}\": desired_capacity -> #{desired_capacity}"
|
366
|
-
end
|
367
|
-
rescue => e
|
368
|
-
AutoScaler.error_logger.error(e)
|
369
|
-
end
|
370
|
-
|
371
|
-
def detach_and_terminate_instances(instance_ids)
|
372
|
-
return if instance_ids.empty?
|
373
|
-
|
374
|
-
client.detach_instances(
|
375
|
-
auto_scaling_group_name: name,
|
376
|
-
instance_ids: instance_ids,
|
377
|
-
should_decrement_desired_capacity: true
|
378
|
-
)
|
379
|
-
|
380
|
-
AutoScaler.logger.info "Detach instances from ASG #{name}: #{instance_ids.inspect}"
|
381
|
-
sleep 3
|
382
|
-
|
383
|
-
ec2_client.terminate_instances(instance_ids: instance_ids)
|
384
|
-
|
385
|
-
AutoScaler.logger.info "Terminated instances: #{instance_ids.inspect}"
|
386
|
-
rescue => e
|
387
|
-
AutoScaler.error_logger.error(e)
|
388
|
-
end
|
389
|
-
|
390
|
-
def detach_and_terminate_orphan_instances(service_config)
|
391
|
-
container_instance_ids = service_config.fetch_container_instances.map(&:ec2_instance_id)
|
392
|
-
orphans = instances(reload: true).reject { |i| container_instance_ids.include?(i.instance_id) }.map(&:instance_id)
|
393
|
-
|
394
|
-
return if orphans.empty?
|
395
|
-
|
396
|
-
targets = ec2_client.describe_instances(instance_ids: orphans).reservations[0].instances.select do |i|
|
397
|
-
(Time.now - i.launch_time) > 600
|
398
|
-
end
|
399
|
-
|
400
|
-
detach_and_terminate_instances(targets.map(&:instance_id))
|
401
|
-
rescue => e
|
402
|
-
AutoScaler.error_logger.error(e)
|
403
|
-
end
|
404
|
-
end
|
405
171
|
end
|
406
172
|
end
|