ecs_deploy 0.2.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +132 -0
- data/README.md +291 -28
- data/Rakefile +4 -0
- data/ecs_deploy.gemspec +9 -3
- data/lib/ecs_deploy.rb +2 -1
- data/lib/ecs_deploy/auto_scaler.rb +107 -358
- data/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb +209 -0
- data/lib/ecs_deploy/auto_scaler/cluster_resource_manager.rb +149 -0
- data/lib/ecs_deploy/auto_scaler/config_base.rb +16 -0
- data/lib/ecs_deploy/auto_scaler/instance_drainer.rb +134 -0
- data/lib/ecs_deploy/auto_scaler/service_config.rb +223 -0
- data/lib/ecs_deploy/auto_scaler/spot_fleet_request_config.rb +102 -0
- data/lib/ecs_deploy/auto_scaler/trigger_config.rb +42 -0
- data/lib/ecs_deploy/capistrano.rb +108 -11
- data/lib/ecs_deploy/configuration.rb +6 -2
- data/lib/ecs_deploy/instance_fluctuation_manager.rb +195 -0
- data/lib/ecs_deploy/scheduled_task.rb +101 -0
- data/lib/ecs_deploy/service.rb +99 -20
- data/lib/ecs_deploy/task_definition.rb +37 -47
- data/lib/ecs_deploy/version.rb +1 -1
- metadata +114 -14
data/Rakefile
CHANGED
data/ecs_deploy.gemspec
CHANGED
@@ -18,10 +18,16 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_runtime_dependency "aws-sdk", "~>
|
21
|
+
spec.add_runtime_dependency "aws-sdk-autoscaling", "~> 1"
|
22
|
+
spec.add_runtime_dependency "aws-sdk-cloudwatch", "~> 1"
|
23
|
+
spec.add_runtime_dependency "aws-sdk-cloudwatchevents", "~> 1"
|
24
|
+
spec.add_runtime_dependency "aws-sdk-ec2", "~> 1"
|
25
|
+
spec.add_runtime_dependency "aws-sdk-ecs", "~> 1"
|
26
|
+
spec.add_runtime_dependency "aws-sdk-sqs", "~> 1"
|
22
27
|
spec.add_runtime_dependency "terminal-table"
|
23
28
|
spec.add_runtime_dependency "paint"
|
24
29
|
|
25
|
-
spec.add_development_dependency "bundler", "
|
26
|
-
spec.add_development_dependency "rake", "
|
30
|
+
spec.add_development_dependency "bundler", ">= 1.11", "< 3"
|
31
|
+
spec.add_development_dependency "rake", ">= 10.0"
|
32
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
27
33
|
end
|
data/lib/ecs_deploy.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "ecs_deploy/version"
|
2
2
|
require "ecs_deploy/configuration"
|
3
3
|
|
4
|
-
require 'aws-sdk'
|
4
|
+
require 'aws-sdk-ecs'
|
5
5
|
require 'logger'
|
6
6
|
require 'terminal-table'
|
7
7
|
require 'paint'
|
@@ -27,3 +27,4 @@ end
|
|
27
27
|
|
28
28
|
require "ecs_deploy/task_definition"
|
29
29
|
require "ecs_deploy/service"
|
30
|
+
require "ecs_deploy/scheduled_task"
|
@@ -1,6 +1,11 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "logger"
|
2
|
+
require "time"
|
3
|
+
require "yaml"
|
4
|
+
|
5
|
+
require "ecs_deploy/auto_scaler/auto_scaling_group_config"
|
6
|
+
require "ecs_deploy/auto_scaler/instance_drainer"
|
7
|
+
require "ecs_deploy/auto_scaler/service_config"
|
8
|
+
require "ecs_deploy/auto_scaler/spot_fleet_request_config"
|
4
9
|
|
5
10
|
module EcsDeploy
|
6
11
|
module AutoScaler
|
@@ -8,8 +13,8 @@ module EcsDeploy
|
|
8
13
|
attr_reader :logger, :error_logger
|
9
14
|
|
10
15
|
def run(yaml_path, log_file = nil, error_log_file = nil)
|
11
|
-
|
12
|
-
|
16
|
+
@enable_auto_scaling = true
|
17
|
+
setup_signal_handlers
|
13
18
|
@logger = Logger.new(log_file || STDOUT)
|
14
19
|
@logger.level = Logger.const_get(ENV["ECS_AUTO_SCALER_LOG_LEVEL"].upcase) if ENV["ECS_AUTO_SCALER_LOG_LEVEL"]
|
15
20
|
STDOUT.sync = true unless log_file
|
@@ -17,86 +22,129 @@ module EcsDeploy
|
|
17
22
|
@error_logger.level = Logger.const_get(ENV["ECS_AUTO_SCALER_LOG_LEVEL"].upcase) if ENV["ECS_AUTO_SCALER_LOG_LEVEL"]
|
18
23
|
STDERR.sync = true unless error_log_file
|
19
24
|
load_config(yaml_path)
|
20
|
-
service_configs
|
21
|
-
auto_scaling_group_configs
|
22
25
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
26
|
+
ths = (auto_scaling_group_configs + spot_fleet_request_configs).map do |cluster_scaling_config|
|
27
|
+
Thread.new(cluster_scaling_config, &method(:main_loop)).tap { |th| th.abort_on_exception = true }
|
28
|
+
end
|
29
|
+
|
30
|
+
if @config["spot_instance_intrp_warns_queue_urls"]
|
31
|
+
drainer = EcsDeploy::AutoScaler::InstanceDrainer.new(
|
32
|
+
auto_scaling_group_configs: auto_scaling_group_configs,
|
33
|
+
spot_fleet_request_configs: spot_fleet_request_configs,
|
34
|
+
logger: logger,
|
35
|
+
)
|
36
|
+
polling_ths = @config["spot_instance_intrp_warns_queue_urls"].map do |queue_url|
|
37
|
+
Thread.new(queue_url) do |url|
|
38
|
+
drainer.poll_spot_instance_interruption_warnings(url)
|
39
|
+
end.tap { |th| th.abort_on_exception = true }
|
40
|
+
end
|
27
41
|
end
|
28
42
|
|
29
43
|
ths.each(&:join)
|
44
|
+
|
45
|
+
drainer&.stop
|
46
|
+
polling_ths&.each(&:join)
|
30
47
|
end
|
31
48
|
|
32
|
-
def main_loop(
|
33
|
-
loop_with_polling_interval("loop of #{
|
34
|
-
ths =
|
49
|
+
def main_loop(cluster_scaling_config)
|
50
|
+
loop_with_polling_interval("loop of #{cluster_scaling_config.name}") do
|
51
|
+
ths = cluster_scaling_config.service_configs.map do |service_config|
|
35
52
|
Thread.new(service_config) do |s|
|
36
|
-
next if s.idle?
|
37
|
-
|
38
53
|
@logger.debug "Start service scaling of #{s.name}"
|
39
|
-
|
40
|
-
difference = 0
|
41
|
-
s.upscale_triggers.each do |trigger|
|
42
|
-
step = trigger.step || s.step
|
43
|
-
next if difference >= step
|
44
|
-
|
45
|
-
if trigger.match?
|
46
|
-
logger.info "Fire upscale trigger of #{s.name} by #{trigger.alarm_name} #{trigger.state}"
|
47
|
-
difference = step
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
if difference == 0 && s.desired_count > s.current_min_task_count
|
52
|
-
s.downscale_triggers.each do |trigger|
|
53
|
-
next unless trigger.match?
|
54
|
-
|
55
|
-
logger.info "Fire downscale trigger of #{s.name} by #{trigger.alarm_name} #{trigger.state}"
|
56
|
-
step = trigger.step || s.step
|
57
|
-
difference = [difference, -step].min
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
if s.current_min_task_count > s.desired_count + difference
|
62
|
-
difference = s.current_min_task_count - s.desired_count
|
63
|
-
end
|
64
|
-
|
65
|
-
if difference >= 0 && s.desired_count > s.max_task_count.max
|
66
|
-
difference = s.max_task_count.max - s.desired_count
|
67
|
-
end
|
68
|
-
|
69
|
-
if difference != 0
|
70
|
-
s.update_service(difference)
|
71
|
-
end
|
54
|
+
s.adjust_desired_count(cluster_scaling_config.cluster_resource_manager)
|
72
55
|
end
|
73
56
|
end
|
57
|
+
ths.each { |th| th.abort_on_exception = true }
|
74
58
|
|
75
59
|
ths.each(&:join)
|
76
60
|
|
77
|
-
@logger.debug "Start
|
61
|
+
@logger.debug "Start cluster scaling of #{cluster_scaling_config.name}"
|
78
62
|
|
79
|
-
|
80
|
-
|
81
|
-
|
63
|
+
required_capacity = cluster_scaling_config.service_configs.sum { |s| s.desired_count * s.required_capacity }
|
64
|
+
cluster_scaling_config.update_desired_capacity(required_capacity)
|
65
|
+
|
66
|
+
cluster_scaling_config.service_configs.each(&:wait_until_desired_count_updated)
|
82
67
|
end
|
83
68
|
end
|
84
69
|
|
85
70
|
def load_config(yaml_path)
|
86
71
|
@config = YAML.load_file(yaml_path)
|
87
72
|
@polling_interval = @config["polling_interval"] || 30
|
88
|
-
|
73
|
+
if @config["services"]
|
74
|
+
@error_logger&.warn('"services" property in root-level is deprecated. Please define it in "auto_scaling_groups" property or "spot_fleet_requests" property.')
|
75
|
+
@config.delete("services").each do |svc|
|
76
|
+
if svc["auto_scaling_group_name"] && svc["spot_fleet_request_id"]
|
77
|
+
raise "You can specify only one of 'auto_scaling_group_name' or 'spot_fleet_request_name'"
|
78
|
+
end
|
79
|
+
|
80
|
+
svc_region = svc.delete("region")
|
81
|
+
if svc["auto_scaling_group_name"]
|
82
|
+
asg_name = svc.delete("auto_scaling_group_name")
|
83
|
+
asg = @config["auto_scaling_groups"].find { |g| g["region"] == svc_region && g["name"] == asg_name }
|
84
|
+
asg["services"] ||= []
|
85
|
+
asg["services"] << svc
|
86
|
+
asg["cluster"] = svc.delete("cluster")
|
87
|
+
end
|
89
88
|
|
90
|
-
|
91
|
-
|
89
|
+
if svc["spot_fleet_request_id"]
|
90
|
+
sfr_id = svc.delete("spot_fleet_request_id")
|
91
|
+
sfr = @config["spot_fleet_requests"].find { |r| r["region"] == svc_region && r["id"] == sfr_id }
|
92
|
+
sfr["services"] ||= []
|
93
|
+
sfr["services"] << svc
|
94
|
+
sfr["cluster"] = svc.delete("cluster")
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
92
98
|
end
|
93
99
|
|
94
100
|
def auto_scaling_group_configs
|
95
|
-
@auto_scaling_group_configs ||= @config["auto_scaling_groups"].
|
101
|
+
@auto_scaling_group_configs ||= (@config["auto_scaling_groups"] || []).each.with_object({}) do |c, configs|
|
102
|
+
configs[c["name"]] ||= {}
|
103
|
+
if configs[c["name"]][c["region"]]
|
104
|
+
raise "Duplicate entry in auto_scaling_groups (name: #{c["name"]}, region: #{c["region"]})"
|
105
|
+
end
|
106
|
+
configs[c["name"]][c["region"]] = AutoScalingGroupConfig.new(c, @logger)
|
107
|
+
end.values.flat_map(&:values)
|
108
|
+
end
|
109
|
+
|
110
|
+
def spot_fleet_request_configs
|
111
|
+
@spot_fleet_request_configs ||= (@config["spot_fleet_requests"] || []).each.with_object({}) do |c, configs|
|
112
|
+
configs[c["id"]] ||= {}
|
113
|
+
if configs[c["id"]][c["region"]]
|
114
|
+
raise "Duplicate entry in spot_fleet_requests (id: #{c["id"]}, region: #{c["region"]})"
|
115
|
+
end
|
116
|
+
configs[c["id"]][c["region"]] = SpotFleetRequestConfig.new(c, @logger)
|
117
|
+
end.values.flat_map(&:values)
|
96
118
|
end
|
97
119
|
|
98
120
|
private
|
99
121
|
|
122
|
+
def setup_signal_handlers
|
123
|
+
# Use a thread and a queue to avoid "log writing failed. can't be called from trap context"
|
124
|
+
# cf. https://bugs.ruby-lang.org/issues/14222#note-3
|
125
|
+
signals = Queue.new
|
126
|
+
%i(TERM INT CONT TSTP).each do |sig|
|
127
|
+
trap(sig) { signals << sig }
|
128
|
+
end
|
129
|
+
|
130
|
+
Thread.new do
|
131
|
+
loop do
|
132
|
+
sig = signals.pop
|
133
|
+
case sig
|
134
|
+
when :INT, :TERM
|
135
|
+
@logger.info "Received SIG#{sig}, shutting down gracefully"
|
136
|
+
@stop = true
|
137
|
+
when :CONT
|
138
|
+
@logger.info "Received SIGCONT, resume auto scaling"
|
139
|
+
@enable_auto_scaling = true
|
140
|
+
when :TSTP
|
141
|
+
@logger.info "Received SIGTSTP, pause auto scaling. Send SIGCONT to resume it."
|
142
|
+
@enable_auto_scaling = false
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
100
148
|
def wait_polling_interval?(last_executed_at)
|
101
149
|
current = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
102
150
|
diff = current - last_executed_at
|
@@ -110,314 +158,15 @@ module EcsDeploy
|
|
110
158
|
loop do
|
111
159
|
break if @stop
|
112
160
|
sleep 1
|
161
|
+
next unless @enable_auto_scaling
|
113
162
|
next if wait_polling_interval?(last_executed_at)
|
114
163
|
yield
|
115
164
|
last_executed_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
165
|
+
@logger.debug "#{name} is last executed at #{last_executed_at}"
|
116
166
|
end
|
117
167
|
|
118
168
|
@logger.debug "Stop #{name}"
|
119
169
|
end
|
120
170
|
end
|
121
|
-
|
122
|
-
module ConfigBase
|
123
|
-
def initialize(attributes = {})
|
124
|
-
attributes.each do |key, val|
|
125
|
-
send("#{key}=", val)
|
126
|
-
end
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
SERVICE_CONFIG_ATTRIBUTES = %i(name cluster region auto_scaling_group_name step max_task_count min_task_count idle_time scheduled_min_task_count cooldown_time_for_reach_max upscale_triggers downscale_triggers desired_count)
|
131
|
-
ServiceConfig = Struct.new(*SERVICE_CONFIG_ATTRIBUTES) do
|
132
|
-
include ConfigBase
|
133
|
-
|
134
|
-
def initialize(attributes = {})
|
135
|
-
super(attributes)
|
136
|
-
self.idle_time ||= 60
|
137
|
-
self.max_task_count = Array(max_task_count)
|
138
|
-
self.upscale_triggers = upscale_triggers.to_a.map do |t|
|
139
|
-
TriggerConfig.new(t.merge(region: region))
|
140
|
-
end
|
141
|
-
self.downscale_triggers = downscale_triggers.to_a.map do |t|
|
142
|
-
TriggerConfig.new(t.merge(region: region))
|
143
|
-
end
|
144
|
-
self.max_task_count.sort!
|
145
|
-
self.desired_count = fetch_service.desired_count
|
146
|
-
@reach_max_at = nil
|
147
|
-
@last_updated_at = nil
|
148
|
-
end
|
149
|
-
|
150
|
-
def client
|
151
|
-
Thread.current["ecs_auto_scaler_ecs_#{region}"] ||= Aws::ECS::Client.new(
|
152
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
153
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
154
|
-
region: region
|
155
|
-
)
|
156
|
-
end
|
157
|
-
|
158
|
-
def clear_client
|
159
|
-
Thread.current["ecs_auto_scaler_ecs_#{region}"] = nil
|
160
|
-
end
|
161
|
-
|
162
|
-
def idle?
|
163
|
-
return false unless @last_updated_at
|
164
|
-
|
165
|
-
diff = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second) - @last_updated_at
|
166
|
-
diff < idle_time
|
167
|
-
end
|
168
|
-
|
169
|
-
def current_min_task_count
|
170
|
-
return min_task_count if scheduled_min_task_count.nil? || scheduled_min_task_count.empty?
|
171
|
-
|
172
|
-
scheduled_min_task_count.find(-> { {"count" => min_task_count} }) { |s|
|
173
|
-
from = Time.parse(s["from"])
|
174
|
-
to = Time.parse(s["to"])
|
175
|
-
(from..to).cover?(Time.now)
|
176
|
-
}["count"]
|
177
|
-
end
|
178
|
-
|
179
|
-
def overheat?
|
180
|
-
return false unless @reach_max_at
|
181
|
-
(Process.clock_gettime(Process::CLOCK_MONOTONIC, :second) - @reach_max_at) > cooldown_time_for_reach_max
|
182
|
-
end
|
183
|
-
|
184
|
-
def fetch_service
|
185
|
-
res = client.describe_services(cluster: cluster, services: [name])
|
186
|
-
raise "Service \"#{name}\" is not found" if res.services.empty?
|
187
|
-
res.services[0]
|
188
|
-
rescue => e
|
189
|
-
AutoScaler.error_logger.error(e)
|
190
|
-
clear_client
|
191
|
-
end
|
192
|
-
|
193
|
-
def update_service(difference)
|
194
|
-
next_desired_count = desired_count + difference
|
195
|
-
current_level = max_task_level(desired_count)
|
196
|
-
next_level = max_task_level(next_desired_count)
|
197
|
-
if current_level < next_level && overheat? # next max
|
198
|
-
level = next_level
|
199
|
-
@reach_max_at = nil
|
200
|
-
AutoScaler.logger.info "Service \"#{name}\" is overheat, uses next max count"
|
201
|
-
elsif current_level < next_level && !overheat? # wait cooldown
|
202
|
-
level = current_level
|
203
|
-
now = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
204
|
-
@reach_max_at ||= now
|
205
|
-
AutoScaler.logger.info "Service \"#{name}\" waits cooldown elapsed #{(now - @reach_max_at).to_i}sec"
|
206
|
-
elsif current_level == next_level && next_desired_count >= max_task_count[current_level] # reach current max
|
207
|
-
level = current_level
|
208
|
-
now = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
209
|
-
@reach_max_at ||= now
|
210
|
-
AutoScaler.logger.info "Service \"#{name}\" waits cooldown elapsed #{(now - @reach_max_at).to_i}sec"
|
211
|
-
elsif current_level == next_level && next_desired_count < max_task_count[current_level]
|
212
|
-
level = current_level
|
213
|
-
@reach_max_at = nil
|
214
|
-
AutoScaler.logger.info "Service \"#{name}\" clears cooldown state"
|
215
|
-
elsif current_level > next_level
|
216
|
-
level = next_level
|
217
|
-
@reach_max_at = nil
|
218
|
-
AutoScaler.logger.info "Service \"#{name}\" clears cooldown state"
|
219
|
-
end
|
220
|
-
|
221
|
-
next_desired_count = [next_desired_count, max_task_count[level]].min
|
222
|
-
client.update_service(
|
223
|
-
cluster: cluster,
|
224
|
-
service: name,
|
225
|
-
desired_count: next_desired_count,
|
226
|
-
)
|
227
|
-
client.wait_until(:services_stable, cluster: cluster, services: [name]) do |w|
|
228
|
-
w.before_wait do
|
229
|
-
AutoScaler.logger.debug "wait service stable [#{name}]"
|
230
|
-
end
|
231
|
-
end if difference < 0
|
232
|
-
@last_updated_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
233
|
-
self.desired_count = next_desired_count
|
234
|
-
AutoScaler.logger.info "Update service \"#{name}\": desired_count -> #{next_desired_count}"
|
235
|
-
rescue => e
|
236
|
-
AutoScaler.error_logger.error(e)
|
237
|
-
clear_client
|
238
|
-
end
|
239
|
-
|
240
|
-
def fetch_container_instances
|
241
|
-
arns = []
|
242
|
-
resp = nil
|
243
|
-
loop do
|
244
|
-
options = {cluster: cluster}
|
245
|
-
options.merge(next_token: resp.next_token) if resp && resp.next_token
|
246
|
-
resp = client.list_container_instances(options)
|
247
|
-
arns.concat(resp.container_instance_arns)
|
248
|
-
break unless resp.next_token
|
249
|
-
end
|
250
|
-
|
251
|
-
chunk_size = 50
|
252
|
-
container_instances = []
|
253
|
-
arns.each_slice(chunk_size) do |arn_chunk|
|
254
|
-
is = client.describe_container_instances(cluster: cluster, container_instances: arn_chunk).container_instances
|
255
|
-
container_instances.concat(is)
|
256
|
-
end
|
257
|
-
|
258
|
-
container_instances
|
259
|
-
end
|
260
|
-
|
261
|
-
private
|
262
|
-
|
263
|
-
def max_task_level(count)
|
264
|
-
max_task_count.index { |i| count <= i } || max_task_count.size - 1
|
265
|
-
end
|
266
|
-
end
|
267
|
-
|
268
|
-
TriggerConfig = Struct.new(:alarm_name, :region, :state, :step) do
|
269
|
-
include ConfigBase
|
270
|
-
|
271
|
-
def client
|
272
|
-
Thread.current["ecs_auto_scaler_cloud_watch_#{region}"] ||= Aws::CloudWatch::Client.new(
|
273
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
274
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
275
|
-
region: region
|
276
|
-
)
|
277
|
-
end
|
278
|
-
|
279
|
-
def clear_client
|
280
|
-
Thread.current["ecs_auto_scaler_cloud_watch_#{region}"] = nil
|
281
|
-
end
|
282
|
-
|
283
|
-
def match?
|
284
|
-
fetch_alarm.state_value == state
|
285
|
-
end
|
286
|
-
|
287
|
-
def fetch_alarm
|
288
|
-
res = client.describe_alarms(alarm_names: [alarm_name])
|
289
|
-
|
290
|
-
raise "Alarm \"#{alarm_name}\" is not found" if res.metric_alarms.empty?
|
291
|
-
res.metric_alarms[0].tap do |alarm|
|
292
|
-
AutoScaler.logger.debug("#{alarm.alarm_name} state is #{alarm.state_value}")
|
293
|
-
end
|
294
|
-
rescue => e
|
295
|
-
AutoScaler.error_logger.error(e)
|
296
|
-
clear_client
|
297
|
-
end
|
298
|
-
end
|
299
|
-
|
300
|
-
AutoScalingConfig = Struct.new(:name, :region, :buffer) do
|
301
|
-
include ConfigBase
|
302
|
-
|
303
|
-
def client
|
304
|
-
Thread.current["ecs_auto_scaler_auto_scaling_#{region}"] ||= Aws::AutoScaling::Client.new(
|
305
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
306
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
307
|
-
region: region
|
308
|
-
)
|
309
|
-
end
|
310
|
-
|
311
|
-
def clear_client
|
312
|
-
Thread.current["ecs_auto_scaler_auto_scaling_#{region}"] = nil
|
313
|
-
end
|
314
|
-
|
315
|
-
def ec2_client
|
316
|
-
Thread.current["ecs_auto_scaler_ec2_#{region}"] ||= Aws::EC2::Client.new(
|
317
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
318
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
319
|
-
region: region
|
320
|
-
)
|
321
|
-
end
|
322
|
-
|
323
|
-
def clear_ec2_client
|
324
|
-
Thread.current["ecs_auto_scaler_ec2_#{region}"] = nil
|
325
|
-
end
|
326
|
-
|
327
|
-
def instances(reload: false)
|
328
|
-
if reload || @instances.nil?
|
329
|
-
resp = client.describe_auto_scaling_groups({
|
330
|
-
auto_scaling_group_names: [name],
|
331
|
-
})
|
332
|
-
@instances = resp.auto_scaling_groups[0].instances
|
333
|
-
else
|
334
|
-
@instances
|
335
|
-
end
|
336
|
-
end
|
337
|
-
|
338
|
-
def update_auto_scaling_group(total_service_count, service_config)
|
339
|
-
desired_capacity = total_service_count + buffer.to_i
|
340
|
-
|
341
|
-
current_asg = client.describe_auto_scaling_groups({
|
342
|
-
auto_scaling_group_names: [name],
|
343
|
-
}).auto_scaling_groups[0]
|
344
|
-
|
345
|
-
if current_asg.desired_capacity > desired_capacity
|
346
|
-
diff = current_asg.desired_capacity - desired_capacity
|
347
|
-
container_instances = service_config.fetch_container_instances
|
348
|
-
deregisterable_instances = container_instances.select do |i|
|
349
|
-
i.pending_tasks_count == 0 && i.running_tasks_count == 0
|
350
|
-
end
|
351
|
-
|
352
|
-
AutoScaler.logger.info "Fetch deregisterable instances: #{deregisterable_instances.map(&:ec2_instance_id).inspect}"
|
353
|
-
|
354
|
-
deregistered_instance_ids = []
|
355
|
-
deregisterable_instances.each do |i|
|
356
|
-
break if deregistered_instance_ids.size >= diff
|
357
|
-
|
358
|
-
begin
|
359
|
-
service_config.client.deregister_container_instance(cluster: service_config.cluster, container_instance: i.container_instance_arn, force: false)
|
360
|
-
deregistered_instance_ids << i.ec2_instance_id
|
361
|
-
rescue Aws::ECS::Errors::InvalidParameterException
|
362
|
-
end
|
363
|
-
end
|
364
|
-
|
365
|
-
AutoScaler.logger.info "Deregistered instances: #{deregistered_instance_ids.inspect}"
|
366
|
-
|
367
|
-
detach_and_terminate_instances(deregistered_instance_ids)
|
368
|
-
|
369
|
-
AutoScaler.logger.info "Update auto scaling group \"#{name}\": desired_capacity -> #{desired_capacity}"
|
370
|
-
elsif current_asg.desired_capacity < desired_capacity
|
371
|
-
client.update_auto_scaling_group(
|
372
|
-
auto_scaling_group_name: name,
|
373
|
-
min_size: 0,
|
374
|
-
max_size: [current_asg.max_size, desired_capacity].max,
|
375
|
-
desired_capacity: desired_capacity,
|
376
|
-
)
|
377
|
-
AutoScaler.logger.info "Update auto scaling group \"#{name}\": desired_capacity -> #{desired_capacity}"
|
378
|
-
end
|
379
|
-
rescue => e
|
380
|
-
AutoScaler.error_logger.error(e)
|
381
|
-
clear_client
|
382
|
-
end
|
383
|
-
|
384
|
-
def detach_and_terminate_instances(instance_ids)
|
385
|
-
return if instance_ids.empty?
|
386
|
-
|
387
|
-
client.detach_instances(
|
388
|
-
auto_scaling_group_name: name,
|
389
|
-
instance_ids: instance_ids,
|
390
|
-
should_decrement_desired_capacity: true
|
391
|
-
)
|
392
|
-
|
393
|
-
AutoScaler.logger.info "Detach instances from ASG #{name}: #{instance_ids.inspect}"
|
394
|
-
sleep 3
|
395
|
-
|
396
|
-
ec2_client.terminate_instances(instance_ids: instance_ids)
|
397
|
-
|
398
|
-
AutoScaler.logger.info "Terminated instances: #{instance_ids.inspect}"
|
399
|
-
rescue => e
|
400
|
-
AutoScaler.error_logger.error(e)
|
401
|
-
clear_client
|
402
|
-
clear_ec2_client
|
403
|
-
end
|
404
|
-
|
405
|
-
def detach_and_terminate_orphan_instances(service_config)
|
406
|
-
container_instance_ids = service_config.fetch_container_instances.map(&:ec2_instance_id)
|
407
|
-
orphans = instances(reload: true).reject { |i| container_instance_ids.include?(i.instance_id) }.map(&:instance_id)
|
408
|
-
|
409
|
-
return if orphans.empty?
|
410
|
-
|
411
|
-
targets = ec2_client.describe_instances(instance_ids: orphans).reservations[0].instances.select do |i|
|
412
|
-
(Time.now - i.launch_time) > 600
|
413
|
-
end
|
414
|
-
|
415
|
-
detach_and_terminate_instances(targets.map(&:instance_id))
|
416
|
-
rescue => e
|
417
|
-
AutoScaler.error_logger.error(e)
|
418
|
-
clear_client
|
419
|
-
clear_ec2_client
|
420
|
-
end
|
421
|
-
end
|
422
171
|
end
|
423
172
|
end
|