ecs_deploy 0.3.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +150 -0
- data/README.md +272 -23
- data/Rakefile +4 -0
- data/ecs_deploy.gemspec +9 -3
- data/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb +209 -0
- data/lib/ecs_deploy/auto_scaler/cluster_resource_manager.rb +149 -0
- data/lib/ecs_deploy/auto_scaler/config_base.rb +16 -0
- data/lib/ecs_deploy/auto_scaler/instance_drainer.rb +134 -0
- data/lib/ecs_deploy/auto_scaler/service_config.rb +223 -0
- data/lib/ecs_deploy/auto_scaler/spot_fleet_request_config.rb +102 -0
- data/lib/ecs_deploy/auto_scaler/trigger_config.rb +42 -0
- data/lib/ecs_deploy/auto_scaler.rb +105 -339
- data/lib/ecs_deploy/capistrano.rb +73 -3
- data/lib/ecs_deploy/configuration.rb +6 -2
- data/lib/ecs_deploy/instance_fluctuation_manager.rb +198 -0
- data/lib/ecs_deploy/scheduled_task.rb +15 -3
- data/lib/ecs_deploy/service.rb +100 -21
- data/lib/ecs_deploy/task_definition.rb +30 -9
- data/lib/ecs_deploy/version.rb +1 -1
- data/lib/ecs_deploy.rb +1 -1
- metadata +113 -14
@@ -1,6 +1,11 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "logger"
|
2
|
+
require "time"
|
3
|
+
require "yaml"
|
4
|
+
|
5
|
+
require "ecs_deploy/auto_scaler/auto_scaling_group_config"
|
6
|
+
require "ecs_deploy/auto_scaler/instance_drainer"
|
7
|
+
require "ecs_deploy/auto_scaler/service_config"
|
8
|
+
require "ecs_deploy/auto_scaler/spot_fleet_request_config"
|
4
9
|
|
5
10
|
module EcsDeploy
|
6
11
|
module AutoScaler
|
@@ -8,8 +13,8 @@ module EcsDeploy
|
|
8
13
|
attr_reader :logger, :error_logger
|
9
14
|
|
10
15
|
def run(yaml_path, log_file = nil, error_log_file = nil)
|
11
|
-
|
12
|
-
|
16
|
+
@enable_auto_scaling = true
|
17
|
+
setup_signal_handlers
|
13
18
|
@logger = Logger.new(log_file || STDOUT)
|
14
19
|
@logger.level = Logger.const_get(ENV["ECS_AUTO_SCALER_LOG_LEVEL"].upcase) if ENV["ECS_AUTO_SCALER_LOG_LEVEL"]
|
15
20
|
STDOUT.sync = true unless log_file
|
@@ -17,90 +22,129 @@ module EcsDeploy
|
|
17
22
|
@error_logger.level = Logger.const_get(ENV["ECS_AUTO_SCALER_LOG_LEVEL"].upcase) if ENV["ECS_AUTO_SCALER_LOG_LEVEL"]
|
18
23
|
STDERR.sync = true unless error_log_file
|
19
24
|
load_config(yaml_path)
|
20
|
-
service_configs
|
21
|
-
auto_scaling_group_configs
|
22
25
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
26
|
+
ths = (auto_scaling_group_configs + spot_fleet_request_configs).map do |cluster_scaling_config|
|
27
|
+
Thread.new(cluster_scaling_config, &method(:main_loop)).tap { |th| th.abort_on_exception = true }
|
28
|
+
end
|
29
|
+
|
30
|
+
if @config["spot_instance_intrp_warns_queue_urls"]
|
31
|
+
drainer = EcsDeploy::AutoScaler::InstanceDrainer.new(
|
32
|
+
auto_scaling_group_configs: auto_scaling_group_configs,
|
33
|
+
spot_fleet_request_configs: spot_fleet_request_configs,
|
34
|
+
logger: logger,
|
35
|
+
)
|
36
|
+
polling_ths = @config["spot_instance_intrp_warns_queue_urls"].map do |queue_url|
|
37
|
+
Thread.new(queue_url) do |url|
|
38
|
+
drainer.poll_spot_instance_interruption_warnings(url)
|
39
|
+
end.tap { |th| th.abort_on_exception = true }
|
40
|
+
end
|
27
41
|
end
|
28
42
|
|
29
43
|
ths.each(&:join)
|
44
|
+
|
45
|
+
drainer&.stop
|
46
|
+
polling_ths&.each(&:join)
|
30
47
|
end
|
31
48
|
|
32
|
-
def main_loop(
|
33
|
-
loop_with_polling_interval("loop of #{
|
34
|
-
ths =
|
49
|
+
def main_loop(cluster_scaling_config)
|
50
|
+
loop_with_polling_interval("loop of #{cluster_scaling_config.name}") do
|
51
|
+
ths = cluster_scaling_config.service_configs.map do |service_config|
|
35
52
|
Thread.new(service_config) do |s|
|
36
53
|
@logger.debug "Start service scaling of #{s.name}"
|
37
|
-
|
38
|
-
if s.idle?
|
39
|
-
@logger.debug "#{s.name} is idling"
|
40
|
-
next
|
41
|
-
end
|
42
|
-
|
43
|
-
difference = 0
|
44
|
-
s.upscale_triggers.each do |trigger|
|
45
|
-
step = trigger.step || s.step
|
46
|
-
next if difference >= step
|
47
|
-
|
48
|
-
if trigger.match?
|
49
|
-
logger.info "Fire upscale trigger of #{s.name} by #{trigger.alarm_name} #{trigger.state}"
|
50
|
-
difference = step
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
if difference == 0 && s.desired_count > s.current_min_task_count
|
55
|
-
s.downscale_triggers.each do |trigger|
|
56
|
-
next unless trigger.match?
|
57
|
-
|
58
|
-
logger.info "Fire downscale trigger of #{s.name} by #{trigger.alarm_name} #{trigger.state}"
|
59
|
-
step = trigger.step || s.step
|
60
|
-
difference = [difference, -step].min
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
if s.current_min_task_count > s.desired_count + difference
|
65
|
-
difference = s.current_min_task_count - s.desired_count
|
66
|
-
end
|
67
|
-
|
68
|
-
if difference >= 0 && s.desired_count > s.max_task_count.max
|
69
|
-
difference = s.max_task_count.max - s.desired_count
|
70
|
-
end
|
71
|
-
|
72
|
-
if difference != 0
|
73
|
-
s.update_service(difference)
|
74
|
-
end
|
54
|
+
s.adjust_desired_count(cluster_scaling_config.cluster_resource_manager)
|
75
55
|
end
|
76
56
|
end
|
77
57
|
ths.each { |th| th.abort_on_exception = true }
|
78
58
|
|
79
59
|
ths.each(&:join)
|
80
60
|
|
81
|
-
@logger.debug "Start
|
61
|
+
@logger.debug "Start cluster scaling of #{cluster_scaling_config.name}"
|
62
|
+
|
63
|
+
required_capacity = cluster_scaling_config.service_configs.sum { |s| s.desired_count * s.required_capacity }
|
64
|
+
cluster_scaling_config.update_desired_capacity(required_capacity)
|
82
65
|
|
83
|
-
|
84
|
-
asg_config.update_auto_scaling_group(total_service_count, configs[0])
|
85
|
-
asg_config.detach_and_terminate_orphan_instances(configs[0])
|
66
|
+
cluster_scaling_config.service_configs.each(&:wait_until_desired_count_updated)
|
86
67
|
end
|
87
68
|
end
|
88
69
|
|
89
70
|
def load_config(yaml_path)
|
90
71
|
@config = YAML.load_file(yaml_path)
|
91
72
|
@polling_interval = @config["polling_interval"] || 30
|
92
|
-
|
73
|
+
if @config["services"]
|
74
|
+
@error_logger&.warn('"services" property in root-level is deprecated. Please define it in "auto_scaling_groups" property or "spot_fleet_requests" property.')
|
75
|
+
@config.delete("services").each do |svc|
|
76
|
+
if svc["auto_scaling_group_name"] && svc["spot_fleet_request_id"]
|
77
|
+
raise "You can specify only one of 'auto_scaling_group_name' or 'spot_fleet_request_name'"
|
78
|
+
end
|
79
|
+
|
80
|
+
svc_region = svc.delete("region")
|
81
|
+
if svc["auto_scaling_group_name"]
|
82
|
+
asg_name = svc.delete("auto_scaling_group_name")
|
83
|
+
asg = @config["auto_scaling_groups"].find { |g| g["region"] == svc_region && g["name"] == asg_name }
|
84
|
+
asg["services"] ||= []
|
85
|
+
asg["services"] << svc
|
86
|
+
asg["cluster"] = svc.delete("cluster")
|
87
|
+
end
|
93
88
|
|
94
|
-
|
95
|
-
|
89
|
+
if svc["spot_fleet_request_id"]
|
90
|
+
sfr_id = svc.delete("spot_fleet_request_id")
|
91
|
+
sfr = @config["spot_fleet_requests"].find { |r| r["region"] == svc_region && r["id"] == sfr_id }
|
92
|
+
sfr["services"] ||= []
|
93
|
+
sfr["services"] << svc
|
94
|
+
sfr["cluster"] = svc.delete("cluster")
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
96
98
|
end
|
97
99
|
|
98
100
|
def auto_scaling_group_configs
|
99
|
-
@auto_scaling_group_configs ||= @config["auto_scaling_groups"].
|
101
|
+
@auto_scaling_group_configs ||= (@config["auto_scaling_groups"] || []).each.with_object({}) do |c, configs|
|
102
|
+
configs[c["name"]] ||= {}
|
103
|
+
if configs[c["name"]][c["region"]]
|
104
|
+
raise "Duplicate entry in auto_scaling_groups (name: #{c["name"]}, region: #{c["region"]})"
|
105
|
+
end
|
106
|
+
configs[c["name"]][c["region"]] = AutoScalingGroupConfig.new(c, @logger)
|
107
|
+
end.values.flat_map(&:values)
|
108
|
+
end
|
109
|
+
|
110
|
+
def spot_fleet_request_configs
|
111
|
+
@spot_fleet_request_configs ||= (@config["spot_fleet_requests"] || []).each.with_object({}) do |c, configs|
|
112
|
+
configs[c["id"]] ||= {}
|
113
|
+
if configs[c["id"]][c["region"]]
|
114
|
+
raise "Duplicate entry in spot_fleet_requests (id: #{c["id"]}, region: #{c["region"]})"
|
115
|
+
end
|
116
|
+
configs[c["id"]][c["region"]] = SpotFleetRequestConfig.new(c, @logger)
|
117
|
+
end.values.flat_map(&:values)
|
100
118
|
end
|
101
119
|
|
102
120
|
private
|
103
121
|
|
122
|
+
def setup_signal_handlers
|
123
|
+
# Use a thread and a queue to avoid "log writing failed. can't be called from trap context"
|
124
|
+
# cf. https://bugs.ruby-lang.org/issues/14222#note-3
|
125
|
+
signals = Queue.new
|
126
|
+
%i(TERM INT CONT TSTP).each do |sig|
|
127
|
+
trap(sig) { signals << sig }
|
128
|
+
end
|
129
|
+
|
130
|
+
Thread.new do
|
131
|
+
loop do
|
132
|
+
sig = signals.pop
|
133
|
+
case sig
|
134
|
+
when :INT, :TERM
|
135
|
+
@logger.info "Received SIG#{sig}, shutting down gracefully"
|
136
|
+
@stop = true
|
137
|
+
when :CONT
|
138
|
+
@logger.info "Received SIGCONT, resume auto scaling"
|
139
|
+
@enable_auto_scaling = true
|
140
|
+
when :TSTP
|
141
|
+
@logger.info "Received SIGTSTP, pause auto scaling. Send SIGCONT to resume it."
|
142
|
+
@enable_auto_scaling = false
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
104
148
|
def wait_polling_interval?(last_executed_at)
|
105
149
|
current = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
106
150
|
diff = current - last_executed_at
|
@@ -114,6 +158,7 @@ module EcsDeploy
|
|
114
158
|
loop do
|
115
159
|
break if @stop
|
116
160
|
sleep 1
|
161
|
+
next unless @enable_auto_scaling
|
117
162
|
next if wait_polling_interval?(last_executed_at)
|
118
163
|
yield
|
119
164
|
last_executed_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
@@ -123,284 +168,5 @@ module EcsDeploy
|
|
123
168
|
@logger.debug "Stop #{name}"
|
124
169
|
end
|
125
170
|
end
|
126
|
-
|
127
|
-
module ConfigBase
|
128
|
-
def initialize(attributes = {})
|
129
|
-
attributes.each do |key, val|
|
130
|
-
send("#{key}=", val)
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
SERVICE_CONFIG_ATTRIBUTES = %i(name cluster region auto_scaling_group_name step max_task_count min_task_count idle_time scheduled_min_task_count cooldown_time_for_reach_max upscale_triggers downscale_triggers desired_count)
|
136
|
-
ServiceConfig = Struct.new(*SERVICE_CONFIG_ATTRIBUTES) do
|
137
|
-
include ConfigBase
|
138
|
-
|
139
|
-
def initialize(attributes = {})
|
140
|
-
super(attributes)
|
141
|
-
self.idle_time ||= 60
|
142
|
-
self.max_task_count = Array(max_task_count)
|
143
|
-
self.upscale_triggers = upscale_triggers.to_a.map do |t|
|
144
|
-
TriggerConfig.new(t.merge(region: region))
|
145
|
-
end
|
146
|
-
self.downscale_triggers = downscale_triggers.to_a.map do |t|
|
147
|
-
TriggerConfig.new(t.merge(region: region))
|
148
|
-
end
|
149
|
-
self.max_task_count.sort!
|
150
|
-
self.desired_count = fetch_service.desired_count
|
151
|
-
@reach_max_at = nil
|
152
|
-
@last_updated_at = nil
|
153
|
-
end
|
154
|
-
|
155
|
-
def client
|
156
|
-
Aws::ECS::Client.new(
|
157
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
158
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
159
|
-
region: region
|
160
|
-
)
|
161
|
-
end
|
162
|
-
|
163
|
-
def idle?
|
164
|
-
return false unless @last_updated_at
|
165
|
-
|
166
|
-
diff = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second) - @last_updated_at
|
167
|
-
diff < idle_time
|
168
|
-
end
|
169
|
-
|
170
|
-
def current_min_task_count
|
171
|
-
return min_task_count if scheduled_min_task_count.nil? || scheduled_min_task_count.empty?
|
172
|
-
|
173
|
-
scheduled_min_task_count.find(-> { {"count" => min_task_count} }) { |s|
|
174
|
-
from = Time.parse(s["from"])
|
175
|
-
to = Time.parse(s["to"])
|
176
|
-
(from..to).cover?(Time.now)
|
177
|
-
}["count"]
|
178
|
-
end
|
179
|
-
|
180
|
-
def overheat?
|
181
|
-
return false unless @reach_max_at
|
182
|
-
(Process.clock_gettime(Process::CLOCK_MONOTONIC, :second) - @reach_max_at) > cooldown_time_for_reach_max
|
183
|
-
end
|
184
|
-
|
185
|
-
def fetch_service
|
186
|
-
res = client.describe_services(cluster: cluster, services: [name])
|
187
|
-
raise "Service \"#{name}\" is not found" if res.services.empty?
|
188
|
-
res.services[0]
|
189
|
-
rescue => e
|
190
|
-
AutoScaler.error_logger.error(e)
|
191
|
-
end
|
192
|
-
|
193
|
-
def update_service(difference)
|
194
|
-
next_desired_count = desired_count + difference
|
195
|
-
current_level = max_task_level(desired_count)
|
196
|
-
next_level = max_task_level(next_desired_count)
|
197
|
-
if current_level < next_level && overheat? # next max
|
198
|
-
level = next_level
|
199
|
-
@reach_max_at = nil
|
200
|
-
AutoScaler.logger.info "Service \"#{name}\" is overheat, uses next max count"
|
201
|
-
elsif current_level < next_level && !overheat? # wait cooldown
|
202
|
-
level = current_level
|
203
|
-
now = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
204
|
-
@reach_max_at ||= now
|
205
|
-
AutoScaler.logger.info "Service \"#{name}\" waits cooldown elapsed #{(now - @reach_max_at).to_i}sec"
|
206
|
-
elsif current_level == next_level && next_desired_count >= max_task_count[current_level] # reach current max
|
207
|
-
level = current_level
|
208
|
-
now = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
209
|
-
@reach_max_at ||= now
|
210
|
-
AutoScaler.logger.info "Service \"#{name}\" waits cooldown elapsed #{(now - @reach_max_at).to_i}sec"
|
211
|
-
elsif current_level == next_level && next_desired_count < max_task_count[current_level]
|
212
|
-
level = current_level
|
213
|
-
@reach_max_at = nil
|
214
|
-
AutoScaler.logger.info "Service \"#{name}\" clears cooldown state"
|
215
|
-
elsif current_level > next_level
|
216
|
-
level = next_level
|
217
|
-
@reach_max_at = nil
|
218
|
-
AutoScaler.logger.info "Service \"#{name}\" clears cooldown state"
|
219
|
-
end
|
220
|
-
|
221
|
-
cl = client
|
222
|
-
next_desired_count = [next_desired_count, max_task_count[level]].min
|
223
|
-
cl.update_service(
|
224
|
-
cluster: cluster,
|
225
|
-
service: name,
|
226
|
-
desired_count: next_desired_count,
|
227
|
-
)
|
228
|
-
cl.wait_until(:services_stable, cluster: cluster, services: [name]) do |w|
|
229
|
-
w.before_wait do
|
230
|
-
AutoScaler.logger.debug "wait service stable [#{name}]"
|
231
|
-
end
|
232
|
-
end if difference < 0
|
233
|
-
@last_updated_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
|
234
|
-
self.desired_count = next_desired_count
|
235
|
-
AutoScaler.logger.info "Update service \"#{name}\": desired_count -> #{next_desired_count}"
|
236
|
-
rescue => e
|
237
|
-
AutoScaler.error_logger.error(e)
|
238
|
-
end
|
239
|
-
|
240
|
-
def fetch_container_instances
|
241
|
-
arns = []
|
242
|
-
resp = nil
|
243
|
-
cl = client
|
244
|
-
loop do
|
245
|
-
options = {cluster: cluster}
|
246
|
-
options.merge(next_token: resp.next_token) if resp && resp.next_token
|
247
|
-
resp = cl.list_container_instances(options)
|
248
|
-
arns.concat(resp.container_instance_arns)
|
249
|
-
break unless resp.next_token
|
250
|
-
end
|
251
|
-
|
252
|
-
chunk_size = 50
|
253
|
-
container_instances = []
|
254
|
-
arns.each_slice(chunk_size) do |arn_chunk|
|
255
|
-
is = cl.describe_container_instances(cluster: cluster, container_instances: arn_chunk).container_instances
|
256
|
-
container_instances.concat(is)
|
257
|
-
end
|
258
|
-
|
259
|
-
container_instances
|
260
|
-
end
|
261
|
-
|
262
|
-
private
|
263
|
-
|
264
|
-
def max_task_level(count)
|
265
|
-
max_task_count.index { |i| count <= i } || max_task_count.size - 1
|
266
|
-
end
|
267
|
-
end
|
268
|
-
|
269
|
-
TriggerConfig = Struct.new(:alarm_name, :region, :state, :step) do
|
270
|
-
include ConfigBase
|
271
|
-
|
272
|
-
def client
|
273
|
-
Aws::CloudWatch::Client.new(
|
274
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
275
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
276
|
-
region: region
|
277
|
-
)
|
278
|
-
end
|
279
|
-
|
280
|
-
def match?
|
281
|
-
fetch_alarm.state_value == state
|
282
|
-
end
|
283
|
-
|
284
|
-
def fetch_alarm
|
285
|
-
res = client.describe_alarms(alarm_names: [alarm_name])
|
286
|
-
|
287
|
-
raise "Alarm \"#{alarm_name}\" is not found" if res.metric_alarms.empty?
|
288
|
-
res.metric_alarms[0].tap do |alarm|
|
289
|
-
AutoScaler.logger.debug("#{alarm.alarm_name} state is #{alarm.state_value}")
|
290
|
-
end
|
291
|
-
rescue => e
|
292
|
-
AutoScaler.error_logger.error(e)
|
293
|
-
end
|
294
|
-
end
|
295
|
-
|
296
|
-
AutoScalingConfig = Struct.new(:name, :region, :buffer) do
|
297
|
-
include ConfigBase
|
298
|
-
|
299
|
-
def client
|
300
|
-
Aws::AutoScaling::Client.new(
|
301
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
302
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
303
|
-
region: region
|
304
|
-
)
|
305
|
-
end
|
306
|
-
|
307
|
-
def ec2_client
|
308
|
-
Aws::EC2::Client.new(
|
309
|
-
access_key_id: EcsDeploy.config.access_key_id,
|
310
|
-
secret_access_key: EcsDeploy.config.secret_access_key,
|
311
|
-
region: region
|
312
|
-
)
|
313
|
-
end
|
314
|
-
|
315
|
-
def instances(reload: false)
|
316
|
-
if reload || @instances.nil?
|
317
|
-
resp = client.describe_auto_scaling_groups({
|
318
|
-
auto_scaling_group_names: [name],
|
319
|
-
})
|
320
|
-
@instances = resp.auto_scaling_groups[0].instances
|
321
|
-
else
|
322
|
-
@instances
|
323
|
-
end
|
324
|
-
end
|
325
|
-
|
326
|
-
def update_auto_scaling_group(total_service_count, service_config)
|
327
|
-
desired_capacity = total_service_count + buffer.to_i
|
328
|
-
|
329
|
-
current_asg = client.describe_auto_scaling_groups({
|
330
|
-
auto_scaling_group_names: [name],
|
331
|
-
}).auto_scaling_groups[0]
|
332
|
-
|
333
|
-
if current_asg.desired_capacity > desired_capacity
|
334
|
-
diff = current_asg.desired_capacity - desired_capacity
|
335
|
-
container_instances = service_config.fetch_container_instances
|
336
|
-
deregisterable_instances = container_instances.select do |i|
|
337
|
-
i.pending_tasks_count == 0 && i.running_tasks_count == 0
|
338
|
-
end
|
339
|
-
|
340
|
-
AutoScaler.logger.info "Fetch deregisterable instances: #{deregisterable_instances.map(&:ec2_instance_id).inspect}"
|
341
|
-
|
342
|
-
deregistered_instance_ids = []
|
343
|
-
deregisterable_instances.each do |i|
|
344
|
-
break if deregistered_instance_ids.size >= diff
|
345
|
-
|
346
|
-
begin
|
347
|
-
service_config.client.deregister_container_instance(cluster: service_config.cluster, container_instance: i.container_instance_arn, force: false)
|
348
|
-
deregistered_instance_ids << i.ec2_instance_id
|
349
|
-
rescue Aws::ECS::Errors::InvalidParameterException
|
350
|
-
end
|
351
|
-
end
|
352
|
-
|
353
|
-
AutoScaler.logger.info "Deregistered instances: #{deregistered_instance_ids.inspect}"
|
354
|
-
|
355
|
-
detach_and_terminate_instances(deregistered_instance_ids)
|
356
|
-
|
357
|
-
AutoScaler.logger.info "Update auto scaling group \"#{name}\": desired_capacity -> #{desired_capacity}"
|
358
|
-
elsif current_asg.desired_capacity < desired_capacity
|
359
|
-
client.update_auto_scaling_group(
|
360
|
-
auto_scaling_group_name: name,
|
361
|
-
min_size: 0,
|
362
|
-
max_size: [current_asg.max_size, desired_capacity].max,
|
363
|
-
desired_capacity: desired_capacity,
|
364
|
-
)
|
365
|
-
AutoScaler.logger.info "Update auto scaling group \"#{name}\": desired_capacity -> #{desired_capacity}"
|
366
|
-
end
|
367
|
-
rescue => e
|
368
|
-
AutoScaler.error_logger.error(e)
|
369
|
-
end
|
370
|
-
|
371
|
-
def detach_and_terminate_instances(instance_ids)
|
372
|
-
return if instance_ids.empty?
|
373
|
-
|
374
|
-
client.detach_instances(
|
375
|
-
auto_scaling_group_name: name,
|
376
|
-
instance_ids: instance_ids,
|
377
|
-
should_decrement_desired_capacity: true
|
378
|
-
)
|
379
|
-
|
380
|
-
AutoScaler.logger.info "Detach instances from ASG #{name}: #{instance_ids.inspect}"
|
381
|
-
sleep 3
|
382
|
-
|
383
|
-
ec2_client.terminate_instances(instance_ids: instance_ids)
|
384
|
-
|
385
|
-
AutoScaler.logger.info "Terminated instances: #{instance_ids.inspect}"
|
386
|
-
rescue => e
|
387
|
-
AutoScaler.error_logger.error(e)
|
388
|
-
end
|
389
|
-
|
390
|
-
def detach_and_terminate_orphan_instances(service_config)
|
391
|
-
container_instance_ids = service_config.fetch_container_instances.map(&:ec2_instance_id)
|
392
|
-
orphans = instances(reload: true).reject { |i| container_instance_ids.include?(i.instance_id) }.map(&:instance_id)
|
393
|
-
|
394
|
-
return if orphans.empty?
|
395
|
-
|
396
|
-
targets = ec2_client.describe_instances(instance_ids: orphans).reservations[0].instances.select do |i|
|
397
|
-
(Time.now - i.launch_time) > 600
|
398
|
-
end
|
399
|
-
|
400
|
-
detach_and_terminate_instances(targets.map(&:instance_id))
|
401
|
-
rescue => e
|
402
|
-
AutoScaler.error_logger.error(e)
|
403
|
-
end
|
404
|
-
end
|
405
171
|
end
|
406
172
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'ecs_deploy'
|
2
|
+
require 'ecs_deploy/instance_fluctuation_manager'
|
2
3
|
|
3
4
|
namespace :ecs do
|
4
5
|
task :configure do
|
@@ -7,6 +8,8 @@ namespace :ecs do
|
|
7
8
|
c.deploy_wait_timeout = fetch(:ecs_deploy_wait_timeout) if fetch(:ecs_deploy_wait_timeout)
|
8
9
|
c.ecs_service_role = fetch(:ecs_service_role) if fetch(:ecs_service_role)
|
9
10
|
c.default_region = Array(fetch(:ecs_region))[0] if fetch(:ecs_region)
|
11
|
+
c.ecs_wait_until_services_stable_max_attempts = fetch(:ecs_wait_until_services_stable_max_attempts) if fetch(:ecs_wait_until_services_stable_max_attempts)
|
12
|
+
c.ecs_wait_until_services_stable_delay = fetch(:ecs_wait_until_services_stable_delay) if fetch(:ecs_wait_until_services_stable_delay)
|
10
13
|
end
|
11
14
|
|
12
15
|
if ENV["TARGET_CLUSTER"]
|
@@ -20,7 +23,7 @@ namespace :ecs do
|
|
20
23
|
task register_task_definition: [:configure] do
|
21
24
|
if fetch(:ecs_tasks)
|
22
25
|
regions = Array(fetch(:ecs_region))
|
23
|
-
regions = [EcsDeploy.config.default_region
|
26
|
+
regions = [EcsDeploy.config.default_region] if regions.empty?
|
24
27
|
ecs_registered_tasks = {}
|
25
28
|
regions.each do |region|
|
26
29
|
ecs_registered_tasks[region] = {}
|
@@ -30,9 +33,14 @@ namespace :ecs do
|
|
30
33
|
task_definition_name: t[:name],
|
31
34
|
container_definitions: t[:container_definitions],
|
32
35
|
task_role_arn: t[:task_role_arn],
|
36
|
+
execution_role_arn: t[:execution_role_arn],
|
33
37
|
volumes: t[:volumes],
|
34
38
|
network_mode: t[:network_mode],
|
35
39
|
placement_constraints: t[:placement_constraints],
|
40
|
+
requires_compatibilities: t[:requires_compatibilities],
|
41
|
+
cpu: t[:cpu],
|
42
|
+
memory: t[:memory],
|
43
|
+
tags: t[:tags],
|
36
44
|
)
|
37
45
|
result = task_definition.register
|
38
46
|
ecs_registered_tasks[region][t[:name]] = result
|
@@ -58,6 +66,10 @@ namespace :ecs do
|
|
58
66
|
description: t[:description],
|
59
67
|
target_id: t[:target_id],
|
60
68
|
task_definition_name: t[:task_definition_name],
|
69
|
+
network_configuration: t[:network_configuration],
|
70
|
+
launch_type: t[:launch_type],
|
71
|
+
platform_version: t[:platform_version],
|
72
|
+
group: t[:group],
|
61
73
|
revision: t[:revision],
|
62
74
|
task_count: t[:task_count],
|
63
75
|
role_arn: t[:role_arn],
|
@@ -89,9 +101,20 @@ namespace :ecs do
|
|
89
101
|
task_definition_name: service[:task_definition_name],
|
90
102
|
load_balancers: service[:load_balancers],
|
91
103
|
desired_count: service[:desired_count],
|
104
|
+
launch_type: service[:launch_type],
|
105
|
+
network_configuration: service[:network_configuration],
|
106
|
+
health_check_grace_period_seconds: service[:health_check_grace_period_seconds],
|
107
|
+
delete: service[:delete],
|
108
|
+
enable_ecs_managed_tags: service[:enable_ecs_managed_tags],
|
109
|
+
tags: service[:tags],
|
110
|
+
propagate_tags: service[:propagate_tags],
|
111
|
+
enable_execute_command: service[:enable_execute_command],
|
92
112
|
}
|
93
113
|
service_options[:deployment_configuration] = service[:deployment_configuration] if service[:deployment_configuration]
|
94
|
-
|
114
|
+
service_options[:placement_constraints] = service[:placement_constraints] if service[:placement_constraints]
|
115
|
+
service_options[:placement_strategy] = service[:placement_strategy] if service[:placement_strategy]
|
116
|
+
service_options[:scheduling_strategy] = service[:scheduling_strategy] if service[:scheduling_strategy]
|
117
|
+
s = EcsDeploy::Service.new(**service_options)
|
95
118
|
s.deploy
|
96
119
|
s
|
97
120
|
end
|
@@ -149,9 +172,14 @@ namespace :ecs do
|
|
149
172
|
task_definition_name: rollback_arn,
|
150
173
|
load_balancers: service[:load_balancers],
|
151
174
|
desired_count: service[:desired_count],
|
175
|
+
launch_type: service[:launch_type],
|
176
|
+
network_configuration: service[:network_configuration],
|
177
|
+
health_check_grace_period_seconds: service[:health_check_grace_period_seconds],
|
152
178
|
}
|
153
179
|
service_options[:deployment_configuration] = service[:deployment_configuration] if service[:deployment_configuration]
|
154
|
-
|
180
|
+
service_options[:placement_constraints] = service[:placement_constraints] if service[:placement_constraints]
|
181
|
+
service_options[:placement_strategy] = service[:placement_strategy] if service[:placement_strategy]
|
182
|
+
s = EcsDeploy::Service.new(**service_options)
|
155
183
|
s.deploy
|
156
184
|
EcsDeploy::TaskDefinition.deregister(current_task_definition_arn, region: r)
|
157
185
|
s
|
@@ -160,4 +188,46 @@ namespace :ecs do
|
|
160
188
|
end
|
161
189
|
end
|
162
190
|
end
|
191
|
+
|
192
|
+
task increase_instances_to_max_size: [:configure] do
|
193
|
+
configs = fetch(:ecs_instance_fluctuation_manager_configs, [])
|
194
|
+
unless configs.empty?
|
195
|
+
regions = Array(fetch(:ecs_region))
|
196
|
+
regions = [EcsDeploy.config.default_region] if regions.empty?
|
197
|
+
regions.each do |region|
|
198
|
+
configs.each do |config|
|
199
|
+
logger = config.fetch(:logger, EcsDeploy.logger)
|
200
|
+
m = EcsDeploy::InstanceFluctuationManager.new(
|
201
|
+
region: config[:region] || region,
|
202
|
+
cluster: config[:cluster] || fetch(:ecs_default_cluster),
|
203
|
+
auto_scaling_group_name: config[:auto_scaling_group_name],
|
204
|
+
desired_capacity: config[:desired_capacity],
|
205
|
+
logger: logger
|
206
|
+
)
|
207
|
+
m.increase
|
208
|
+
end
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
task terminate_redundant_instances: [:configure] do
|
214
|
+
configs = fetch(:ecs_instance_fluctuation_manager_configs, [])
|
215
|
+
unless configs.empty?
|
216
|
+
regions = Array(fetch(:ecs_region))
|
217
|
+
regions = [EcsDeploy.config.default_region] if regions.empty?
|
218
|
+
regions.each do |region|
|
219
|
+
configs.each do |config|
|
220
|
+
logger = config.fetch(:logger, EcsDeploy.logger)
|
221
|
+
m = EcsDeploy::InstanceFluctuationManager.new(
|
222
|
+
region: config[:region] || region,
|
223
|
+
cluster: config[:cluster] || fetch(:ecs_default_cluster),
|
224
|
+
auto_scaling_group_name: config[:auto_scaling_group_name],
|
225
|
+
desired_capacity: config[:desired_capacity],
|
226
|
+
logger: logger
|
227
|
+
)
|
228
|
+
m.decrease
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
end
|
163
233
|
end
|
@@ -6,12 +6,16 @@ module EcsDeploy
|
|
6
6
|
:secret_access_key,
|
7
7
|
:default_region,
|
8
8
|
:deploy_wait_timeout,
|
9
|
-
:ecs_service_role
|
9
|
+
:ecs_service_role,
|
10
|
+
:ecs_wait_until_services_stable_max_attempts,
|
11
|
+
:ecs_wait_until_services_stable_delay
|
10
12
|
|
11
13
|
def initialize
|
12
14
|
@log_level = :info
|
13
15
|
@deploy_wait_timeout = 300
|
14
|
-
|
16
|
+
# The following values are the default values of Aws::ECS::Waiters::ServicesStable
|
17
|
+
@ecs_wait_until_services_stable_max_attempts = 40
|
18
|
+
@ecs_wait_until_services_stable_delay = 15
|
15
19
|
end
|
16
20
|
end
|
17
21
|
end
|